From 1081962eacf2814fea6f4fa3255c530de14e4a24 Mon Sep 17 00:00:00 2001
From: Scott Moser
Date: Thu, 19 Apr 2018 21:30:08 -0600
Subject: pylint: pay attention to unused variable warnings.

This enables warnings produced by pylint for unused variables (W0612),
and fixes the existing errors.
---
 cloudinit/sources/tests/test_init.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'cloudinit/sources/tests')

diff --git a/cloudinit/sources/tests/test_init.py b/cloudinit/sources/tests/test_init.py
index e7fda22a..452e9219 100644
--- a/cloudinit/sources/tests/test_init.py
+++ b/cloudinit/sources/tests/test_init.py
@@ -278,7 +278,7 @@ class TestDataSource(CiTestCase):
         base_args = get_args(DataSource.get_hostname)  # pylint: disable=W1505
         # Import all DataSource subclasses so we can inspect them.
         modules = util.find_modules(os.path.dirname(os.path.dirname(__file__)))
-        for loc, name in modules.items():
+        for _loc, name in modules.items():
             mod_locs, _ = importer.find_module(name, ['cloudinit.sources'], [])
             if mod_locs:
                 importer.import_module(mod_locs[0])
-- 
cgit v1.2.3
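
The fix above relies on pylint's dummy-variable convention: a leading
underscore marks a binding as intentionally unused, which silences W0612.
A minimal standalone illustration (not part of the patch; the dict literal
is just a stand-in for util.find_modules() output):

    # pylint reports W0612 (unused-variable) because 'loc' is never read:
    for loc, name in {'/fake/path': 'fake_module'}.items():
        print(name)

    # Prefixing the variable with '_' matches pylint's default
    # dummy-variables regex, so the loop below passes the check:
    for _loc, name in {'/fake/path': 'fake_module'}.items():
        print(name)
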
From cd1de5f47ab6b82f2c6fd61a5f6681f33b3e5705 Mon Sep 17 00:00:00 2001
From: Chad Smith
Date: Wed, 23 May 2018 16:08:43 -0600
Subject: openstack: Allow discovery in init-local using dhclient in a sandbox.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Network has not yet been configured in the init-local stage, so the
openstack datasource will use dhcp-client to temporarily obtain an ipv4
address and query the metadata service at http://169.254.169.254 to get
network_data.json configuration. If present, the datasource will return
network_config version 1 config based on that network_data.json content.
Previously the OpenStack datasource only set up dhcp on the fallback
interface, so this represents a change in behavior to react to the full
config provided by openstack.

Also significant to OpenStack is the separation of a _crawl_data
operation from get_data(). crawl_data walks the available metadata
services and returns a dict of discovered content. get_data consumes the
crawled_data, caches it in the datasource and reacts to that data.

/run/cloud-init/instance-data.json now publishes the network_data.json or
ec2_metadata key if that data is present on any datasource.

The main reasons for the separation of crawl from get_data:
 * Enable performance metrics of cloud-init's metadata crawls on each boot
 * Enable cloud-init modules and scripts to query and consume metadata
   content which may have updated/changed after cloud-init's initial cache
   during instance boot. (Think hotplug)

Also generalize common logic to the base DataSource class/module:
 * Move the common UNSET variable up into the base datasource module; fix
   EC2, ConfigDrive, OpenStack, SmartOS to use the global.
 * Drop get_url_settings from Ec2, CloudStack and OpenStack and generalize
   DataSource.get_url_params(). Allow subclasses to override url_max_wait,
   url_timeout and url_retries params.
 * Rename the get_network_metadata bool to perform_dhcp_setup as it
   designates whether EphemeralDHCPv4 setup is required before crawling
   metadata.

LP: #1749717
---
 cloudinit/sources/DataSourceCloudStack.py         |  31 ++---
 cloudinit/sources/DataSourceConfigDrive.py        |   4 +-
 cloudinit/sources/DataSourceEc2.py                |  48 ++-----
 cloudinit/sources/DataSourceOpenStack.py          | 161 ++++++++++++++--------
 cloudinit/sources/DataSourceSmartOS.py            |   9 +-
 cloudinit/sources/__init__.py                     |  76 ++++++++++
 cloudinit/sources/tests/test_init.py              |  89 +++++++++++-
 tests/unittests/test_datasource/test_common.py    |   1 +
 tests/unittests/test_datasource/test_openstack.py | 121 +++++++++++++++-
 9 files changed, 416 insertions(+), 124 deletions(-)

(limited to 'cloudinit/sources/tests')

diff --git a/cloudinit/sources/DataSourceCloudStack.py b/cloudinit/sources/DataSourceCloudStack.py
index 0df545fc..d4b758f2 100644
--- a/cloudinit/sources/DataSourceCloudStack.py
+++ b/cloudinit/sources/DataSourceCloudStack.py
@@ -68,6 +68,10 @@ class DataSourceCloudStack(sources.DataSource):
 
     dsname = 'CloudStack'
 
+    # Setup read_url parameters per get_url_params.
+    url_max_wait = 120
+    url_timeout = 50
+
     def __init__(self, sys_cfg, distro, paths):
         sources.DataSource.__init__(self, sys_cfg, distro, paths)
         self.seed_dir = os.path.join(paths.seed_dir, 'cs')
@@ -80,33 +84,18 @@ class DataSourceCloudStack(sources.DataSource):
             self.metadata_address = "http://%s/" % (self.vr_addr,)
         self.cfg = {}
 
-    def _get_url_settings(self):
-        mcfg = self.ds_cfg
-        max_wait = 120
-        try:
-            max_wait = int(mcfg.get("max_wait", max_wait))
-        except Exception:
-            util.logexc(LOG, "Failed to get max wait. using %s", max_wait)
+    def wait_for_metadata_service(self):
+        url_params = self.get_url_params()
 
-        if max_wait == 0:
+        if url_params.max_wait_seconds <= 0:
             return False
 
-        timeout = 50
-        try:
-            timeout = int(mcfg.get("timeout", timeout))
-        except Exception:
-            util.logexc(LOG, "Failed to get timeout, using %s", timeout)
-
-        return (max_wait, timeout)
-
-    def wait_for_metadata_service(self):
-        (max_wait, timeout) = self._get_url_settings()
-
        urls = [uhelp.combine_url(self.metadata_address,
                                  'latest/meta-data/instance-id')]
        start_time = time.time()
-        url = uhelp.wait_for_url(urls=urls, max_wait=max_wait,
-                                 timeout=timeout, status_cb=LOG.warn)
+        url = uhelp.wait_for_url(
+            urls=urls, max_wait=url_params.max_wait_seconds,
+            timeout=url_params.timeout_seconds, status_cb=LOG.warn)
 
         if url:
             LOG.debug("Using metadata source: '%s'", url)
diff --git a/cloudinit/sources/DataSourceConfigDrive.py b/cloudinit/sources/DataSourceConfigDrive.py
index 121cf215..4cb28977 100644
--- a/cloudinit/sources/DataSourceConfigDrive.py
+++ b/cloudinit/sources/DataSourceConfigDrive.py
@@ -43,7 +43,7 @@ class DataSourceConfigDrive(openstack.SourceMixin, sources.DataSource):
         self.version = None
         self.ec2_metadata = None
         self._network_config = None
-        self.network_json = None
+        self.network_json = sources.UNSET
         self.network_eni = None
         self.known_macs = None
         self.files = {}
@@ -149,7 +149,7 @@ class DataSourceConfigDrive(openstack.SourceMixin, sources.DataSource):
     @property
     def network_config(self):
         if self._network_config is None:
-            if self.network_json is not None:
+            if self.network_json not in (None, sources.UNSET):
                 LOG.debug("network config provided via network_json")
                 self._network_config = openstack.convert_net_json(
                     self.network_json, known_macs=self.known_macs)
diff --git a/cloudinit/sources/DataSourceEc2.py b/cloudinit/sources/DataSourceEc2.py
index 21e9ef84..968ab3f7 100644
--- a/cloudinit/sources/DataSourceEc2.py
+++ b/cloudinit/sources/DataSourceEc2.py
@@ -27,8 +27,6 @@ SKIP_METADATA_URL_CODES = frozenset([uhelp.NOT_FOUND])
 STRICT_ID_PATH = ("datasource", "Ec2", "strict_id")
 STRICT_ID_DEFAULT = "warn"
 
-_unset = "_unset"
-
 
 class Platforms(object):
     # TODO Rename and move to cloudinit.cloud.CloudNames
@@ -59,15 +57,16 @@ class DataSourceEc2(sources.DataSource):
     # for extended metadata content. IPv6 support comes in 2016-09-02
     extended_metadata_versions = ['2016-09-02']
 
+    # Setup read_url parameters per get_url_params.
+    url_max_wait = 120
+    url_timeout = 50
+
     _cloud_platform = None
 
-    _network_config = _unset  # Used for caching calculated network config v1
+    _network_config = sources.UNSET  # Used to cache calculated network cfg v1
 
     # Whether we want to get network configuration from the metadata service.
-    get_network_metadata = False
-
-    # Track the discovered fallback nic for use in configuration generation.
-    _fallback_interface = None
+    perform_dhcp_setup = False
 
     def __init__(self, sys_cfg, distro, paths):
         super(DataSourceEc2, self).__init__(sys_cfg, distro, paths)
@@ -98,7 +97,7 @@ class DataSourceEc2(sources.DataSource):
         elif self.cloud_platform == Platforms.NO_EC2_METADATA:
             return False
 
-        if self.get_network_metadata:  # Setup networking in init-local stage.
+        if self.perform_dhcp_setup:  # Setup networking in init-local stage.
             if util.is_FreeBSD():
                 LOG.debug("FreeBSD doesn't support running dhclient with -sf")
                 return False
@@ -158,27 +157,11 @@ class DataSourceEc2(sources.DataSource):
         else:
             return self.metadata['instance-id']
 
-    def _get_url_settings(self):
-        mcfg = self.ds_cfg
-        max_wait = 120
-        try:
-            max_wait = int(mcfg.get("max_wait", max_wait))
-        except Exception:
-            util.logexc(LOG, "Failed to get max wait. using %s", max_wait)
-
-        timeout = 50
-        try:
-            timeout = max(0, int(mcfg.get("timeout", timeout)))
-        except Exception:
-            util.logexc(LOG, "Failed to get timeout, using %s", timeout)
-
-        return (max_wait, timeout)
-
     def wait_for_metadata_service(self):
         mcfg = self.ds_cfg
 
-        (max_wait, timeout) = self._get_url_settings()
-        if max_wait <= 0:
+        url_params = self.get_url_params()
+        if url_params.max_wait_seconds <= 0:
             return False
 
         # Remove addresses from the list that wont resolve.
@@ -205,7 +188,8 @@ class DataSourceEc2(sources.DataSource):
 
         start_time = time.time()
         url = uhelp.wait_for_url(
-            urls=urls, max_wait=max_wait, timeout=timeout, status_cb=LOG.warn)
+            urls=urls, max_wait=url_params.max_wait_seconds,
+            timeout=url_params.timeout_seconds, status_cb=LOG.warn)
 
         if url:
             self.metadata_address = url2base[url]
@@ -310,11 +294,11 @@ class DataSourceEc2(sources.DataSource):
     @property
     def network_config(self):
         """Return a network config dict for rendering ENI or netplan files."""
-        if self._network_config != _unset:
+        if self._network_config != sources.UNSET:
             return self._network_config
 
         if self.metadata is None:
-            # this would happen if get_data hadn't been called. leave as _unset
+            # this would happen if get_data hadn't been called. leave as UNSET
             LOG.warning(
                 "Unexpected call to network_config when metadata is None.")
             return None
@@ -353,9 +337,7 @@ class DataSourceEc2(sources.DataSource):
                 self._fallback_interface = _legacy_fbnic
                 self.fallback_nic = None
             else:
-                self._fallback_interface = net.find_fallback_nic()
-                if self._fallback_interface is None:
-                    LOG.warning("Did not find a fallback interface on EC2.")
+                return super(DataSourceEc2, self).fallback_interface
         return self._fallback_interface
 
     def _crawl_metadata(self):
@@ -390,7 +372,7 @@ class DataSourceEc2Local(DataSourceEc2):
     metadata service. If the metadata service provides network configuration
     then render the network configuration for that instance based on metadata.
     """
-    get_network_metadata = True  # Get metadata network config if present
+    perform_dhcp_setup = True  # Use dhcp before querying metadata
 
     def get_data(self):
         supported_platforms = (Platforms.AWS,)
diff --git a/cloudinit/sources/DataSourceOpenStack.py b/cloudinit/sources/DataSourceOpenStack.py
index fb166ae1..1a12a3f1 100644
--- a/cloudinit/sources/DataSourceOpenStack.py
+++ b/cloudinit/sources/DataSourceOpenStack.py
@@ -7,6 +7,7 @@
 import time
 
 from cloudinit import log as logging
+from cloudinit.net.dhcp import EphemeralDHCPv4, NoDHCPLeaseError
 from cloudinit import sources
 from cloudinit import url_helper
 from cloudinit import util
@@ -27,46 +28,25 @@ class DataSourceOpenStack(openstack.SourceMixin, sources.DataSource):
 
     dsname = "OpenStack"
 
+    _network_config = sources.UNSET  # Used to cache calculated network cfg v1
+
+    # Whether we want to get network configuration from the metadata service.
+    perform_dhcp_setup = False
+
     def __init__(self, sys_cfg, distro, paths):
         super(DataSourceOpenStack, self).__init__(sys_cfg, distro, paths)
         self.metadata_address = None
         self.ssl_details = util.fetch_ssl_details(self.paths)
         self.version = None
         self.files = {}
-        self.ec2_metadata = None
+        self.ec2_metadata = sources.UNSET
+        self.network_json = sources.UNSET
 
     def __str__(self):
         root = sources.DataSource.__str__(self)
         mstr = "%s [%s,ver=%s]" % (root, self.dsmode, self.version)
         return mstr
 
-    def _get_url_settings(self):
-        # TODO(harlowja): this is shared with ec2 datasource, we should just
-        # move it to a shared location instead...
-        # Note: the defaults here are different though.
-
-        # max_wait < 0 indicates do not wait
-        max_wait = -1
-        timeout = 10
-        retries = 5
-
-        try:
-            max_wait = int(self.ds_cfg.get("max_wait", max_wait))
-        except Exception:
-            util.logexc(LOG, "Failed to get max wait. using %s", max_wait)
-
-        try:
-            timeout = max(0, int(self.ds_cfg.get("timeout", timeout)))
-        except Exception:
-            util.logexc(LOG, "Failed to get timeout, using %s", timeout)
-
-        try:
-            retries = int(self.ds_cfg.get("retries", retries))
-        except Exception:
-            util.logexc(LOG, "Failed to get retries. using %s", retries)
-
-        return (max_wait, timeout, retries)
-
     def wait_for_metadata_service(self):
         urls = self.ds_cfg.get("metadata_urls", [DEF_MD_URL])
         filtered = [x for x in urls if util.is_resolvable_url(x)]
@@ -86,10 +66,11 @@ class DataSourceOpenStack(openstack.SourceMixin, sources.DataSource):
             md_urls.append(md_url)
             url2base[md_url] = url
 
-        (max_wait, timeout, _retries) = self._get_url_settings()
+        url_params = self.get_url_params()
         start_time = time.time()
-        avail_url = url_helper.wait_for_url(urls=md_urls, max_wait=max_wait,
-                                            timeout=timeout)
+        avail_url = url_helper.wait_for_url(
+            urls=md_urls, max_wait=url_params.max_wait_seconds,
+            timeout=url_params.timeout_seconds)
         if avail_url:
             LOG.debug("Using metadata source: '%s'", url2base[avail_url])
         else:
@@ -99,38 +80,64 @@ class DataSourceOpenStack(openstack.SourceMixin, sources.DataSource):
         self.metadata_address = url2base.get(avail_url)
         return bool(avail_url)
 
-    def _get_data(self):
-        try:
-            if not self.wait_for_metadata_service():
-                return False
-        except IOError:
-            return False
+    def check_instance_id(self, sys_cfg):
+        # quickly (local check only) if self.instance_id is still valid
+        return sources.instance_id_matches_system_uuid(self.get_instance_id())
 
-        (_max_wait, timeout, retries) = self._get_url_settings()
+    @property
+    def network_config(self):
+        """Return a network config dict for rendering ENI or netplan files."""
+        if self._network_config != sources.UNSET:
+            return self._network_config
+
+        # RELEASE_BLOCKER: SRU to Xenial and Artful SRU should not provide
+        # network_config by default unless configured in /etc/cloud/cloud.cfg*.
+        # Patch Xenial and Artful before release to default to False.
+        if util.is_false(self.ds_cfg.get('apply_network_config', True)):
+            self._network_config = None
+            return self._network_config
+        if self.network_json == sources.UNSET:
+            # this would happen if get_data hadn't been called. leave as UNSET
+            LOG.warning(
+                'Unexpected call to network_config when network_json is None.')
+            return None
+
+        LOG.debug('network config provided via network_json')
+        self._network_config = openstack.convert_net_json(
+            self.network_json, known_macs=None)
+        return self._network_config
 
-        try:
-            results = util.log_time(LOG.debug,
-                                    'Crawl of openstack metadata service',
-                                    read_metadata_service,
-                                    args=[self.metadata_address],
-                                    kwargs={'ssl_details': self.ssl_details,
-                                            'retries': retries,
-                                            'timeout': timeout})
-        except openstack.NonReadable:
-            return False
-        except (openstack.BrokenMetadata, IOError):
-            util.logexc(LOG, "Broken metadata address %s",
-                        self.metadata_address)
-            return False
+    def _get_data(self):
+        """Crawl metadata, parse and persist that data for this instance.
+
+        @return: True when metadata discovered indicates OpenStack datasource.
+            False when unable to contact metadata service or when metadata
+            format is invalid or disabled.
+        """
+        if self.perform_dhcp_setup:  # Setup networking in init-local stage.
+            try:
+                with EphemeralDHCPv4(self.fallback_interface):
+                    results = util.log_time(
+                        logfunc=LOG.debug, msg='Crawl of metadata service',
+                        func=self._crawl_metadata)
+            except (NoDHCPLeaseError, sources.InvalidMetaDataException) as e:
+                util.logexc(LOG, str(e))
+                return False
+        else:
+            try:
+                results = self._crawl_metadata()
+            except sources.InvalidMetaDataException as e:
+                util.logexc(LOG, str(e))
+                return False
 
         self.dsmode = self._determine_dsmode([results.get('dsmode')])
         if self.dsmode == sources.DSMODE_DISABLED:
             return False
-
         md = results.get('metadata', {})
         md = util.mergemanydict([md, DEFAULT_METADATA])
         self.metadata = md
         self.ec2_metadata = results.get('ec2-metadata')
+        self.network_json = results.get('networkdata')
         self.userdata_raw = results.get('userdata')
         self.version = results['version']
         self.files.update(results.get('files', {}))
@@ -145,9 +152,50 @@ class DataSourceOpenStack(openstack.SourceMixin, sources.DataSource):
 
         return True
 
-    def check_instance_id(self, sys_cfg):
-        # quickly (local check only) if self.instance_id is still valid
-        return sources.instance_id_matches_system_uuid(self.get_instance_id())
+    def _crawl_metadata(self):
+        """Crawl metadata service when available.
+
+        @returns: Dictionary with all metadata discovered for this datasource.
+        @raise: InvalidMetaDataException on unreadable or broken
+            metadata.
+        """
+        try:
+            if not self.wait_for_metadata_service():
+                raise sources.InvalidMetaDataException(
+                    'No active metadata service found')
+        except IOError as e:
+            raise sources.InvalidMetaDataException(
+                'IOError contacting metadata service: {error}'.format(
+                    error=str(e)))
+
+        url_params = self.get_url_params()
+
+        try:
+            result = util.log_time(
+                LOG.debug, 'Crawl of openstack metadata service',
+                read_metadata_service, args=[self.metadata_address],
+                kwargs={'ssl_details': self.ssl_details,
+                        'retries': url_params.num_retries,
+                        'timeout': url_params.timeout_seconds})
+        except openstack.NonReadable as e:
+            raise sources.InvalidMetaDataException(str(e))
+        except (openstack.BrokenMetadata, IOError):
+            msg = 'Broken metadata address {addr}'.format(
+                addr=self.metadata_address)
+            raise sources.InvalidMetaDataException(msg)
+        return result
+
+
+class DataSourceOpenStackLocal(DataSourceOpenStack):
+    """Run in init-local using a dhcp discovery prior to metadata crawl.
+
+    In init-local, no network is available. This subclass sets up minimal
+    networking with dhclient on a viable nic so that it can talk to the
+    metadata service. If the metadata service provides network configuration
+    then render the network configuration for that instance based on metadata.
+    """
+
+    perform_dhcp_setup = True  # Get metadata network config if present
 
 
 def read_metadata_service(base_url, ssl_details=None,
@@ -159,6 +207,7 @@ def read_metadata_service(base_url, ssl_details=None,
 
 # Used to match classes to dependencies
 datasources = [
+    (DataSourceOpenStackLocal, (sources.DEP_FILESYSTEM,)),
     (DataSourceOpenStack, (sources.DEP_FILESYSTEM, sources.DEP_NETWORK)),
 ]
 
diff --git a/cloudinit/sources/DataSourceSmartOS.py b/cloudinit/sources/DataSourceSmartOS.py
index fcb46b14..c91e4d59 100644
--- a/cloudinit/sources/DataSourceSmartOS.py
+++ b/cloudinit/sources/DataSourceSmartOS.py
@@ -165,9 +165,8 @@ class DataSourceSmartOS(sources.DataSource):
 
     dsname = "Joyent"
 
-    _unset = "_unset"
-    smartos_type = _unset
-    md_client = _unset
+    smartos_type = sources.UNSET
+    md_client = sources.UNSET
 
     def __init__(self, sys_cfg, distro, paths):
         sources.DataSource.__init__(self, sys_cfg, distro, paths)
@@ -189,12 +188,12 @@ class DataSourceSmartOS(sources.DataSource):
         return "%s [client=%s]" % (root, self.md_client)
 
     def _init(self):
-        if self.smartos_type == self._unset:
+        if self.smartos_type == sources.UNSET:
             self.smartos_type = get_smartos_environ()
             if self.smartos_type is None:
                 self.md_client = None
-        if self.md_client == self._unset:
+        if self.md_client == sources.UNSET:
             self.md_client = jmc_client_factory(
                 smartos_type=self.smartos_type,
                 metadata_sockfile=self.ds_cfg['metadata_sockfile'],
diff --git a/cloudinit/sources/__init__.py b/cloudinit/sources/__init__.py
index df0b374a..90d74575 100644
--- a/cloudinit/sources/__init__.py
+++ b/cloudinit/sources/__init__.py
@@ -9,6 +9,7 @@
 # This file is part of cloud-init. See LICENSE file for license information.
 
 import abc
+from collections import namedtuple
 import copy
 import json
 import os
@@ -17,6 +18,7 @@ import six
 from cloudinit.atomic_helper import write_json
 from cloudinit import importer
 from cloudinit import log as logging
+from cloudinit import net
 from cloudinit import type_utils
 from cloudinit import user_data as ud
 from cloudinit import util
@@ -41,6 +43,8 @@ INSTANCE_JSON_FILE = 'instance-data.json'
 # Key which can be provide a cloud's official product name to cloud-init
 METADATA_CLOUD_NAME_KEY = 'cloud-name'
 
+UNSET = "_unset"
+
 LOG = logging.getLogger(__name__)
 
 
@@ -48,6 +52,11 @@ class DataSourceNotFoundException(Exception):
     pass
 
 
+class InvalidMetaDataException(Exception):
+    """Raised when metadata is broken, unavailable or disabled."""
+    pass
+
+
 def process_base64_metadata(metadata, key_path=''):
     """Strip ci-b64 prefix and return metadata with base64-encoded-keys set."""
     md_copy = copy.deepcopy(metadata)
@@ -68,6 +77,10 @@ def process_base64_metadata(metadata, key_path=''):
     return md_copy
 
 
+URLParams = namedtuple(
+    'URLParms', ['max_wait_seconds', 'timeout_seconds', 'num_retries'])
+
+
 @six.add_metaclass(abc.ABCMeta)
 class DataSource(object):
 
@@ -81,6 +94,14 @@ class DataSource(object):
     # Cached cloud_name as determined by _get_cloud_name
     _cloud_name = None
 
+    # Track the discovered fallback nic for use in configuration generation.
+    _fallback_interface = None
+
+    # read_url_params
+    url_max_wait = -1   # max_wait < 0 means do not wait
+    url_timeout = 10    # timeout for each metadata url read attempt
+    url_retries = 5     # number of times to retry url upon 404
+
     def __init__(self, sys_cfg, distro, paths, ud_proc=None):
         self.sys_cfg = sys_cfg
         self.distro = distro
@@ -128,6 +149,14 @@ class DataSource(object):
                 'meta-data': self.metadata,
                 'user-data': self.get_userdata_raw(),
                 'vendor-data': self.get_vendordata_raw()}}
+        if hasattr(self, 'network_json'):
+            network_json = getattr(self, 'network_json')
+            if network_json != UNSET:
+                instance_data['ds']['network_json'] = network_json
+        if hasattr(self, 'ec2_metadata'):
+            ec2_metadata = getattr(self, 'ec2_metadata')
+            if ec2_metadata != UNSET:
+                instance_data['ds']['ec2_metadata'] = ec2_metadata
         instance_data.update(
             self._get_standardized_metadata())
         try:
@@ -149,6 +178,42 @@ class DataSource(object):
             'Subclasses of DataSource must implement _get_data which'
             ' sets self.metadata, vendordata_raw and userdata_raw.')
 
+    def get_url_params(self):
+        """Return the Datasource's prefered url_read parameters.
+
+        Subclasses may override url_max_wait, url_timeout, url_retries.
+
+        @return: A URLParams object with max_wait_seconds, timeout_seconds,
+            num_retries.
+        """
+        max_wait = self.url_max_wait
+        try:
+            max_wait = int(self.ds_cfg.get("max_wait", self.url_max_wait))
+        except ValueError:
+            util.logexc(
+                LOG, "Config max_wait '%s' is not an int, using default '%s'",
+                self.ds_cfg.get("max_wait"), max_wait)
+
+        timeout = self.url_timeout
+        try:
+            timeout = max(
+                0, int(self.ds_cfg.get("timeout", self.url_timeout)))
+        except ValueError:
+            timeout = self.url_timeout
+            util.logexc(
+                LOG, "Config timeout '%s' is not an int, using default '%s'",
+                self.ds_cfg.get('timeout'), timeout)
+
+        retries = self.url_retries
+        try:
+            retries = int(self.ds_cfg.get("retries", self.url_retries))
+        except Exception:
+            util.logexc(
+                LOG, "Config retries '%s' is not an int, using default '%s'",
+                self.ds_cfg.get('retries'), retries)
+
+        return URLParams(max_wait, timeout, retries)
+
     def get_userdata(self, apply_filter=False):
         if self.userdata is None:
             self.userdata = self.ud_proc.process(self.get_userdata_raw())
@@ -161,6 +226,17 @@ class DataSource(object):
             self.vendordata = self.ud_proc.process(self.get_vendordata_raw())
         return self.vendordata
 
+    @property
+    def fallback_interface(self):
+        """Determine the network interface used during local network config."""
+        if self._fallback_interface is None:
+            self._fallback_interface = net.find_fallback_nic()
+            if self._fallback_interface is None:
+                LOG.warning(
+                    "Did not find a fallback interface on %s.",
+                    self.cloud_name)
+        return self._fallback_interface
+
     @property
     def cloud_name(self):
         """Return lowercase cloud name as determined by the datasource.
diff --git a/cloudinit/sources/tests/test_init.py b/cloudinit/sources/tests/test_init.py
index 452e9219..d5bc98a4 100644
--- a/cloudinit/sources/tests/test_init.py
+++ b/cloudinit/sources/tests/test_init.py
@@ -17,6 +17,7 @@ from cloudinit import util
 class DataSourceTestSubclassNet(DataSource):
 
     dsname = 'MyTestSubclass'
+    url_max_wait = 55
 
     def __init__(self, sys_cfg, distro, paths, custom_userdata=None):
         super(DataSourceTestSubclassNet, self).__init__(
@@ -70,8 +71,7 @@ class TestDataSource(CiTestCase):
         """Init uses DataSource.dsname for sourcing ds_cfg."""
         sys_cfg = {'datasource': {'MyTestSubclass': {'key2': False}}}
         distro = 'distrotest'  # generally should be a Distro object
-        paths = Paths({})
-        datasource = DataSourceTestSubclassNet(sys_cfg, distro, paths)
+        datasource = DataSourceTestSubclassNet(sys_cfg, distro, self.paths)
         self.assertEqual({'key2': False}, datasource.ds_cfg)
 
     def test_str_is_classname(self):
@@ -81,6 +81,91 @@ class TestDataSource(CiTestCase):
             'DataSourceTestSubclassNet',
             str(DataSourceTestSubclassNet('', '', self.paths)))
 
+    def test_datasource_get_url_params_defaults(self):
+        """get_url_params default url config settings for the datasource."""
+        params = self.datasource.get_url_params()
+        self.assertEqual(params.max_wait_seconds, self.datasource.url_max_wait)
+        self.assertEqual(params.timeout_seconds, self.datasource.url_timeout)
+        self.assertEqual(params.num_retries, self.datasource.url_retries)
+
+    def test_datasource_get_url_params_subclassed(self):
+        """Subclasses can override get_url_params defaults."""
+        sys_cfg = {'datasource': {'MyTestSubclass': {'key2': False}}}
+        distro = 'distrotest'  # generally should be a Distro object
+        datasource = DataSourceTestSubclassNet(sys_cfg, distro, self.paths)
+        expected = (datasource.url_max_wait, datasource.url_timeout,
+                    datasource.url_retries)
+        url_params = datasource.get_url_params()
+        self.assertNotEqual(self.datasource.get_url_params(), url_params)
+        self.assertEqual(expected, url_params)
+
+    def test_datasource_get_url_params_ds_config_override(self):
+        """Datasource configuration options can override url param defaults."""
+        sys_cfg = {
+            'datasource': {
+                'MyTestSubclass': {
+                    'max_wait': '1', 'timeout': '2', 'retries': '3'}}}
+        datasource = DataSourceTestSubclassNet(
+            sys_cfg, self.distro, self.paths)
+        expected = (1, 2, 3)
+        url_params = datasource.get_url_params()
+        self.assertNotEqual(
+            (datasource.url_max_wait, datasource.url_timeout,
+             datasource.url_retries),
+            url_params)
+        self.assertEqual(expected, url_params)
+
+    def test_datasource_get_url_params_is_zero_or_greater(self):
+        """get_url_params ignores timeouts with a value below 0."""
+        # Set an override that is below 0 which gets ignored.
+        sys_cfg = {'datasource': {'_undef': {'timeout': '-1'}}}
+        datasource = DataSource(sys_cfg, self.distro, self.paths)
+        (_max_wait, timeout, _retries) = datasource.get_url_params()
+        self.assertEqual(0, timeout)
+
+    def test_datasource_get_url_uses_defaults_on_errors(self):
+        """On invalid system config values for url_params defaults are used."""
+        # All invalid values should be logged
+        sys_cfg = {'datasource': {
+            '_undef': {
+                'max_wait': 'nope', 'timeout': 'bug', 'retries': 'nonint'}}}
+        datasource = DataSource(sys_cfg, self.distro, self.paths)
+        url_params = datasource.get_url_params()
+        expected = (datasource.url_max_wait, datasource.url_timeout,
+                    datasource.url_retries)
+        self.assertEqual(expected, url_params)
+        logs = self.logs.getvalue()
+        expected_logs = [
+            "Config max_wait 'nope' is not an int, using default '-1'",
+            "Config timeout 'bug' is not an int, using default '10'",
+            "Config retries 'nonint' is not an int, using default '5'",
+        ]
+        for log in expected_logs:
+            self.assertIn(log, logs)
+
+    @mock.patch('cloudinit.sources.net.find_fallback_nic')
+    def test_fallback_interface_is_discovered(self, m_get_fallback_nic):
+        """The fallback_interface is discovered via find_fallback_nic."""
+        m_get_fallback_nic.return_value = 'nic9'
+        self.assertEqual('nic9', self.datasource.fallback_interface)
+
+    @mock.patch('cloudinit.sources.net.find_fallback_nic')
+    def test_fallback_interface_logs_undiscovered(self, m_get_fallback_nic):
+        """Log a warning when fallback_interface can not discover the nic."""
+        self.datasource._cloud_name = 'MySupahCloud'
+        m_get_fallback_nic.return_value = None  # Couldn't discover nic
+        self.assertIsNone(self.datasource.fallback_interface)
+        self.assertEqual(
+            'WARNING: Did not find a fallback interface on MySupahCloud.\n',
+            self.logs.getvalue())
+
+    @mock.patch('cloudinit.sources.net.find_fallback_nic')
+    def test_wb_fallback_interface_is_cached(self, m_get_fallback_nic):
+        """The fallback_interface is cached and won't be rediscovered."""
+        self.datasource._fallback_interface = 'nic10'
+        self.assertEqual('nic10', self.datasource.fallback_interface)
+        m_get_fallback_nic.assert_not_called()
+
     def test__get_data_unimplemented(self):
         """Raise an error when _get_data is not implemented."""
         with self.assertRaises(NotImplementedError) as context_manager:
diff --git a/tests/unittests/test_datasource/test_common.py b/tests/unittests/test_datasource/test_common.py
index ec333888..0d35dc29 100644
--- a/tests/unittests/test_datasource/test_common.py
+++ b/tests/unittests/test_datasource/test_common.py
@@ -40,6 +40,7 @@ DEFAULT_LOCAL = [
     OVF.DataSourceOVF,
     SmartOS.DataSourceSmartOS,
     Ec2.DataSourceEc2Local,
+    OpenStack.DataSourceOpenStackLocal,
 ]
 
 DEFAULT_NETWORK = [
diff --git a/tests/unittests/test_datasource/test_openstack.py b/tests/unittests/test_datasource/test_openstack.py
index bb180c08..fad73b21 100644
--- a/tests/unittests/test_datasource/test_openstack.py
+++ b/tests/unittests/test_datasource/test_openstack.py
@@ -16,7 +16,7 @@ from six import StringIO
 
 from cloudinit import helpers
 from cloudinit import settings
-from cloudinit.sources import convert_vendordata
+from cloudinit.sources import convert_vendordata, UNSET
 from cloudinit.sources import DataSourceOpenStack as ds
 from cloudinit.sources.helpers import openstack
 from cloudinit import util
@@ -129,6 +129,8 @@ def _read_metadata_service():
 
 
 class TestOpenStackDataSource(test_helpers.HttprettyTestCase):
+
+    with_logs = True
     VERSION = 'latest'
 
     def setUp(self):
@@ -223,11 +225,11 @@ class TestOpenStackDataSource(test_helpers.HttprettyTestCase):
         _register_uris(self.VERSION, {}, {}, os_files)
         self.assertRaises(openstack.BrokenMetadata, _read_metadata_service)
 
-    def test_datasource(self):
+    @test_helpers.mock.patch('cloudinit.net.dhcp.maybe_perform_dhcp_discovery')
+    def test_datasource(self, m_dhcp):
         _register_uris(self.VERSION, EC2_FILES, EC2_META, OS_FILES)
-        ds_os = ds.DataSourceOpenStack(settings.CFG_BUILTIN,
-                                       None,
-                                       helpers.Paths({'run_dir': self.tmp}))
+        ds_os = ds.DataSourceOpenStack(
+            settings.CFG_BUILTIN, None, helpers.Paths({'run_dir': self.tmp}))
         self.assertIsNone(ds_os.version)
         found = ds_os.get_data()
         self.assertTrue(found)
@@ -241,6 +243,36 @@ class TestOpenStackDataSource(test_helpers.HttprettyTestCase):
         self.assertEqual(2, len(ds_os.files))
         self.assertEqual(VENDOR_DATA, ds_os.vendordata_pure)
         self.assertIsNone(ds_os.vendordata_raw)
+        m_dhcp.assert_not_called()
+
+    @hp.activate
+    @test_helpers.mock.patch('cloudinit.net.dhcp.EphemeralIPv4Network')
+    @test_helpers.mock.patch('cloudinit.net.dhcp.maybe_perform_dhcp_discovery')
+    def test_local_datasource(self, m_dhcp, m_net):
+        """OpenStackLocal calls EphemeralDHCPNetwork and gets instance data."""
+        _register_uris(self.VERSION, EC2_FILES, EC2_META, OS_FILES)
+        ds_os_local = ds.DataSourceOpenStackLocal(
+            settings.CFG_BUILTIN, None, helpers.Paths({'run_dir': self.tmp}))
+        ds_os_local._fallback_interface = 'eth9'  # Monkey patch for dhcp
+        m_dhcp.return_value = [{
+            'interface': 'eth9', 'fixed-address': '192.168.2.9',
+            'routers': '192.168.2.1', 'subnet-mask': '255.255.255.0',
+            'broadcast-address': '192.168.2.255'}]
+
+        self.assertIsNone(ds_os_local.version)
+        found = ds_os_local.get_data()
+        self.assertTrue(found)
+        self.assertEqual(2, ds_os_local.version)
+        md = dict(ds_os_local.metadata)
+        md.pop('instance-id', None)
+        md.pop('local-hostname', None)
+        self.assertEqual(OSTACK_META, md)
+        self.assertEqual(EC2_META, ds_os_local.ec2_metadata)
+        self.assertEqual(USER_DATA, ds_os_local.userdata_raw)
+        self.assertEqual(2, len(ds_os_local.files))
+        self.assertEqual(VENDOR_DATA, ds_os_local.vendordata_pure)
+        self.assertIsNone(ds_os_local.vendordata_raw)
+        m_dhcp.assert_called_with('eth9')
 
     def test_bad_datasource_meta(self):
         os_files = copy.deepcopy(OS_FILES)
@@ -255,6 +287,10 @@ class TestOpenStackDataSource(test_helpers.HttprettyTestCase):
         found = ds_os.get_data()
         self.assertFalse(found)
         self.assertIsNone(ds_os.version)
+        self.assertIn(
+            'InvalidMetaDataException: Broken metadata address'
+            ' http://169.254.169.25',
+            self.logs.getvalue())
 
     def test_no_datasource(self):
         os_files = copy.deepcopy(OS_FILES)
@@ -274,6 +310,52 @@ class TestOpenStackDataSource(test_helpers.HttprettyTestCase):
         self.assertFalse(found)
         self.assertIsNone(ds_os.version)
 
+    def test_network_config_disabled_by_datasource_config(self):
+        """The network_config can be disabled from datasource config."""
+        mock_path = (
+            'cloudinit.sources.DataSourceOpenStack.openstack.'
+            'convert_net_json')
+        ds_os = ds.DataSourceOpenStack(
+            settings.CFG_BUILTIN, None, helpers.Paths({'run_dir': self.tmp}))
+        ds_os.ds_cfg = {'apply_network_config': False}
+        sample_json = {'links': [{'ethernet_mac_address': 'mymac'}],
+                       'networks': [], 'services': []}
+        ds_os.network_json = sample_json  # Ignore this content from metadata
+        with test_helpers.mock.patch(mock_path) as m_convert_json:
+            self.assertIsNone(ds_os.network_config)
+        m_convert_json.assert_not_called()
+
+    def test_network_config_from_network_json(self):
+        """The datasource gets network_config from network_data.json."""
+        mock_path = (
+            'cloudinit.sources.DataSourceOpenStack.openstack.'
+            'convert_net_json')
+        example_cfg = {'version': 1, 'config': []}
+        ds_os = ds.DataSourceOpenStack(
+            settings.CFG_BUILTIN, None, helpers.Paths({'run_dir': self.tmp}))
+        sample_json = {'links': [{'ethernet_mac_address': 'mymac'}],
+                       'networks': [], 'services': []}
+        ds_os.network_json = sample_json
+        with test_helpers.mock.patch(mock_path) as m_convert_json:
+            m_convert_json.return_value = example_cfg
+            self.assertEqual(example_cfg, ds_os.network_config)
+        self.assertIn(
+            'network config provided via network_json', self.logs.getvalue())
+        m_convert_json.assert_called_with(sample_json, known_macs=None)
+
+    def test_network_config_cached(self):
+        """The datasource caches the network_config property."""
+        mock_path = (
+            'cloudinit.sources.DataSourceOpenStack.openstack.'
+            'convert_net_json')
+        example_cfg = {'version': 1, 'config': []}
+        ds_os = ds.DataSourceOpenStack(
+            settings.CFG_BUILTIN, None, helpers.Paths({'run_dir': self.tmp}))
+        ds_os._network_config = example_cfg
+        with test_helpers.mock.patch(mock_path) as m_convert_json:
+            self.assertEqual(example_cfg, ds_os.network_config)
+        m_convert_json.assert_not_called()
+
     def test_disabled_datasource(self):
         os_files = copy.deepcopy(OS_FILES)
         os_meta = copy.deepcopy(OSTACK_META)
@@ -296,6 +378,35 @@ class TestOpenStackDataSource(test_helpers.HttprettyTestCase):
         self.assertFalse(found)
         self.assertIsNone(ds_os.version)
 
+    @hp.activate
+    def test_wb__crawl_metadata_does_not_persist(self):
+        """_crawl_metadata returns current metadata and does not cache."""
+        _register_uris(self.VERSION, EC2_FILES, EC2_META, OS_FILES)
+        ds_os = ds.DataSourceOpenStack(
+            settings.CFG_BUILTIN, None, helpers.Paths({'run_dir': self.tmp}))
+        crawled_data = ds_os._crawl_metadata()
+        self.assertEqual(UNSET, ds_os.ec2_metadata)
+        self.assertIsNone(ds_os.userdata_raw)
+        self.assertEqual(0, len(ds_os.files))
+        self.assertIsNone(ds_os.vendordata_raw)
+        self.assertEqual(
+            ['dsmode', 'ec2-metadata', 'files', 'metadata', 'networkdata',
+             'userdata', 'vendordata', 'version'],
+            sorted(crawled_data.keys()))
+        self.assertEqual('local', crawled_data['dsmode'])
+        self.assertEqual(EC2_META, crawled_data['ec2-metadata'])
+        self.assertEqual(2, len(crawled_data['files']))
+        md = copy.deepcopy(crawled_data['metadata'])
+        md.pop('instance-id')
+        md.pop('local-hostname')
+        self.assertEqual(OSTACK_META, md)
+        self.assertEqual(
+            json.loads(OS_FILES['openstack/latest/network_data.json']),
+            crawled_data['networkdata'])
+        self.assertEqual(USER_DATA, crawled_data['userdata'])
+        self.assertEqual(VENDOR_DATA, crawled_data['vendordata'])
+        self.assertEqual(2, crawled_data['version'])
+
 
 class TestVendorDataLoading(test_helpers.TestCase):
     def cvj(self, data):
-- 
cgit v1.2.3
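
Taken together, the get_url_params() generalization above means a
datasource now declares class-level defaults (url_max_wait, url_timeout,
url_retries) and operators can still override them per deployment through
datasource config. A minimal sketch of that interaction, assuming the
constructor and get_url_params() behavior shown in the diffs; ExampleDS
and its config values are hypothetical, not part of the patch:

    from cloudinit.helpers import Paths
    from cloudinit.sources import DataSource


    class ExampleDS(DataSource):
        dsname = 'Example'
        url_max_wait = 120  # subclass default, as Ec2/CloudStack set above
        url_timeout = 50


    # 'timeout' and 'retries' come from ds_cfg; 'max_wait' falls back to
    # the class attribute because it is not configured here.
    sys_cfg = {'datasource': {'Example': {'timeout': '10', 'retries': '2'}}}
    ds = ExampleDS(sys_cfg, None, Paths({}))
    params = ds.get_url_params()
    # params == URLParams(max_wait_seconds=120, timeout_seconds=10,
    #                     num_retries=2)
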
From be9ecc12823607b4709b64408aee137bfdfc7d01 Mon Sep 17 00:00:00 2001
From: Chad Smith
Date: Sun, 1 Jul 2018 16:46:23 -0600
Subject: update_metadata: a datasource can support network re-config every boot

Very basic type definitions are now defined to distinguish 'boot'
events from 'new instance (first boot)'. Event types will now be handed
to a datasource.update_metadata method which can determine whether to
refresh its metadata and re-render configuration based on that source
event.

A datasource can 'subscribe' to an event by setting up the update_events
attribute on the datasource class, which describes what config scope is
updated by a list of matching events. By default datasources will have
the following update_events: {'network': [EventType.BOOT_NEW_INSTANCE]}

This setting says the datasource will re-write network configuration
only on first boot of a new instance or when the instance id changes.

New methods are now present on the datasource:
 - clear_cached_attrs: Resets cached datasource attributes to values
   listed in datasource.cached_attr_defaults. This is performed prior to
   processing fresh metadata to avoid keeping old/invalid cached data
   around.
 - update_metadata: accepts source_event_types to determine if the
   metadata should be crawled again and processed
---
 cloudinit/event.py                   |  17 +++
 cloudinit/sources/__init__.py        |  78 +++++++++++
 cloudinit/sources/tests/test_init.py |  83 ++++++++++++-
 cloudinit/stages.py                  |  14 ++-
 cloudinit/tests/test_stages.py       | 231 +++++++++++++++++++++++++++++++++++
 5 files changed, 417 insertions(+), 6 deletions(-)
 create mode 100644 cloudinit/event.py
 create mode 100644 cloudinit/tests/test_stages.py

(limited to 'cloudinit/sources/tests')

diff --git a/cloudinit/event.py b/cloudinit/event.py
new file mode 100644
index 00000000..f7b311fb
--- /dev/null
+++ b/cloudinit/event.py
@@ -0,0 +1,17 @@
+# This file is part of cloud-init. See LICENSE file for license information.
+
+"""Classes and functions related to event handling."""
+
+
+# Event types which can generate maintenance requests for cloud-init.
+class EventType(object):
+    BOOT = "System boot"
+    BOOT_NEW_INSTANCE = "New instance first boot"
+
+    # TODO: Cloud-init will grow support for the follow event types:
+    # UDEV
+    # METADATA_CHANGE
+    # USER_REQUEST
+
+
+# vi: ts=4 expandtab
diff --git a/cloudinit/sources/__init__.py b/cloudinit/sources/__init__.py
index 90d74575..f424316a 100644
--- a/cloudinit/sources/__init__.py
+++ b/cloudinit/sources/__init__.py
@@ -19,6 +19,7 @@ from cloudinit.atomic_helper import write_json
 from cloudinit import importer
 from cloudinit import log as logging
 from cloudinit import net
+from cloudinit.event import EventType
 from cloudinit import type_utils
 from cloudinit import user_data as ud
 from cloudinit import util
@@ -102,6 +103,25 @@ class DataSource(object):
     url_timeout = 10    # timeout for each metadata url read attempt
     url_retries = 5     # number of times to retry url upon 404
 
+    # The datasource defines a list of supported EventTypes during which
+    # the datasource can react to changes in metadata and regenerate
+    # network configuration on metadata changes.
+    # A datasource which supports writing network config on each system boot
+    # would set update_events = {'network': [EventType.BOOT]}
+
+    # Default: generate network config on new instance id (first boot).
+    update_events = {'network': [EventType.BOOT_NEW_INSTANCE]}
+
+    # N-tuple listing default values for any metadata-related class
+    # attributes cached on an instance by a process_data runs. These attribute
+    # values are reset via clear_cached_attrs during any update_metadata call.
+    cached_attr_defaults = (
+        ('ec2_metadata', UNSET), ('network_json', UNSET),
+        ('metadata', {}), ('userdata', None), ('userdata_raw', None),
+        ('vendordata', None), ('vendordata_raw', None))
+
+    _dirty_cache = False
+
     def __init__(self, sys_cfg, distro, paths, ud_proc=None):
         self.sys_cfg = sys_cfg
         self.distro = distro
@@ -134,11 +154,31 @@ class DataSource(object):
                 'region': self.region,
                 'availability-zone': self.availability_zone}}
 
+    def clear_cached_attrs(self, attr_defaults=()):
+        """Reset any cached metadata attributes to datasource defaults.
+
+        @param attr_defaults: Optional tuple of (attr, value) pairs to
+            set instead of cached_attr_defaults.
+        """
+        if not self._dirty_cache:
+            return
+        if attr_defaults:
+            attr_values = attr_defaults
+        else:
+            attr_values = self.cached_attr_defaults
+
+        for attribute, value in attr_values:
+            if hasattr(self, attribute):
+                setattr(self, attribute, value)
+        if not attr_defaults:
+            self._dirty_cache = False
+
     def get_data(self):
         """Datasources implement _get_data to setup metadata and userdata_raw.
 
         Minimally, the datasource should return a boolean True on success.
         """
+        self._dirty_cache = True
         return_value = self._get_data()
         json_file = os.path.join(self.paths.run_dir, INSTANCE_JSON_FILE)
         if not return_value:
@@ -174,6 +214,7 @@ class DataSource(object):
         return return_value
 
     def _get_data(self):
+        """Walk metadata sources, process crawled data and save attributes."""
         raise NotImplementedError(
             'Subclasses of DataSource must implement _get_data which'
             ' sets self.metadata, vendordata_raw and userdata_raw.')
@@ -416,6 +457,41 @@ class DataSource(object):
     def get_package_mirror_info(self):
         return self.distro.get_package_mirror_info(data_source=self)
 
+    def update_metadata(self, source_event_types):
+        """Refresh cached metadata if the datasource supports this event.
+
+        The datasource has a list of update_events which
+        trigger refreshing all cached metadata as well as refreshing the
+        network configuration.
+
+        @param source_event_types: List of EventTypes which may trigger a
+            metadata update.
+
+        @return True if the datasource did successfully update cached metadata
+            due to source_event_type.
+        """
+        supported_events = {}
+        for event in source_event_types:
+            for update_scope, update_events in self.update_events.items():
+                if event in update_events:
+                    if not supported_events.get(update_scope):
+                        supported_events[update_scope] = []
+                    supported_events[update_scope].append(event)
+        for scope, matched_events in supported_events.items():
+            LOG.debug(
+                "Update datasource metadata and %s config due to events: %s",
+                scope, ', '.join(matched_events))
+            # Each datasource has a cached config property which needs clearing
+            # Once cleared that config property will be regenerated from
+            # current metadata.
+            self.clear_cached_attrs((('_%s_config' % scope, UNSET),))
+        if supported_events:
+            self.clear_cached_attrs()
+            result = self.get_data()
+            if result:
+                return True
+        return False
+
     def check_instance_id(self, sys_cfg):
         # quickly (local check only) if self.instance_id is still
         return False
@@ -520,7 +596,7 @@ def find_source(sys_cfg, distro, paths, ds_deps, cfg_list, pkg_list, reporter):
             with myrep:
                 LOG.debug("Seeing if we can get any data from %s", cls)
                 s = cls(sys_cfg, distro, paths)
-                if s.get_data():
+                if s.update_metadata([EventType.BOOT_NEW_INSTANCE]):
                     myrep.message = "found %s data from %s" % (mode, name)
                     return (s, type_utils.obj_name(cls))
         except Exception:
diff --git a/cloudinit/sources/tests/test_init.py b/cloudinit/sources/tests/test_init.py
index d5bc98a4..dcd221be 100644
--- a/cloudinit/sources/tests/test_init.py
+++ b/cloudinit/sources/tests/test_init.py
@@ -5,10 +5,11 @@ import os
 import six
 import stat
 
+from cloudinit.event import EventType
 from cloudinit.helpers import Paths
 from cloudinit import importer
 from cloudinit.sources import (
-    INSTANCE_JSON_FILE, DataSource)
+    INSTANCE_JSON_FILE, DataSource, UNSET)
 from cloudinit.tests.helpers import CiTestCase, skipIf, mock
 from cloudinit.user_data import UserDataProcessor
 from cloudinit import util
@@ -381,3 +382,83 @@ class TestDataSource(CiTestCase):
             get_args(grandchild.get_hostname),  # pylint: disable=W1505
             '%s does not implement DataSource.get_hostname params'
             % grandchild)
+
+    def test_clear_cached_attrs_resets_cached_attr_class_attributes(self):
+        """Class attributes listed in cached_attr_defaults are reset."""
+        count = 0
+        # Setup values for all cached class attributes
+        for attr, value in self.datasource.cached_attr_defaults:
+            setattr(self.datasource, attr, count)
+            count += 1
+        self.datasource._dirty_cache = True
+        self.datasource.clear_cached_attrs()
+        for attr, value in self.datasource.cached_attr_defaults:
+            self.assertEqual(value, getattr(self.datasource, attr))
+
+    def test_clear_cached_attrs_noops_on_clean_cache(self):
+        """Class attributes listed in cached_attr_defaults are reset."""
+        count = 0
+        # Setup values for all cached class attributes
+        for attr, _ in self.datasource.cached_attr_defaults:
+            setattr(self.datasource, attr, count)
+            count += 1
+        self.datasource._dirty_cache = False  # Fake clean cache
+        self.datasource.clear_cached_attrs()
+        count = 0
+        for attr, _ in self.datasource.cached_attr_defaults:
+            self.assertEqual(count, getattr(self.datasource, attr))
+            count += 1
+
+    def test_clear_cached_attrs_skips_non_attr_class_attributes(self):
+        """Skip any cached_attr_defaults which aren't class attributes."""
+        self.datasource._dirty_cache = True
+        self.datasource.clear_cached_attrs()
+        for attr in ('ec2_metadata', 'network_json'):
+            self.assertFalse(hasattr(self.datasource, attr))
+
+    def test_clear_cached_attrs_of_custom_attrs(self):
+        """Custom attr_values can be passed to clear_cached_attrs."""
+        self.datasource._dirty_cache = True
+        cached_attr_name = self.datasource.cached_attr_defaults[0][0]
+        setattr(self.datasource, cached_attr_name, 'himom')
+        self.datasource.myattr = 'orig'
+        self.datasource.clear_cached_attrs(
+            attr_defaults=(('myattr', 'updated'),))
+        self.assertEqual('himom', getattr(self.datasource, cached_attr_name))
+        self.assertEqual('updated', self.datasource.myattr)
+
+    def test_update_metadata_only_acts_on_supported_update_events(self):
+        """update_metadata won't get_data on unsupported update events."""
+        self.assertEqual(
+            {'network': [EventType.BOOT_NEW_INSTANCE]},
+            self.datasource.update_events)
+
+        def fake_get_data():
+            raise Exception('get_data should not be called')
+
+        self.datasource.get_data = fake_get_data
+        self.assertFalse(
+            self.datasource.update_metadata(
+                source_event_types=[EventType.BOOT]))
+
+    def test_update_metadata_returns_true_on_supported_update_event(self):
+        """update_metadata returns get_data response on supported events."""
+
+        def fake_get_data():
+            return True
+
+        self.datasource.get_data = fake_get_data
+        self.datasource._network_config = 'something'
+        self.datasource._dirty_cache = True
+        self.assertTrue(
+            self.datasource.update_metadata(
+                source_event_types=[
+                    EventType.BOOT, EventType.BOOT_NEW_INSTANCE]))
+        self.assertEqual(UNSET, self.datasource._network_config)
+        self.assertIn(
+            "DEBUG: Update datasource metadata and network config due to"
+            " events: New instance first boot",
+            self.logs.getvalue())
+
+
+# vi: ts=4 expandtab
diff --git a/cloudinit/stages.py b/cloudinit/stages.py
index 286607bf..c132b57d 100644
--- a/cloudinit/stages.py
+++ b/cloudinit/stages.py
@@ -22,6 +22,8 @@ from cloudinit.handlers import cloud_config as cc_part
 from cloudinit.handlers import shell_script as ss_part
 from cloudinit.handlers import upstart_job as up_part
 
+from cloudinit.event import EventType
+
 from cloudinit import cloud
 from cloudinit import config
 from cloudinit import distros
@@ -648,10 +650,14 @@ class Init(object):
         except Exception as e:
             LOG.warning("Failed to rename devices: %s", e)
 
-        if (self.datasource is not NULL_DATA_SOURCE and
-                not self.is_new_instance()):
-            LOG.debug("not a new instance. network config is not applied.")
-            return
+        if self.datasource is not NULL_DATA_SOURCE:
+            if not self.is_new_instance():
+                if not self.datasource.update_metadata([EventType.BOOT]):
+                    LOG.debug(
+                        "No network config applied. Neither a new instance"
+                        " nor datasource network update on '%s' event",
+                        EventType.BOOT)
+                    return
 
         LOG.info("Applying network configuration from %s bringup=%s: %s",
                  src, bring_up, netcfg)
diff --git a/cloudinit/tests/test_stages.py b/cloudinit/tests/test_stages.py
new file mode 100644
index 00000000..94b6b255
--- /dev/null
+++ b/cloudinit/tests/test_stages.py
@@ -0,0 +1,231 @@
+# This file is part of cloud-init. See LICENSE file for license information.
+
+"""Tests related to cloudinit.stages module."""
+
+import os
+
+from cloudinit import stages
+from cloudinit import sources
+
+from cloudinit.event import EventType
+from cloudinit.util import write_file
+
+from cloudinit.tests.helpers import CiTestCase, mock
+
+TEST_INSTANCE_ID = 'i-testing'
+
+
+class FakeDataSource(sources.DataSource):
+
+    def __init__(self, paths=None, userdata=None, vendordata=None,
+                 network_config=''):
+        super(FakeDataSource, self).__init__({}, None, paths=paths)
+        self.metadata = {'instance-id': TEST_INSTANCE_ID}
+        self.userdata_raw = userdata
+        self.vendordata_raw = vendordata
+        self._network_config = None
+        if network_config:   # Permit for None value to setup attribute
+            self._network_config = network_config
+
+    @property
+    def network_config(self):
+        return self._network_config
+
+    def _get_data(self):
+        return True
+
+
+class TestInit(CiTestCase):
+    with_logs = True
+
+    def setUp(self):
+        super(TestInit, self).setUp()
+        self.tmpdir = self.tmp_dir()
+        self.init = stages.Init()
+        # Setup fake Paths for Init to reference
+        self.init._cfg = {'system_info': {
+            'distro': 'ubuntu', 'paths': {'cloud_dir': self.tmpdir,
+                                          'run_dir': self.tmpdir}}}
+        self.init.datasource = FakeDataSource(paths=self.init.paths)
+
+    def test_wb__find_networking_config_disabled(self):
+        """find_networking_config returns no config when disabled."""
+        disable_file = os.path.join(
+            self.init.paths.get_cpath('data'), 'upgraded-network')
+        write_file(disable_file, '')
+        self.assertEqual(
+            (None, disable_file),
+            self.init._find_networking_config())
+
+    @mock.patch('cloudinit.stages.cmdline.read_kernel_cmdline_config')
+    def test_wb__find_networking_config_disabled_by_kernel(self, m_cmdline):
+        """find_networking_config returns when disabled by kernel cmdline."""
+        m_cmdline.return_value = {'config': 'disabled'}
+        self.assertEqual(
+            (None, 'cmdline'),
+            self.init._find_networking_config())
+        self.assertEqual('DEBUG: network config disabled by cmdline\n',
+                         self.logs.getvalue())
+
+    @mock.patch('cloudinit.stages.cmdline.read_kernel_cmdline_config')
+    def test_wb__find_networking_config_disabled_by_datasrc(self, m_cmdline):
+        """find_networking_config returns when disabled by datasource cfg."""
+        m_cmdline.return_value = {}  # Kernel doesn't disable networking
+        self.init._cfg = {'system_info': {'paths': {'cloud_dir': self.tmpdir}},
+                          'network': {}}  # system config doesn't disable
+
+        self.init.datasource = FakeDataSource(
+            network_config={'config': 'disabled'})
+        self.assertEqual(
+            (None, 'ds'),
+            self.init._find_networking_config())
+        self.assertEqual('DEBUG: network config disabled by ds\n',
+                         self.logs.getvalue())
+
+    @mock.patch('cloudinit.stages.cmdline.read_kernel_cmdline_config')
+    def test_wb__find_networking_config_disabled_by_sysconfig(self, m_cmdline):
+        """find_networking_config returns when disabled by system config."""
+        m_cmdline.return_value = {}  # Kernel doesn't disable networking
+        self.init._cfg = {'system_info': {'paths': {'cloud_dir': self.tmpdir}},
+                          'network': {'config': 'disabled'}}
+        self.assertEqual(
+            (None, 'system_cfg'),
+            self.init._find_networking_config())
+        self.assertEqual('DEBUG: network config disabled by system_cfg\n',
+                         self.logs.getvalue())
+
+    @mock.patch('cloudinit.stages.cmdline.read_kernel_cmdline_config')
+    def test_wb__find_networking_config_returns_kernel(self, m_cmdline):
+        """find_networking_config returns kernel cmdline config if present."""
+        expected_cfg = {'config': ['fakekernel']}
+        m_cmdline.return_value = expected_cfg
+        self.init._cfg = {'system_info': {'paths': {'cloud_dir': self.tmpdir}},
+                          'network': {'config': ['fakesys_config']}}
+        self.init.datasource = FakeDataSource(
+            network_config={'config': ['fakedatasource']})
+        self.assertEqual(
+            (expected_cfg, 'cmdline'),
+            self.init._find_networking_config())
+
+    @mock.patch('cloudinit.stages.cmdline.read_kernel_cmdline_config')
+    def test_wb__find_networking_config_returns_system_cfg(self, m_cmdline):
+        """find_networking_config returns system config when present."""
+        m_cmdline.return_value = {}  # No kernel network config
+        expected_cfg = {'config': ['fakesys_config']}
+        self.init._cfg = {'system_info': {'paths': {'cloud_dir': self.tmpdir}},
+                          'network': expected_cfg}
+        self.init.datasource = FakeDataSource(
+            network_config={'config': ['fakedatasource']})
+        self.assertEqual(
+            (expected_cfg, 'system_cfg'),
+            self.init._find_networking_config())
+
+    @mock.patch('cloudinit.stages.cmdline.read_kernel_cmdline_config')
+    def test_wb__find_networking_config_returns_datasrc_cfg(self, m_cmdline):
+        """find_networking_config returns datasource net config if present."""
+        m_cmdline.return_value = {}  # No kernel network config
+        # No system config for network in setUp
+        expected_cfg = {'config': ['fakedatasource']}
+        self.init.datasource = FakeDataSource(network_config=expected_cfg)
+        self.assertEqual(
+            (expected_cfg, 'ds'),
+            self.init._find_networking_config())
+
+    @mock.patch('cloudinit.stages.cmdline.read_kernel_cmdline_config')
+    def test_wb__find_networking_config_returns_fallback(self, m_cmdline):
+        """find_networking_config returns fallback config if not defined."""
+        m_cmdline.return_value = {}  # Kernel doesn't disable networking
+        # Neither datasource nor system_info disable or provide network
+
+        fake_cfg = {'config': [{'type': 'physical', 'name': 'eth9'}],
+                    'version': 1}
+
+        def fake_generate_fallback():
+            return fake_cfg
+
+        # Monkey patch distro which gets cached on self.init
+        distro = self.init.distro
+        distro.generate_fallback_config = fake_generate_fallback
+        self.assertEqual(
+            (fake_cfg, 'fallback'),
+            self.init._find_networking_config())
+        self.assertNotIn('network config disabled', self.logs.getvalue())
+
+    def test_apply_network_config_disabled(self):
+        """Log when network is disabled by upgraded-network."""
+        disable_file = os.path.join(
+            self.init.paths.get_cpath('data'), 'upgraded-network')
+
+        def fake_network_config():
+            return (None, disable_file)
+
+        self.init._find_networking_config = fake_network_config
+
+        self.init.apply_network_config(True)
+        self.assertIn(
+            'INFO: network config is disabled by %s' % disable_file,
+            self.logs.getvalue())
+
+    @mock.patch('cloudinit.distros.ubuntu.Distro')
+    def test_apply_network_on_new_instance(self, m_ubuntu):
+        """Call distro apply_network_config methods on is_new_instance."""
+        net_cfg = {
+            'version': 1, 'config': [
+                {'subnets': [{'type': 'dhcp'}], 'type': 'physical',
+                 'name': 'eth9', 'mac_address': '42:42:42:42:42:42'}]}
+
+        def fake_network_config():
+            return net_cfg, 'fallback'
+
+        self.init._find_networking_config = fake_network_config
+        self.init.apply_network_config(True)
+        self.init.distro.apply_network_config_names.assert_called_with(net_cfg)
+        self.init.distro.apply_network_config.assert_called_with(
+            net_cfg, bring_up=True)
+
+    @mock.patch('cloudinit.distros.ubuntu.Distro')
+    def test_apply_network_on_same_instance_id(self, m_ubuntu):
+        """Only call distro.apply_network_config_names on same instance id."""
+        old_instance_id = os.path.join(
+            self.init.paths.get_cpath('data'), 'instance-id')
+        write_file(old_instance_id, TEST_INSTANCE_ID)
+        net_cfg = {
+            'version': 1, 'config': [
+                {'subnets': [{'type': 'dhcp'}], 'type': 'physical',
+                 'name': 'eth9', 'mac_address': '42:42:42:42:42:42'}]}
+
+        def fake_network_config():
+            return net_cfg, 'fallback'
+
+        self.init._find_networking_config = fake_network_config
+        self.init.apply_network_config(True)
+        self.init.distro.apply_network_config_names.assert_called_with(net_cfg)
+        self.init.distro.apply_network_config.assert_not_called()
+        self.assertIn(
+            'No network config applied. Neither a new instance'
+            " nor datasource network update on '%s' event" % EventType.BOOT,
+            self.logs.getvalue())
+
+    @mock.patch('cloudinit.distros.ubuntu.Distro')
+    def test_apply_network_on_datasource_allowed_event(self, m_ubuntu):
+        """Apply network if datasource.update_metadata permits BOOT event."""
+        old_instance_id = os.path.join(
+            self.init.paths.get_cpath('data'), 'instance-id')
+        write_file(old_instance_id, TEST_INSTANCE_ID)
+        net_cfg = {
+            'version': 1, 'config': [
+                {'subnets': [{'type': 'dhcp'}], 'type': 'physical',
+                 'name': 'eth9', 'mac_address': '42:42:42:42:42:42'}]}
+
+        def fake_network_config():
+            return net_cfg, 'fallback'
+
+        self.init._find_networking_config = fake_network_config
+        self.init.datasource = FakeDataSource(paths=self.init.paths)
+        self.init.datasource.update_events = {'network': [EventType.BOOT]}
+        self.init.apply_network_config(True)
+        self.init.distro.apply_network_config_names.assert_called_with(net_cfg)
+        self.init.distro.apply_network_config.assert_called_with(
+            net_cfg, bring_up=True)
+
+# vi: ts=4 expandtab
-- 
cgit v1.2.3
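
For datasources, the subscription model above boils down to overriding
update_events. A hedged sketch of a datasource that re-crawls metadata and
re-renders network config on every boot, not just on first boot of a new
instance (EveryBootDS is hypothetical; the list-valued update_events match
this commit, and the follow-up commit below changes the values to sets):

    from cloudinit.event import EventType
    from cloudinit.sources import DataSource


    class EveryBootDS(DataSource):
        dsname = 'EveryBoot'
        # Include both events: BOOT_NEW_INSTANCE is not implied by BOOT.
        update_events = {'network': [EventType.BOOT,
                                     EventType.BOOT_NEW_INSTANCE]}

        def _get_data(self):
            self.metadata = {'instance-id': 'i-example'}
            self.userdata_raw = ''
            return True

    # On a non-first boot, stages.Init.apply_network_config() now calls
    # datasource.update_metadata([EventType.BOOT]); for EveryBootDS this
    # clears cached attributes (including _network_config) and re-runs
    # get_data(), while the default datasource returns False and keeps
    # the old skip-reconfig behavior.
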
    # Default: generate network config on new instance id (first boot).
-    update_events = {'network': [EventType.BOOT_NEW_INSTANCE]}
+    update_events = {'network': set([EventType.BOOT_NEW_INSTANCE])}

    # N-tuple listing default values for any metadata-related class
    # attributes cached on an instance by a process_data runs. These attribute
@@ -475,8 +475,8 @@ class DataSource(object):
         for update_scope, update_events in self.update_events.items():
             if event in update_events:
                 if not supported_events.get(update_scope):
-                    supported_events[update_scope] = []
-                supported_events[update_scope].append(event)
+                    supported_events[update_scope] = set()
+                supported_events[update_scope].add(event)
         for scope, matched_events in supported_events.items():
             LOG.debug(
                 "Update datasource metadata and %s config due to events: %s",
@@ -490,6 +490,8 @@ class DataSource(object):
             result = self.get_data()
             if result:
                 return True
+        LOG.debug("Datasource %s not updated for events: %s", self,
+                  ', '.join(source_event_types))
         return False

     def check_instance_id(self, sys_cfg):
diff --git a/cloudinit/sources/tests/test_init.py b/cloudinit/sources/tests/test_init.py
index dcd221be..9e939c1e 100644
--- a/cloudinit/sources/tests/test_init.py
+++ b/cloudinit/sources/tests/test_init.py
@@ -429,8 +429,9 @@ class TestDataSource(CiTestCase):

     def test_update_metadata_only_acts_on_supported_update_events(self):
         """update_metadata won't get_data on unsupported update events."""
+        self.datasource.update_events['network'].discard(EventType.BOOT)
         self.assertEqual(
-            {'network': [EventType.BOOT_NEW_INSTANCE]},
+            {'network': set([EventType.BOOT_NEW_INSTANCE])},
             self.datasource.update_events)

     def fake_get_data():
--
cgit v1.2.3


From aaffd59431fe05932a66016db941fe197c4e7620 Mon Sep 17 00:00:00 2001
From: Scott Moser
Date: Fri, 17 Aug 2018 20:25:31 +0000
Subject: Add datasource Oracle Compute Infrastructure (OCI).

This adds an Oracle-specific datasource that functions with OCI.

It is a simplified version of the OpenStack metadata server with
support for vendor-data.

It does not support the OCI-C (classic) platform.

Also here is a move of BrokenMetadata to common 'sources', as this was
the third occurrence of that class.
---
 .pylintrc                                         |   3 +-
 cloudinit/apport.py                               |   1 +
 cloudinit/settings.py                             |   1 +
 cloudinit/sources/DataSourceIBMCloud.py           |  13 +-
 cloudinit/sources/DataSourceOpenStack.py          |  12 +-
 cloudinit/sources/DataSourceOracle.py             | 233 +++++++++++++++
 cloudinit/sources/__init__.py                     |   4 +
 cloudinit/sources/helpers/openstack.py            |   6 +-
 cloudinit/sources/tests/test_oracle.py            | 331 ++++++++++++++++++++++
 doc/rtd/topics/datasources.rst                    |   1 +
 doc/rtd/topics/datasources/oracle.rst             |  26 ++
 tests/unittests/test_datasource/test_common.py    |   2 +
 tests/unittests/test_datasource/test_openstack.py |  13 +-
 tests/unittests/test_ds_identify.py               |  19 ++
 tools/ds-identify                                 |   8 +-
 15 files changed, 650 insertions(+), 23 deletions(-)
 create mode 100644 cloudinit/sources/DataSourceOracle.py
 create mode 100644 cloudinit/sources/tests/test_oracle.py
 create mode 100644 doc/rtd/topics/datasources/oracle.rst

(limited to 'cloudinit/sources/tests')

diff --git a/.pylintrc b/.pylintrc
index 3bfa0c81..e376b48b 100644
--- a/.pylintrc
+++ b/.pylintrc
@@ -61,7 +61,8 @@ ignored-modules=
 # List of class names for which member attributes should not be checked (useful
 # for classes with dynamically set attributes). This supports the use of
 # qualified names.
-ignored-classes=optparse.Values,thread._local +# argparse.Namespace from https://github.com/PyCQA/pylint/issues/2413 +ignored-classes=argparse.Namespace,optparse.Values,thread._local # List of members which are set dynamically and missed by pylint inference # system, and so shouldn't trigger E1101 when accessed. Python regular diff --git a/cloudinit/apport.py b/cloudinit/apport.py index 130ff269..22cb7fde 100644 --- a/cloudinit/apport.py +++ b/cloudinit/apport.py @@ -30,6 +30,7 @@ KNOWN_CLOUD_NAMES = [ 'NoCloud', 'OpenNebula', 'OpenStack', + 'Oracle', 'OVF', 'OpenTelekomCloud', 'Scaleway', diff --git a/cloudinit/settings.py b/cloudinit/settings.py index dde5749d..ea367cb7 100644 --- a/cloudinit/settings.py +++ b/cloudinit/settings.py @@ -38,6 +38,7 @@ CFG_BUILTIN = { 'Scaleway', 'Hetzner', 'IBMCloud', + 'Oracle', # At the end to act as a 'catch' when none of the above work... 'None', ], diff --git a/cloudinit/sources/DataSourceIBMCloud.py b/cloudinit/sources/DataSourceIBMCloud.py index 01106ec0..a5358148 100644 --- a/cloudinit/sources/DataSourceIBMCloud.py +++ b/cloudinit/sources/DataSourceIBMCloud.py @@ -295,7 +295,7 @@ def read_md(): results = metadata_from_dir(path) else: results = util.mount_cb(path, metadata_from_dir) - except BrokenMetadata as e: + except sources.BrokenMetadata as e: raise RuntimeError( "Failed reading IBM config disk (platform=%s path=%s): %s" % (platform, path, e)) @@ -304,10 +304,6 @@ def read_md(): return ret -class BrokenMetadata(IOError): - pass - - def metadata_from_dir(source_dir): """Walk source_dir extracting standardized metadata. @@ -352,12 +348,13 @@ def metadata_from_dir(source_dir): try: data = transl(raw) except Exception as e: - raise BrokenMetadata("Failed decoding %s: %s" % (path, e)) + raise sources.BrokenMetadata( + "Failed decoding %s: %s" % (path, e)) results[name] = data if results.get('metadata_raw') is None: - raise BrokenMetadata( + raise sources.BrokenMetadata( "%s missing required file 'meta_data.json'" % source_dir) results['metadata'] = {} @@ -368,7 +365,7 @@ def metadata_from_dir(source_dir): try: md['random_seed'] = base64.b64decode(md_raw['random_seed']) except (ValueError, TypeError) as e: - raise BrokenMetadata( + raise sources.BrokenMetadata( "Badly formatted metadata random_seed entry: %s" % e) renames = ( diff --git a/cloudinit/sources/DataSourceOpenStack.py b/cloudinit/sources/DataSourceOpenStack.py index b9ade90d..4a015240 100644 --- a/cloudinit/sources/DataSourceOpenStack.py +++ b/cloudinit/sources/DataSourceOpenStack.py @@ -13,6 +13,7 @@ from cloudinit import url_helper from cloudinit import util from cloudinit.sources.helpers import openstack +from cloudinit.sources import DataSourceOracle as oracle LOG = logging.getLogger(__name__) @@ -28,8 +29,7 @@ DMI_PRODUCT_NOVA = 'OpenStack Nova' DMI_PRODUCT_COMPUTE = 'OpenStack Compute' VALID_DMI_PRODUCT_NAMES = [DMI_PRODUCT_NOVA, DMI_PRODUCT_COMPUTE] DMI_ASSET_TAG_OPENTELEKOM = 'OpenTelekomCloud' -DMI_ASSET_TAG_ORACLE_CLOUD = 'OracleCloud.com' -VALID_DMI_ASSET_TAGS = [DMI_ASSET_TAG_OPENTELEKOM, DMI_ASSET_TAG_ORACLE_CLOUD] +VALID_DMI_ASSET_TAGS = [DMI_ASSET_TAG_OPENTELEKOM] class DataSourceOpenStack(openstack.SourceMixin, sources.DataSource): @@ -122,8 +122,10 @@ class DataSourceOpenStack(openstack.SourceMixin, sources.DataSource): False when unable to contact metadata service or when metadata format is invalid or disabled. 
""" - if not detect_openstack(): + oracle_considered = 'Oracle' in self.sys_cfg.get('datasource_list') + if not detect_openstack(accept_oracle=not oracle_considered): return False + if self.perform_dhcp_setup: # Setup networking in init-local stage. try: with EphemeralDHCPv4(self.fallback_interface): @@ -215,7 +217,7 @@ def read_metadata_service(base_url, ssl_details=None, return reader.read_v2() -def detect_openstack(): +def detect_openstack(accept_oracle=False): """Return True when a potential OpenStack platform is detected.""" if not util.is_x86(): return True # Non-Intel cpus don't properly report dmi product names @@ -224,6 +226,8 @@ def detect_openstack(): return True elif util.read_dmi_data('chassis-asset-tag') in VALID_DMI_ASSET_TAGS: return True + elif accept_oracle and oracle._is_platform_viable(): + return True elif util.get_proc_env(1).get('product_name') == DMI_PRODUCT_NOVA: return True return False diff --git a/cloudinit/sources/DataSourceOracle.py b/cloudinit/sources/DataSourceOracle.py new file mode 100644 index 00000000..fab39af3 --- /dev/null +++ b/cloudinit/sources/DataSourceOracle.py @@ -0,0 +1,233 @@ +# This file is part of cloud-init. See LICENSE file for license information. +"""Datasource for Oracle (OCI/Oracle Cloud Infrastructure) + +OCI provides a OpenStack like metadata service which provides only +'2013-10-17' and 'latest' versions.. + +Notes: + * This datasource does not support the OCI-Classic. OCI-Classic + provides an EC2 lookalike metadata service. + * The uuid provided in DMI data is not the same as the meta-data provided + instance-id, but has an equivalent lifespan. + * We do need to support upgrade from an instance that cloud-init + identified as OpenStack. + * Both bare-metal and vms use iscsi root + * Both bare-metal and vms provide chassis-asset-tag of OracleCloud.com +""" + +from cloudinit.url_helper import combine_url, readurl, UrlError +from cloudinit.net import dhcp +from cloudinit import net +from cloudinit import sources +from cloudinit import util +from cloudinit.net import cmdline +from cloudinit import log as logging + +import json +import re + +LOG = logging.getLogger(__name__) + +CHASSIS_ASSET_TAG = "OracleCloud.com" +METADATA_ENDPOINT = "http://169.254.169.254/openstack/" + + +class DataSourceOracle(sources.DataSource): + + dsname = 'Oracle' + system_uuid = None + vendordata_pure = None + _network_config = sources.UNSET + + def _is_platform_viable(self): + """Check platform environment to report if this datasource may run.""" + return _is_platform_viable() + + def _get_data(self): + if not self._is_platform_viable(): + return False + + # network may be configured if iscsi root. If that is the case + # then read_kernel_cmdline_config will return non-None. 
+        if _is_iscsi_root():
+            data = self.crawl_metadata()
+        else:
+            with dhcp.EphemeralDHCPv4(net.find_fallback_nic()):
+                data = self.crawl_metadata()
+
+        self._crawled_metadata = data
+        vdata = data['2013-10-17']
+
+        self.userdata_raw = vdata.get('user_data')
+        self.system_uuid = vdata['system_uuid']
+
+        vd = vdata.get('vendor_data')
+        if vd:
+            self.vendordata_pure = vd
+            try:
+                self.vendordata_raw = sources.convert_vendordata(vd)
+            except ValueError as e:
+                LOG.warning("Invalid content in vendor-data: %s", e)
+                self.vendordata_raw = None
+
+        mdcopies = ('public_keys',)
+        md = dict([(k, vdata['meta_data'].get(k))
+                   for k in mdcopies if k in vdata['meta_data']])
+
+        mdtrans = (
+            # oracle meta_data.json name, cloudinit.datasource.metadata name
+            ('availability_zone', 'availability-zone'),
+            ('hostname', 'local-hostname'),
+            ('launch_index', 'launch-index'),
+            ('uuid', 'instance-id'),
+        )
+        for dsname, ciname in mdtrans:
+            if dsname in vdata['meta_data']:
+                md[ciname] = vdata['meta_data'][dsname]
+
+        self.metadata = md
+        return True
+
+    def crawl_metadata(self):
+        return read_metadata()
+
+    def check_instance_id(self, sys_cfg):
+        """quickly check (local only) if self.instance_id is still valid
+
+        On Oracle, the dmi-provided system uuid differs from the instance-id
+        but has the same life-span."""
+        return sources.instance_id_matches_system_uuid(self.system_uuid)
+
+    def get_public_ssh_keys(self):
+        return sources.normalize_pubkey_data(self.metadata.get('public_keys'))
+
+    @property
+    def network_config(self):
+        """Network config is read from initramfs provided files
+        If none is present, then we fall back to fallback configuration.
+
+        One thing to note here is that this method is not currently
+        considered at all if there is kernel/initramfs provided
+        data. In that case, stages considers that the cmdline data
+        overrides datasource provided data and does not consult here.
+
+        We nonetheless return cmdline provided config if present
+        and fall back to generated fallback config otherwise."""
+        if self._network_config == sources.UNSET:
+            cmdline_cfg = cmdline.read_kernel_cmdline_config()
+            if cmdline_cfg:
+                self._network_config = cmdline_cfg
+            else:
+                self._network_config = self.distro.generate_fallback_config()
+        return self._network_config
+
+
+def _read_system_uuid():
+    sys_uuid = util.read_dmi_data('system-uuid')
+    return None if sys_uuid is None else sys_uuid.lower()
+
+
+def _is_platform_viable():
+    asset_tag = util.read_dmi_data('chassis-asset-tag')
+    return asset_tag == CHASSIS_ASSET_TAG
+
+
+def _is_iscsi_root():
+    return bool(cmdline.read_kernel_cmdline_config())
+
+
+def _load_index(content):
+    """Return a list of entries parsed from content.
+
+    OpenStack's metadata service returns a newline delimited list
+    of items. Oracle's implementation has an html-formatted list of links.
+    The parser here just grabs targets from <a href="target">
+    and throws away "../".
+
+    Oracle has accepted that to be buggy and may fix in the future
+    to instead return a '\n' delimited plain text list. This function
+    will continue to work if that change is made."""
+    if not content.lower().startswith("<html>"):
+        return content.splitlines()
+    items = re.findall(
+        r'href="(?P<target>[^"]*)"', content, re.MULTILINE | re.IGNORECASE)
+    return [i for i in items if not i.startswith(".")]
+
+
+def read_metadata(endpoint_base=METADATA_ENDPOINT, sys_uuid=None,
+                  version='2013-10-17'):
+    """Read metadata, return a dictionary.
+
+    Each path listed in the index will be represented in the dictionary.
+ If the path ends in .json, then the content will be decoded and + populated into the dictionary. + + The system uuid (/sys/class/dmi/id/product_uuid) is also populated. + Example: given paths = ('user_data', 'meta_data.json') + This would return: + {version: {'user_data': b'blob', 'meta_data': json.loads(blob.decode()) + 'system_uuid': '3b54f2e0-3ab2-458d-b770-af9926eee3b2'}} + """ + endpoint = combine_url(endpoint_base, version) + "/" + if sys_uuid is None: + sys_uuid = _read_system_uuid() + if not sys_uuid: + raise sources.BrokenMetadata("Failed to read system uuid.") + + try: + resp = readurl(endpoint) + if not resp.ok(): + raise sources.BrokenMetadata( + "Bad response from %s: %s" % (endpoint, resp.code)) + except UrlError as e: + raise sources.BrokenMetadata( + "Failed to read index at %s: %s" % (endpoint, e)) + + entries = _load_index(resp.contents.decode('utf-8')) + LOG.debug("index url %s contained: %s", endpoint, entries) + + # meta_data.json is required. + mdj = 'meta_data.json' + if mdj not in entries: + raise sources.BrokenMetadata( + "Required field '%s' missing in index at %s" % (mdj, endpoint)) + + ret = {'system_uuid': sys_uuid} + for path in entries: + response = readurl(combine_url(endpoint, path)) + if path.endswith(".json"): + ret[path.rpartition(".")[0]] = ( + json.loads(response.contents.decode('utf-8'))) + else: + ret[path] = response.contents + + return {version: ret} + + +# Used to match classes to dependencies +datasources = [ + (DataSourceOracle, (sources.DEP_FILESYSTEM,)), +] + + +# Return a list of data sources that match this set of dependencies +def get_datasource_list(depends): + return sources.list_from_depends(depends, datasources) + + +if __name__ == "__main__": + import argparse + import os + + parser = argparse.ArgumentParser(description='Query Oracle Cloud Metadata') + parser.add_argument("--endpoint", metavar="URL", + help="The url of the metadata service.", + default=METADATA_ENDPOINT) + args = parser.parse_args() + sys_uuid = "uuid-not-available-not-root" if os.geteuid() != 0 else None + + data = read_metadata(endpoint_base=args.endpoint, sys_uuid=sys_uuid) + data['is_platform_viable'] = _is_platform_viable() + print(util.json_dumps(data)) + +# vi: ts=4 expandtab diff --git a/cloudinit/sources/__init__.py b/cloudinit/sources/__init__.py index 06e613f8..41fde9ba 100644 --- a/cloudinit/sources/__init__.py +++ b/cloudinit/sources/__init__.py @@ -671,6 +671,10 @@ def convert_vendordata(data, recurse=True): raise ValueError("Unknown data type for vendordata: %s" % type(data)) +class BrokenMetadata(IOError): + pass + + # 'depends' is a list of dependencies (DEP_FILESYSTEM) # ds_list is a list of 2 item lists # ds_list = [ diff --git a/cloudinit/sources/helpers/openstack.py b/cloudinit/sources/helpers/openstack.py index a4cf0667..8f9c1441 100644 --- a/cloudinit/sources/helpers/openstack.py +++ b/cloudinit/sources/helpers/openstack.py @@ -21,6 +21,8 @@ from cloudinit import sources from cloudinit import url_helper from cloudinit import util +from cloudinit.sources import BrokenMetadata + # See https://docs.openstack.org/user-guide/cli-config-drive.html LOG = logging.getLogger(__name__) @@ -68,10 +70,6 @@ class NonReadable(IOError): pass -class BrokenMetadata(IOError): - pass - - class SourceMixin(object): def _ec2_name_to_device(self, name): if not self.ec2_metadata: diff --git a/cloudinit/sources/tests/test_oracle.py b/cloudinit/sources/tests/test_oracle.py new file mode 100644 index 00000000..7599126c --- /dev/null +++ 
b/cloudinit/sources/tests/test_oracle.py @@ -0,0 +1,331 @@ +# This file is part of cloud-init. See LICENSE file for license information. + +from cloudinit.sources import DataSourceOracle as oracle +from cloudinit.sources import BrokenMetadata +from cloudinit import helpers + +from cloudinit.tests import helpers as test_helpers + +from textwrap import dedent +import argparse +import httpretty +import json +import mock +import os +import six +import uuid + +DS_PATH = "cloudinit.sources.DataSourceOracle" +MD_VER = "2013-10-17" + + +class TestDataSourceOracle(test_helpers.CiTestCase): + """Test datasource DataSourceOracle.""" + + ds_class = oracle.DataSourceOracle + + my_uuid = str(uuid.uuid4()) + my_md = {"uuid": "ocid1.instance.oc1.phx.abyhqlj", + "name": "ci-vm1", "availability_zone": "phx-ad-3", + "hostname": "ci-vm1hostname", + "launch_index": 0, "files": [], + "public_keys": {"0": "ssh-rsa AAAAB3N...== user@host"}, + "meta": {}} + + def _patch_instance(self, inst, patches): + """Patch an instance of a class 'inst'. + for each name, kwargs in patches: + inst.name = mock.Mock(**kwargs) + returns a namespace object that has + namespace.name = mock.Mock(**kwargs) + Do not bother with cleanup as instance is assumed transient.""" + mocks = argparse.Namespace() + for name, kwargs in patches.items(): + imock = mock.Mock(name=name, spec=getattr(inst, name), **kwargs) + setattr(mocks, name, imock) + setattr(inst, name, imock) + return mocks + + def _get_ds(self, sys_cfg=None, distro=None, paths=None, ud_proc=None, + patches=None): + if sys_cfg is None: + sys_cfg = {} + if patches is None: + patches = {} + if paths is None: + tmpd = self.tmp_dir() + dirs = {'cloud_dir': self.tmp_path('cloud_dir', tmpd), + 'run_dir': self.tmp_path('run_dir')} + for d in dirs.values(): + os.mkdir(d) + paths = helpers.Paths(dirs) + + ds = self.ds_class(sys_cfg=sys_cfg, distro=distro, + paths=paths, ud_proc=ud_proc) + + return ds, self._patch_instance(ds, patches) + + def test_platform_not_viable_returns_false(self): + ds, mocks = self._get_ds( + patches={'_is_platform_viable': {'return_value': False}}) + self.assertFalse(ds._get_data()) + mocks._is_platform_viable.assert_called_once_with() + + @mock.patch(DS_PATH + "._is_iscsi_root", return_value=True) + def test_without_userdata(self, m_is_iscsi_root): + """If no user-data is provided, it should not be in return dict.""" + ds, mocks = self._get_ds(patches={ + '_is_platform_viable': {'return_value': True}, + 'crawl_metadata': { + 'return_value': { + MD_VER: {'system_uuid': self.my_uuid, + 'meta_data': self.my_md}}}}) + self.assertTrue(ds._get_data()) + mocks._is_platform_viable.assert_called_once_with() + mocks.crawl_metadata.assert_called_once_with() + self.assertEqual(self.my_uuid, ds.system_uuid) + self.assertEqual(self.my_md['availability_zone'], ds.availability_zone) + self.assertIn(self.my_md["public_keys"]["0"], ds.get_public_ssh_keys()) + self.assertEqual(self.my_md['uuid'], ds.get_instance_id()) + self.assertIsNone(ds.userdata_raw) + + @mock.patch(DS_PATH + "._is_iscsi_root", return_value=True) + def test_with_vendordata(self, m_is_iscsi_root): + """Test with vendor data.""" + vd = {'cloud-init': '#cloud-config\nkey: value'} + ds, mocks = self._get_ds(patches={ + '_is_platform_viable': {'return_value': True}, + 'crawl_metadata': { + 'return_value': { + MD_VER: {'system_uuid': self.my_uuid, + 'meta_data': self.my_md, + 'vendor_data': vd}}}}) + self.assertTrue(ds._get_data()) + mocks._is_platform_viable.assert_called_once_with() + 
mocks.crawl_metadata.assert_called_once_with()
+        self.assertEqual(vd, ds.vendordata_pure)
+        self.assertEqual(vd['cloud-init'], ds.vendordata_raw)
+
+    @mock.patch(DS_PATH + "._is_iscsi_root", return_value=True)
+    def test_with_userdata(self, m_is_iscsi_root):
+        """Ensure user-data is populated if present and is binary."""
+        my_userdata = b'abcdefg'
+        ds, mocks = self._get_ds(patches={
+            '_is_platform_viable': {'return_value': True},
+            'crawl_metadata': {
+                'return_value': {
+                    MD_VER: {'system_uuid': self.my_uuid,
+                             'meta_data': self.my_md,
+                             'user_data': my_userdata}}}})
+        self.assertTrue(ds._get_data())
+        mocks._is_platform_viable.assert_called_once_with()
+        mocks.crawl_metadata.assert_called_once_with()
+        self.assertEqual(self.my_uuid, ds.system_uuid)
+        self.assertIn(self.my_md["public_keys"]["0"], ds.get_public_ssh_keys())
+        self.assertEqual(self.my_md['uuid'], ds.get_instance_id())
+        self.assertEqual(my_userdata, ds.userdata_raw)
+
+    @mock.patch(DS_PATH + ".cmdline.read_kernel_cmdline_config")
+    @mock.patch(DS_PATH + "._is_iscsi_root", return_value=True)
+    def test_network_cmdline(self, m_is_iscsi_root, m_cmdline_config):
+        """network_config should read kernel cmdline."""
+        distro = mock.MagicMock()
+        ds, _ = self._get_ds(distro=distro, patches={
+            '_is_platform_viable': {'return_value': True},
+            'crawl_metadata': {
+                'return_value': {
+                    MD_VER: {'system_uuid': self.my_uuid,
+                             'meta_data': self.my_md}}}})
+        ncfg = {'version': 1, 'config': [{'a': 'b'}]}
+        m_cmdline_config.return_value = ncfg
+        self.assertTrue(ds._get_data())
+        self.assertEqual(ncfg, ds.network_config)
+        m_cmdline_config.assert_called_once_with()
+        self.assertFalse(distro.generate_fallback_config.called)
+
+    @mock.patch(DS_PATH + ".cmdline.read_kernel_cmdline_config")
+    @mock.patch(DS_PATH + "._is_iscsi_root", return_value=True)
+    def test_network_fallback(self, m_is_iscsi_root, m_cmdline_config):
+        """test that fallback network is generated if no kernel cmdline."""
+        distro = mock.MagicMock()
+        ds, _ = self._get_ds(distro=distro, patches={
+            '_is_platform_viable': {'return_value': True},
+            'crawl_metadata': {
+                'return_value': {
+                    MD_VER: {'system_uuid': self.my_uuid,
+                             'meta_data': self.my_md}}}})
+        ncfg = {'version': 1, 'config': [{'a': 'b'}]}
+        m_cmdline_config.return_value = None
+        self.assertTrue(ds._get_data())
+        ncfg = {'version': 1, 'config': [{'distro1': 'value'}]}
+        distro.generate_fallback_config.return_value = ncfg
+        self.assertEqual(ncfg, ds.network_config)
+        m_cmdline_config.assert_called_once_with()
+        distro.generate_fallback_config.assert_called_once_with()
+        self.assertEqual(1, m_cmdline_config.call_count)
+
+        # test that the result got cached, and the methods not re-called.
+        self.assertEqual(ncfg, ds.network_config)
+        self.assertEqual(1, m_cmdline_config.call_count)
+
+
+@mock.patch(DS_PATH + "._read_system_uuid", return_value=str(uuid.uuid4()))
+class TestReadMetaData(test_helpers.HttprettyTestCase):
+    """Test the read_metadata which interacts with http metadata service."""
+
+    mdurl = oracle.METADATA_ENDPOINT
+    my_md = {"uuid": "ocid1.instance.oc1.phx.abyhqlj",
+             "name": "ci-vm1", "availability_zone": "phx-ad-3",
+             "hostname": "ci-vm1hostname",
+             "launch_index": 0, "files": [],
+             "public_keys": {"0": "ssh-rsa AAAAB3N...== user@host"},
+             "meta": {}}
+
+    def populate_md(self, data):
+        """call httpretty.register_uri for each item in dict 'data',
+           including valid indexes. Text values converted to bytes."""
+        httpretty.register_uri(
+            httpretty.GET, self.mdurl + MD_VER + "/",
+            '\n'.join(data.keys()).encode('utf-8'))
+        for k, v in data.items():
+            httpretty.register_uri(
+                httpretty.GET, self.mdurl + MD_VER + "/" + k,
+                v if not isinstance(v, six.text_type) else v.encode('utf-8'))
+
+    def test_broken_no_sys_uuid(self, m_read_system_uuid):
+        """Datasource requires ability to read system_uuid and true return."""
+        m_read_system_uuid.return_value = None
+        self.assertRaises(BrokenMetadata, oracle.read_metadata)
+
+    def test_broken_no_metadata_json(self, m_read_system_uuid):
+        """Datasource requires meta_data.json."""
+        httpretty.register_uri(
+            httpretty.GET, self.mdurl + MD_VER + "/",
+            '\n'.join(['user_data']).encode('utf-8'))
+        with self.assertRaises(BrokenMetadata) as cm:
+            oracle.read_metadata()
+        self.assertIn("Required field 'meta_data.json' missing",
+                      str(cm.exception))
+
+    def test_with_userdata(self, m_read_system_uuid):
+        data = {'user_data': b'#!/bin/sh\necho hi world\n',
+                'meta_data.json': json.dumps(self.my_md)}
+        self.populate_md(data)
+        result = oracle.read_metadata()[MD_VER]
+        self.assertEqual(data['user_data'], result['user_data'])
+        self.assertEqual(self.my_md, result['meta_data'])
+
+    def test_without_userdata(self, m_read_system_uuid):
+        data = {'meta_data.json': json.dumps(self.my_md)}
+        self.populate_md(data)
+        result = oracle.read_metadata()[MD_VER]
+        self.assertNotIn('user_data', result)
+        self.assertEqual(self.my_md, result['meta_data'])
+
+    def test_unknown_fields_included(self, m_read_system_uuid):
+        """Unknown fields listed in index should be included.
+        And those ending in .json should be decoded."""
+        some_data = {'key1': 'data1', 'subk1': {'subd1': 'subv'}}
+        some_vendor_data = {'cloud-init': 'foo'}
+        data = {'meta_data.json': json.dumps(self.my_md),
+                'some_data.json': json.dumps(some_data),
+                'vendor_data.json': json.dumps(some_vendor_data),
+                'other_blob': b'this is blob'}
+        self.populate_md(data)
+        result = oracle.read_metadata()[MD_VER]
+        self.assertNotIn('user_data', result)
+        self.assertEqual(self.my_md, result['meta_data'])
+        self.assertEqual(some_data, result['some_data'])
+        self.assertEqual(some_vendor_data, result['vendor_data'])
+        self.assertEqual(data['other_blob'], result['other_blob'])
+
+
+class TestIsPlatformViable(test_helpers.CiTestCase):
+    @mock.patch(DS_PATH + ".util.read_dmi_data",
+                return_value=oracle.CHASSIS_ASSET_TAG)
+    def test_expected_viable(self, m_read_dmi_data):
+        """System with known chassis tag is viable."""
+        self.assertTrue(oracle._is_platform_viable())
+        m_read_dmi_data.assert_has_calls([mock.call('chassis-asset-tag')])
+
+    @mock.patch(DS_PATH + ".util.read_dmi_data", return_value=None)
+    def test_expected_not_viable_dmi_data_none(self, m_read_dmi_data):
+        """System without known chassis tag is not viable."""
+        self.assertFalse(oracle._is_platform_viable())
+        m_read_dmi_data.assert_has_calls([mock.call('chassis-asset-tag')])
+
+    @mock.patch(DS_PATH + ".util.read_dmi_data", return_value="LetsGoCubs")
+    def test_expected_not_viable_other(self, m_read_dmi_data):
+        """System with unknown chassis tag is not viable."""
+        self.assertFalse(oracle._is_platform_viable())
+        m_read_dmi_data.assert_has_calls([mock.call('chassis-asset-tag')])
+
+
+class TestLoadIndex(test_helpers.CiTestCase):
+    """_load_index handles parsing of an index into a proper list.
+    The tests here guarantee correct parsing of html version or
+    a fixed version.
See the function docstring for more doc."""
+
+    _known_html_api_versions = dedent("""\
+        <html>
+        <head><title>Index of /openstack/</title></head>
+        <body bgcolor="white">
+        <h1>Index of /openstack/</h1><hr><pre><a href="../">../</a>
+        <a href="2013-10-17/">2013-10-17/</a>   27-Jun-2018 12:22  -
+        <a href="latest/">latest/</a>           27-Jun-2018 12:22  -
+        </pre><hr></body>
+        </html>
+        """)
+
+    _known_html_contents = dedent("""\
+        <html>
+        <head><title>Index of /openstack/2013-10-17/</title></head>
+        <body bgcolor="white">
+        <h1>Index of /openstack/2013-10-17/</h1><hr><pre><a href="../">../</a>
+        <a href="meta_data.json">meta_data.json</a>  27-Jun-2018 12:22  679
+        <a href="user_data">user_data</a>            27-Jun-2018 12:22  146
+        </pre><hr></body>
+        </html>
+        """)
+
+    def test_parse_html(self):
+        """Test parsing of lower case html."""
+        self.assertEqual(
+            ['2013-10-17/', 'latest/'],
+            oracle._load_index(self._known_html_api_versions))
+        self.assertEqual(
+            ['meta_data.json', 'user_data'],
+            oracle._load_index(self._known_html_contents))
+
+    def test_parse_html_upper(self):
+        """Test parsing of upper case html, although known content is lower."""
+        def _toupper(data):
+            return data.replace("<html>", "<HTML>")
+
+        self.assertEqual(
+            ['2013-10-17/', 'latest/'],
+            oracle._load_index(_toupper(self._known_html_api_versions)))
+        self.assertEqual(
+            ['meta_data.json', 'user_data'],
+            oracle._load_index(_toupper(self._known_html_contents)))
+
+    def test_parse_newline_list_with_endl(self):
+        """Test parsing of newline separated list with ending newline."""
+        self.assertEqual(
+            ['2013-10-17/', 'latest/'],
+            oracle._load_index("\n".join(["2013-10-17/", "latest/", ""])))
+        self.assertEqual(
+            ['meta_data.json', 'user_data'],
+            oracle._load_index("\n".join(["meta_data.json", "user_data", ""])))
+
+    def test_parse_newline_list_without_endl(self):
+        """Test parsing of newline separated list with no ending newline.
+
+        Actual openstack implementation does not include trailing newline."""
+        self.assertEqual(
+            ['2013-10-17/', 'latest/'],
+            oracle._load_index("\n".join(["2013-10-17/", "latest/"])))
+        self.assertEqual(
+            ['meta_data.json', 'user_data'],
+            oracle._load_index("\n".join(["meta_data.json", "user_data"])))
+
+
+# vi: ts=4 expandtab
diff --git a/doc/rtd/topics/datasources.rst b/doc/rtd/topics/datasources.rst
index 30e57d85..83034589 100644
--- a/doc/rtd/topics/datasources.rst
+++ b/doc/rtd/topics/datasources.rst
@@ -189,6 +189,7 @@ Follow for more information.
    datasources/nocloud.rst
    datasources/opennebula.rst
    datasources/openstack.rst
+   datasources/oracle.rst
    datasources/ovf.rst
    datasources/smartos.rst
    datasources/fallback.rst
diff --git a/doc/rtd/topics/datasources/oracle.rst b/doc/rtd/topics/datasources/oracle.rst
new file mode 100644
index 00000000..f2383cee
--- /dev/null
+++ b/doc/rtd/topics/datasources/oracle.rst
@@ -0,0 +1,26 @@
+.. _datasource_oracle:
+
+Oracle
+======
+
+This datasource reads metadata, vendor-data and user-data from
+`Oracle Compute Infrastructure`_ (OCI).
+
+Oracle Platform
+---------------
+OCI provides bare metal and virtual machines. In both cases,
+the platform identifies itself via DMI data in the chassis asset tag
+with the string 'OracleCloud.com'.
+
+Oracle's platform provides a metadata service that mimics the 2013-10-17
+version of the OpenStack metadata service. Initially support for Oracle
+was done via the OpenStack datasource.
+
+Cloud-init has a specific datasource for Oracle in order to:
+ a. allow and support future growth of the OCI platform.
+ b. address small differences between OpenStack and Oracle metadata
+    implementation.
+
+
+.. _Oracle Compute Infrastructure: https://cloud.oracle.com/
+..
vi: textwidth=78 diff --git a/tests/unittests/test_datasource/test_common.py b/tests/unittests/test_datasource/test_common.py index 1a5a3db2..6b01a4ea 100644 --- a/tests/unittests/test_datasource/test_common.py +++ b/tests/unittests/test_datasource/test_common.py @@ -20,6 +20,7 @@ from cloudinit.sources import ( DataSourceNoCloud as NoCloud, DataSourceOpenNebula as OpenNebula, DataSourceOpenStack as OpenStack, + DataSourceOracle as Oracle, DataSourceOVF as OVF, DataSourceScaleway as Scaleway, DataSourceSmartOS as SmartOS, @@ -37,6 +38,7 @@ DEFAULT_LOCAL = [ IBMCloud.DataSourceIBMCloud, NoCloud.DataSourceNoCloud, OpenNebula.DataSourceOpenNebula, + Oracle.DataSourceOracle, OVF.DataSourceOVF, SmartOS.DataSourceSmartOS, Ec2.DataSourceEc2Local, diff --git a/tests/unittests/test_datasource/test_openstack.py b/tests/unittests/test_datasource/test_openstack.py index d862f4bc..6e1e971b 100644 --- a/tests/unittests/test_datasource/test_openstack.py +++ b/tests/unittests/test_datasource/test_openstack.py @@ -16,7 +16,7 @@ from six import StringIO from cloudinit import helpers from cloudinit import settings -from cloudinit.sources import convert_vendordata, UNSET +from cloudinit.sources import BrokenMetadata, convert_vendordata, UNSET from cloudinit.sources import DataSourceOpenStack as ds from cloudinit.sources.helpers import openstack from cloudinit import util @@ -186,7 +186,7 @@ class TestOpenStackDataSource(test_helpers.HttprettyTestCase): if k.endswith('meta_data.json'): os_files[k] = json.dumps(os_meta) _register_uris(self.VERSION, {}, {}, os_files) - self.assertRaises(openstack.BrokenMetadata, _read_metadata_service) + self.assertRaises(BrokenMetadata, _read_metadata_service) def test_userdata_empty(self): os_files = copy.deepcopy(OS_FILES) @@ -217,7 +217,7 @@ class TestOpenStackDataSource(test_helpers.HttprettyTestCase): if k.endswith('vendor_data.json'): os_files[k] = '{' # some invalid json _register_uris(self.VERSION, {}, {}, os_files) - self.assertRaises(openstack.BrokenMetadata, _read_metadata_service) + self.assertRaises(BrokenMetadata, _read_metadata_service) def test_metadata_invalid(self): os_files = copy.deepcopy(OS_FILES) @@ -225,7 +225,7 @@ class TestOpenStackDataSource(test_helpers.HttprettyTestCase): if k.endswith('meta_data.json'): os_files[k] = '{' # some invalid json _register_uris(self.VERSION, {}, {}, os_files) - self.assertRaises(openstack.BrokenMetadata, _read_metadata_service) + self.assertRaises(BrokenMetadata, _read_metadata_service) @test_helpers.mock.patch('cloudinit.net.dhcp.maybe_perform_dhcp_discovery') def test_datasource(self, m_dhcp): @@ -525,8 +525,11 @@ class TestDetectOpenStack(test_helpers.CiTestCase): m_dmi.side_effect = fake_dmi_read self.assertTrue( - ds.detect_openstack(), + ds.detect_openstack(accept_oracle=True), 'Expected detect_openstack == True on OracleCloud.com') + self.assertFalse( + ds.detect_openstack(accept_oracle=False), + 'Expected detect_openstack == False.') @test_helpers.mock.patch(MOCK_PATH + 'util.get_proc_env') @test_helpers.mock.patch(MOCK_PATH + 'util.read_dmi_data') diff --git a/tests/unittests/test_ds_identify.py b/tests/unittests/test_ds_identify.py index 64d9f9f8..e08e7908 100644 --- a/tests/unittests/test_ds_identify.py +++ b/tests/unittests/test_ds_identify.py @@ -12,6 +12,7 @@ from cloudinit.tests.helpers import ( from cloudinit.sources import DataSourceIBMCloud as ds_ibm from cloudinit.sources import DataSourceSmartOS as ds_smartos +from cloudinit.sources import DataSourceOracle as ds_oracle UNAME_MYSYS = ("Linux bart 
4.4.0-62-generic #83-Ubuntu "
               "SMP Wed Jan 18 14:10:15 UTC 2017 x86_64 GNU/Linux")

@@ -598,6 +599,18 @@ class TestIsIBMProvisioning(DsIdentifyBase):
         self.assertIn("from current boot", ret.stderr)


+class TestOracle(DsIdentifyBase):
+    def test_found_by_chassis(self):
+        """Simple positive test of Oracle by chassis id."""
+        self._test_ds_found('Oracle')
+
+    def test_not_found(self):
+        """Simple negative test of Oracle."""
+        mycfg = copy.deepcopy(VALID_CFG['Oracle'])
+        mycfg['files'][P_CHASSIS_ASSET_TAG] = "Not Oracle"
+        self._check_via_dict(mycfg, rc=RC_NOT_FOUND)
+
+
 def blkid_out(disks=None):
     """Convert a list of disk dictionaries into blkid content."""
     if disks is None:
@@ -838,6 +851,12 @@ VALID_CFG = {
         },
     ],
     },
+    'Oracle': {
+        'ds': 'Oracle',
+        'files': {
+            P_CHASSIS_ASSET_TAG: ds_oracle.CHASSIS_ASSET_TAG + '\n',
+        }
+    },
     'SmartOS-bhyve': {
         'ds': 'SmartOS',
         'mocks': [
diff --git a/tools/ds-identify b/tools/ds-identify
index ce0477a5..fcc60149 100755
--- a/tools/ds-identify
+++ b/tools/ds-identify
@@ -116,7 +116,7 @@ DI_DSNAME=""
 # be searched if there is no setting found in config.
 DI_DSLIST_DEFAULT="MAAS ConfigDrive NoCloud AltCloud Azure Bigstep \
 CloudSigma CloudStack DigitalOcean AliYun Ec2 GCE OpenNebula OpenStack \
-OVF SmartOS Scaleway Hetzner IBMCloud"
+OVF SmartOS Scaleway Hetzner IBMCloud Oracle"
 DI_DSLIST=""
 DI_MODE=""
 DI_ON_FOUND=""
@@ -1036,6 +1036,12 @@ dscheck_Hetzner() {
     return ${DS_NOT_FOUND}
 }

+dscheck_Oracle() {
+    local asset_tag="OracleCloud.com"
+    dmi_chassis_asset_tag_matches "${asset_tag}" && return ${DS_FOUND}
+    return ${DS_NOT_FOUND}
+}
+
 is_ibm_provisioning() {
     local pcfg="${PATH_ROOT}/root/provisioningConfiguration.cfg"
     local logf="${PATH_ROOT}/root/swinstall.log"
--
cgit v1.2.3


From c7555762f3a30190ce7726b4d013bc3e83c7e4b6 Mon Sep 17 00:00:00 2001
From: Chad Smith
Date: Tue, 11 Sep 2018 17:31:46 +0000
Subject: user-data: jinja template to render instance-data.json in cloud-config

Allow users to provide '## template: jinja' as the first line of their
#cloud-config or custom script user-data parts. When this header exists,
the cloud-config or script will be rendered as a jinja template.

All instance metadata keys and values present in
/run/cloud-init/instance-data.json will be available as jinja variables
for the template. This means any cloud-config module or script can
reference any standardized instance data in templates and scripts.

Additionally, any standardized instance-data.json keys scoped below a
'<v#>' key will be promoted as a top-level key for ease of reference in
templates. This means that '{{ local_hostname }}' is the same as using
the latest '{{ v#.local_hostname }}'.

Since instance-data is written to /run/cloud-init/instance-data.json,
make sure it is persisted across reboots when the cached datasource
object is reloaded.
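As a sketch of the feature (the hostname key follows the standardized v1
instance-data described above; the echo command and output path are
illustrative only), a user-data part such as:

    ## template: jinja
    #cloud-config
    runcmd:
      - echo 'host is {{ v1.local_hostname }}' >> /tmp/jinja-rendered.txt

has '{{ v1.local_hostname }}' (equivalently '{{ local_hostname }}')
substituted from /run/cloud-init/instance-data.json before the
#cloud-config part is handled.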
LP: #1791781 --- bash_completion/cloud-init | 2 + cloudinit/cmd/devel/__init__.py | 25 ++ cloudinit/cmd/devel/parser.py | 5 +- cloudinit/cmd/devel/render.py | 90 ++++++ cloudinit/cmd/devel/tests/test_render.py | 101 +++++++ cloudinit/cmd/main.py | 16 +- cloudinit/handlers/__init__.py | 11 +- cloudinit/handlers/boot_hook.py | 12 +- cloudinit/handlers/cloud_config.py | 15 +- cloudinit/handlers/jinja_template.py | 137 +++++++++ cloudinit/handlers/shell_script.py | 9 +- cloudinit/handlers/upstart_job.py | 9 +- cloudinit/helpers.py | 4 + cloudinit/log.py | 12 +- cloudinit/sources/__init__.py | 47 ++- cloudinit/sources/tests/test_init.py | 75 ++++- cloudinit/stages.py | 22 +- cloudinit/templater.py | 28 +- cloudinit/tests/helpers.py | 9 + doc/rtd/topics/capabilities.rst | 15 +- doc/rtd/topics/datasources.rst | 47 +++ doc/rtd/topics/format.rst | 21 +- tests/cloud_tests/testcases/base.py | 8 +- tests/unittests/test_builtin_handlers.py | 324 +++++++++++++++++++-- .../test_handler/test_handler_etc_hosts.py | 1 + tests/unittests/test_handler/test_handler_ntp.py | 1 + tests/unittests/test_templating.py | 23 ++ 27 files changed, 959 insertions(+), 110 deletions(-) create mode 100755 cloudinit/cmd/devel/render.py create mode 100644 cloudinit/cmd/devel/tests/test_render.py create mode 100644 cloudinit/handlers/jinja_template.py (limited to 'cloudinit/sources/tests') diff --git a/bash_completion/cloud-init b/bash_completion/cloud-init index f38164b0..b3a5ced3 100644 --- a/bash_completion/cloud-init +++ b/bash_completion/cloud-init @@ -62,6 +62,8 @@ _cloudinit_complete() net-convert) COMPREPLY=($(compgen -W "--help --network-data --kind --directory --output-kind" -- $cur_word)) ;; + render) + COMPREPLY=($(compgen -W "--help --instance-data --debug" -- $cur_word)) schema) COMPREPLY=($(compgen -W "--help --config-file --doc --annotate" -- $cur_word)) ;; diff --git a/cloudinit/cmd/devel/__init__.py b/cloudinit/cmd/devel/__init__.py index e69de29b..3ae28b69 100644 --- a/cloudinit/cmd/devel/__init__.py +++ b/cloudinit/cmd/devel/__init__.py @@ -0,0 +1,25 @@ +# This file is part of cloud-init. See LICENSE file for license information. + +"""Common cloud-init devel commandline utility functions.""" + + +import logging + +from cloudinit import log +from cloudinit.stages import Init + + +def addLogHandlerCLI(logger, log_level): + """Add a commandline logging handler to emit messages to stderr.""" + formatter = logging.Formatter('%(levelname)s: %(message)s') + log.setupBasicLogging(log_level, formatter=formatter) + return logger + + +def read_cfg_paths(): + """Return a Paths object based on the system configuration on disk.""" + init = Init(ds_deps=[]) + init.read_cfg() + return init.paths + +# vi: ts=4 expandtab diff --git a/cloudinit/cmd/devel/parser.py b/cloudinit/cmd/devel/parser.py index 40a4b019..99a234ce 100644 --- a/cloudinit/cmd/devel/parser.py +++ b/cloudinit/cmd/devel/parser.py @@ -8,6 +8,7 @@ import argparse from cloudinit.config import schema from . import net_convert +from . 
import render


 def get_parser(parser=None):
@@ -22,7 +23,9 @@ def get_parser(parser=None):
         ('schema', 'Validate cloud-config files for document schema',
          schema.get_parser, schema.handle_schema_args),
         (net_convert.NAME, net_convert.__doc__,
-         net_convert.get_parser, net_convert.handle_args)
+         net_convert.get_parser, net_convert.handle_args),
+        (render.NAME, render.__doc__,
+         render.get_parser, render.handle_args)
     ]
     for (subcmd, helpmsg, get_parser, handler) in subcmds:
         parser = subparsers.add_parser(subcmd, help=helpmsg)
diff --git a/cloudinit/cmd/devel/render.py b/cloudinit/cmd/devel/render.py
new file mode 100755
index 00000000..e85933db
--- /dev/null
+++ b/cloudinit/cmd/devel/render.py
@@ -0,0 +1,90 @@
+# This file is part of cloud-init. See LICENSE file for license information.
+
+"""Debug jinja template rendering of user-data."""
+
+import argparse
+import os
+import sys
+
+from cloudinit.handlers.jinja_template import render_jinja_payload_from_file
+from cloudinit import log
+from cloudinit.sources import INSTANCE_JSON_FILE
+from cloudinit import util
+from . import addLogHandlerCLI, read_cfg_paths
+
+NAME = 'render'
+DEFAULT_INSTANCE_DATA = '/run/cloud-init/instance-data.json'
+
+LOG = log.getLogger(NAME)
+
+
+def get_parser(parser=None):
+    """Build or extend an arg parser for the jinja render utility.
+
+    @param parser: Optional existing ArgumentParser instance representing the
+        subcommand which will be extended to support the args of this utility.
+
+    @returns: ArgumentParser with proper argument configuration.
+    """
+    if not parser:
+        parser = argparse.ArgumentParser(prog=NAME, description=__doc__)
+    parser.add_argument(
+        'user_data', type=str, help='Path to the user-data file to render')
+    parser.add_argument(
+        '-i', '--instance-data', type=str,
+        help=('Optional path to instance-data.json file. Defaults to'
+              ' /run/cloud-init/instance-data.json'))
+    parser.add_argument('-d', '--debug', action='store_true', default=False,
+                        help='Add verbose messages during template render')
+    return parser
+
+
+def handle_args(name, args):
+    """Render the provided user-data template file using instance-data values.
+
+    Also set up CLI log handlers to report to stderr since this is a
+    development utility which should be run by a human on the CLI.
+
+    @return 0 on success, 1 on failure.
+ """ + addLogHandlerCLI(LOG, log.DEBUG if args.debug else log.WARNING) + if not args.instance_data: + paths = read_cfg_paths() + instance_data_fn = os.path.join( + paths.run_dir, INSTANCE_JSON_FILE) + else: + instance_data_fn = args.instance_data + try: + with open(instance_data_fn) as stream: + instance_data = stream.read() + instance_data = util.load_json(instance_data) + except IOError: + LOG.error('Missing instance-data.json file: %s', instance_data_fn) + return 1 + try: + with open(args.user_data) as stream: + user_data = stream.read() + except IOError: + LOG.error('Missing user-data file: %s', args.user_data) + return 1 + rendered_payload = render_jinja_payload_from_file( + payload=user_data, payload_fn=args.user_data, + instance_data_file=instance_data_fn, + debug=True if args.debug else False) + if not rendered_payload: + LOG.error('Unable to render user-data file: %s', args.user_data) + return 1 + sys.stdout.write(rendered_payload) + return 0 + + +def main(): + args = get_parser().parse_args() + return(handle_args(NAME, args)) + + +if __name__ == '__main__': + sys.exit(main()) + + +# vi: ts=4 expandtab diff --git a/cloudinit/cmd/devel/tests/test_render.py b/cloudinit/cmd/devel/tests/test_render.py new file mode 100644 index 00000000..fc5d2c0d --- /dev/null +++ b/cloudinit/cmd/devel/tests/test_render.py @@ -0,0 +1,101 @@ +# This file is part of cloud-init. See LICENSE file for license information. + +from six import StringIO +import os + +from collections import namedtuple +from cloudinit.cmd.devel import render +from cloudinit.helpers import Paths +from cloudinit.sources import INSTANCE_JSON_FILE +from cloudinit.tests.helpers import CiTestCase, mock, skipUnlessJinja +from cloudinit.util import ensure_dir, write_file + + +class TestRender(CiTestCase): + + with_logs = True + + args = namedtuple('renderargs', 'user_data instance_data debug') + + def setUp(self): + super(TestRender, self).setUp() + self.tmp = self.tmp_dir() + + def test_handle_args_error_on_missing_user_data(self): + """When user_data file path does not exist, log an error.""" + absent_file = self.tmp_path('user-data', dir=self.tmp) + instance_data = self.tmp_path('instance-data', dir=self.tmp) + write_file(instance_data, '{}') + args = self.args( + user_data=absent_file, instance_data=instance_data, debug=False) + with mock.patch('sys.stderr', new_callable=StringIO): + self.assertEqual(1, render.handle_args('anyname', args)) + self.assertIn( + 'Missing user-data file: %s' % absent_file, + self.logs.getvalue()) + + def test_handle_args_error_on_missing_instance_data(self): + """When instance_data file path does not exist, log an error.""" + user_data = self.tmp_path('user-data', dir=self.tmp) + absent_file = self.tmp_path('instance-data', dir=self.tmp) + args = self.args( + user_data=user_data, instance_data=absent_file, debug=False) + with mock.patch('sys.stderr', new_callable=StringIO): + self.assertEqual(1, render.handle_args('anyname', args)) + self.assertIn( + 'Missing instance-data.json file: %s' % absent_file, + self.logs.getvalue()) + + def test_handle_args_defaults_instance_data(self): + """When no instance_data argument, default to configured run_dir.""" + user_data = self.tmp_path('user-data', dir=self.tmp) + run_dir = self.tmp_path('run_dir', dir=self.tmp) + ensure_dir(run_dir) + paths = Paths({'run_dir': run_dir}) + self.add_patch('cloudinit.cmd.devel.render.read_cfg_paths', 'm_paths') + self.m_paths.return_value = paths + args = self.args( + user_data=user_data, instance_data=None, debug=False) + with 
mock.patch('sys.stderr', new_callable=StringIO): + self.assertEqual(1, render.handle_args('anyname', args)) + json_file = os.path.join(run_dir, INSTANCE_JSON_FILE) + self.assertIn( + 'Missing instance-data.json file: %s' % json_file, + self.logs.getvalue()) + + @skipUnlessJinja() + def test_handle_args_renders_instance_data_vars_in_template(self): + """If user_data file is a jinja template render instance-data vars.""" + user_data = self.tmp_path('user-data', dir=self.tmp) + write_file(user_data, '##template: jinja\nrendering: {{ my_var }}') + instance_data = self.tmp_path('instance-data', dir=self.tmp) + write_file(instance_data, '{"my-var": "jinja worked"}') + args = self.args( + user_data=user_data, instance_data=instance_data, debug=True) + with mock.patch('sys.stderr', new_callable=StringIO) as m_console_err: + with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout: + self.assertEqual(0, render.handle_args('anyname', args)) + self.assertIn( + 'DEBUG: Converted jinja variables\n{', self.logs.getvalue()) + self.assertIn( + 'DEBUG: Converted jinja variables\n{', m_console_err.getvalue()) + self.assertEqual('rendering: jinja worked', m_stdout.getvalue()) + + @skipUnlessJinja() + def test_handle_args_warns_and_gives_up_on_invalid_jinja_operation(self): + """If user_data file has invalid jinja operations log warnings.""" + user_data = self.tmp_path('user-data', dir=self.tmp) + write_file(user_data, '##template: jinja\nrendering: {{ my-var }}') + instance_data = self.tmp_path('instance-data', dir=self.tmp) + write_file(instance_data, '{"my-var": "jinja worked"}') + args = self.args( + user_data=user_data, instance_data=instance_data, debug=True) + with mock.patch('sys.stderr', new_callable=StringIO): + self.assertEqual(1, render.handle_args('anyname', args)) + self.assertIn( + 'WARNING: Ignoring jinja template for %s: Undefined jinja' + ' variable: "my-var". Jinja tried subtraction. Perhaps you meant' + ' "my_var"?' % user_data, + self.logs.getvalue()) + +# vi: ts=4 expandtab diff --git a/cloudinit/cmd/main.py b/cloudinit/cmd/main.py index 4ea4fe7f..0eee583c 100644 --- a/cloudinit/cmd/main.py +++ b/cloudinit/cmd/main.py @@ -348,6 +348,7 @@ def main_init(name, args): LOG.debug("[%s] barreling on in force mode without datasource", mode) + _maybe_persist_instance_data(init) # Stage 6 iid = init.instancify() LOG.debug("[%s] %s will now be targeting instance id: %s. new=%s", @@ -490,6 +491,7 @@ def main_modules(action_name, args): print_exc(msg) if not args.force: return [(msg)] + _maybe_persist_instance_data(init) # Stage 3 mods = stages.Modules(init, extract_fns(args), reporter=args.reporter) # Stage 4 @@ -541,6 +543,7 @@ def main_single(name, args): " likely bad things to come!")) if not args.force: return 1 + _maybe_persist_instance_data(init) # Stage 3 mods = stages.Modules(init, extract_fns(args), reporter=args.reporter) mod_args = args.module_args @@ -688,6 +691,15 @@ def status_wrapper(name, args, data_d=None, link_d=None): return len(v1[mode]['errors']) +def _maybe_persist_instance_data(init): + """Write instance-data.json file if absent and datasource is restored.""" + if init.ds_restored: + instance_data_file = os.path.join( + init.paths.run_dir, sources.INSTANCE_JSON_FILE) + if not os.path.exists(instance_data_file): + init.datasource.persist_instance_data() + + def _maybe_set_hostname(init, stage, retry_stage): """Call set-hostname if metadata, vendordata or userdata provides it. 
@@ -887,6 +899,8 @@ def main(sysv_args=None): if __name__ == '__main__': if 'TZ' not in os.environ: os.environ['TZ'] = ":/etc/localtime" - main(sys.argv) + return_value = main(sys.argv) + if return_value: + sys.exit(return_value) # vi: ts=4 expandtab diff --git a/cloudinit/handlers/__init__.py b/cloudinit/handlers/__init__.py index c3576c04..0db75af9 100644 --- a/cloudinit/handlers/__init__.py +++ b/cloudinit/handlers/__init__.py @@ -41,7 +41,7 @@ PART_HANDLER_FN_TMPL = 'part-handler-%03d' # For parts without filenames PART_FN_TPL = 'part-%03d' -# Different file beginnings to there content type +# Different file beginnings to their content type INCLUSION_TYPES_MAP = { '#include': 'text/x-include-url', '#include-once': 'text/x-include-once-url', @@ -52,6 +52,7 @@ INCLUSION_TYPES_MAP = { '#cloud-boothook': 'text/cloud-boothook', '#cloud-config-archive': 'text/cloud-config-archive', '#cloud-config-jsonp': 'text/cloud-config-jsonp', + '## template: jinja': 'text/jinja2', } # Sorted longest first @@ -69,9 +70,13 @@ class Handler(object): def __repr__(self): return "%s: [%s]" % (type_utils.obj_name(self), self.list_types()) - @abc.abstractmethod def list_types(self): - raise NotImplementedError() + # Each subclass must define the supported content prefixes it handles. + if not hasattr(self, 'prefixes'): + raise NotImplementedError('Missing prefixes subclass attribute') + else: + return [INCLUSION_TYPES_MAP[prefix] + for prefix in getattr(self, 'prefixes')] @abc.abstractmethod def handle_part(self, *args, **kwargs): diff --git a/cloudinit/handlers/boot_hook.py b/cloudinit/handlers/boot_hook.py index 057b4dbc..dca50a49 100644 --- a/cloudinit/handlers/boot_hook.py +++ b/cloudinit/handlers/boot_hook.py @@ -17,10 +17,13 @@ from cloudinit import util from cloudinit.settings import (PER_ALWAYS) LOG = logging.getLogger(__name__) -BOOTHOOK_PREFIX = "#cloud-boothook" class BootHookPartHandler(handlers.Handler): + + # The content prefixes this handler understands. + prefixes = ['#cloud-boothook'] + def __init__(self, paths, datasource, **_kwargs): handlers.Handler.__init__(self, PER_ALWAYS) self.boothook_dir = paths.get_ipath("boothooks") @@ -28,16 +31,11 @@ class BootHookPartHandler(handlers.Handler): if datasource: self.instance_id = datasource.get_instance_id() - def list_types(self): - return [ - handlers.type_from_starts_with(BOOTHOOK_PREFIX), - ] - def _write_part(self, payload, filename): filename = util.clean_filename(filename) filepath = os.path.join(self.boothook_dir, filename) contents = util.strip_prefix_suffix(util.dos2unix(payload), - prefix=BOOTHOOK_PREFIX) + prefix=self.prefixes[0]) util.write_file(filepath, contents.lstrip(), 0o700) return filepath diff --git a/cloudinit/handlers/cloud_config.py b/cloudinit/handlers/cloud_config.py index 178a5b9b..99bf0e61 100644 --- a/cloudinit/handlers/cloud_config.py +++ b/cloudinit/handlers/cloud_config.py @@ -42,14 +42,12 @@ DEF_MERGERS = mergers.string_extract_mergers('dict(replace)+list()+str()') CLOUD_PREFIX = "#cloud-config" JSONP_PREFIX = "#cloud-config-jsonp" -# The file header -> content types this module will handle. -CC_TYPES = { - JSONP_PREFIX: handlers.type_from_starts_with(JSONP_PREFIX), - CLOUD_PREFIX: handlers.type_from_starts_with(CLOUD_PREFIX), -} - class CloudConfigPartHandler(handlers.Handler): + + # The content prefixes this handler understands. 
+ prefixes = [CLOUD_PREFIX, JSONP_PREFIX] + def __init__(self, paths, **_kwargs): handlers.Handler.__init__(self, PER_ALWAYS, version=3) self.cloud_buf = None @@ -58,9 +56,6 @@ class CloudConfigPartHandler(handlers.Handler): self.cloud_fn = paths.get_ipath(_kwargs["cloud_config_path"]) self.file_names = [] - def list_types(self): - return list(CC_TYPES.values()) - def _write_cloud_config(self): if not self.cloud_fn: return @@ -138,7 +133,7 @@ class CloudConfigPartHandler(handlers.Handler): # First time through, merge with an empty dict... if self.cloud_buf is None or not self.file_names: self.cloud_buf = {} - if ctype == CC_TYPES[JSONP_PREFIX]: + if ctype == handlers.INCLUSION_TYPES_MAP[JSONP_PREFIX]: self._merge_patch(payload) else: self._merge_part(payload, headers) diff --git a/cloudinit/handlers/jinja_template.py b/cloudinit/handlers/jinja_template.py new file mode 100644 index 00000000..3fa4097e --- /dev/null +++ b/cloudinit/handlers/jinja_template.py @@ -0,0 +1,137 @@ +# This file is part of cloud-init. See LICENSE file for license information. + +import os +import re + +try: + from jinja2.exceptions import UndefinedError as JUndefinedError +except ImportError: + # No jinja2 dependency + JUndefinedError = Exception + +from cloudinit import handlers +from cloudinit import log as logging +from cloudinit.sources import INSTANCE_JSON_FILE +from cloudinit.templater import render_string, MISSING_JINJA_PREFIX +from cloudinit.util import b64d, load_file, load_json, json_dumps + +from cloudinit.settings import PER_ALWAYS + +LOG = logging.getLogger(__name__) + + +class JinjaTemplatePartHandler(handlers.Handler): + + prefixes = ['## template: jinja'] + + def __init__(self, paths, **_kwargs): + handlers.Handler.__init__(self, PER_ALWAYS, version=3) + self.paths = paths + self.sub_handlers = {} + for handler in _kwargs.get('sub_handlers', []): + for ctype in handler.list_types(): + self.sub_handlers[ctype] = handler + + def handle_part(self, data, ctype, filename, payload, frequency, headers): + if ctype in handlers.CONTENT_SIGNALS: + return + jinja_json_file = os.path.join(self.paths.run_dir, INSTANCE_JSON_FILE) + rendered_payload = render_jinja_payload_from_file( + payload, filename, jinja_json_file) + if not rendered_payload: + return + subtype = handlers.type_from_starts_with(rendered_payload) + sub_handler = self.sub_handlers.get(subtype) + if not sub_handler: + LOG.warning( + 'Ignoring jinja template for %s. Could not find supported' + ' sub-handler for type %s', filename, subtype) + return + if sub_handler.handler_version == 3: + sub_handler.handle_part( + data, ctype, filename, rendered_payload, frequency, headers) + elif sub_handler.handler_version == 2: + sub_handler.handle_part( + data, ctype, filename, rendered_payload, frequency) + + +def render_jinja_payload_from_file( + payload, payload_fn, instance_data_file, debug=False): + """Render a jinja template payload sourcing variables from jinja_vars_path. + + @param payload: String of jinja template content. Should begin with + ## template: jinja\n. + @param payload_fn: String representing the filename from which the payload + was read used in error reporting. Generally in part-handling this is + 'part-##'. + @param instance_data_file: A path to a json file containing variables that + will be used as jinja template variables. + + @return: A string of jinja-rendered content with the jinja header removed. + Returns None on error. 
+ """ + instance_data = {} + rendered_payload = None + if not os.path.exists(instance_data_file): + raise RuntimeError( + 'Cannot render jinja template vars. Instance data not yet' + ' present at %s' % instance_data_file) + instance_data = load_json(load_file(instance_data_file)) + rendered_payload = render_jinja_payload( + payload, payload_fn, instance_data, debug) + if not rendered_payload: + return None + return rendered_payload + + +def render_jinja_payload(payload, payload_fn, instance_data, debug=False): + instance_jinja_vars = convert_jinja_instance_data( + instance_data, + decode_paths=instance_data.get('base64-encoded-keys', [])) + if debug: + LOG.debug('Converted jinja variables\n%s', + json_dumps(instance_jinja_vars)) + try: + rendered_payload = render_string(payload, instance_jinja_vars) + except (TypeError, JUndefinedError) as e: + LOG.warning( + 'Ignoring jinja template for %s: %s', payload_fn, str(e)) + return None + warnings = [ + "'%s'" % var.replace(MISSING_JINJA_PREFIX, '') + for var in re.findall( + r'%s[^\s]+' % MISSING_JINJA_PREFIX, rendered_payload)] + if warnings: + LOG.warning( + "Could not render jinja template variables in file '%s': %s", + payload_fn, ', '.join(warnings)) + return rendered_payload + + +def convert_jinja_instance_data(data, prefix='', sep='/', decode_paths=()): + """Process instance-data.json dict for use in jinja templates. + + Replace hyphens with underscores for jinja templates and decode any + base64_encoded_keys. + """ + result = {} + decode_paths = [path.replace('-', '_') for path in decode_paths] + for key, value in sorted(data.items()): + if '-' in key: + # Standardize keys for use in #cloud-config/shell templates + key = key.replace('-', '_') + key_path = '{0}{1}{2}'.format(prefix, sep, key) if prefix else key + if key_path in decode_paths: + value = b64d(value) + if isinstance(value, dict): + result[key] = convert_jinja_instance_data( + value, key_path, sep=sep, decode_paths=decode_paths) + if re.match(r'v\d+', key): + # Copy values to top-level aliases + for subkey, subvalue in result[key].items(): + result[subkey] = subvalue + else: + result[key] = value + return result + +# vi: ts=4 expandtab diff --git a/cloudinit/handlers/shell_script.py b/cloudinit/handlers/shell_script.py index e4945a23..214714bc 100644 --- a/cloudinit/handlers/shell_script.py +++ b/cloudinit/handlers/shell_script.py @@ -17,21 +17,18 @@ from cloudinit import util from cloudinit.settings import (PER_ALWAYS) LOG = logging.getLogger(__name__) -SHELL_PREFIX = "#!" 
 class ShellScriptPartHandler(handlers.Handler):
+
+    prefixes = ['#!']
+
     def __init__(self, paths, **_kwargs):
         handlers.Handler.__init__(self, PER_ALWAYS)
         self.script_dir = paths.get_ipath_cur('scripts')
         if 'script_path' in _kwargs:
             self.script_dir = paths.get_ipath_cur(_kwargs['script_path'])
 
-    def list_types(self):
-        return [
-            handlers.type_from_starts_with(SHELL_PREFIX),
-        ]
-
     def handle_part(self, data, ctype, filename, payload, frequency):
         if ctype in handlers.CONTENT_SIGNALS:
             # TODO(harlowja): maybe delete existing things here
diff --git a/cloudinit/handlers/upstart_job.py b/cloudinit/handlers/upstart_job.py
index dc338769..83fb0724 100644
--- a/cloudinit/handlers/upstart_job.py
+++ b/cloudinit/handlers/upstart_job.py
@@ -18,19 +18,16 @@ from cloudinit import util
 from cloudinit.settings import (PER_INSTANCE)
 
 LOG = logging.getLogger(__name__)
-UPSTART_PREFIX = "#upstart-job"
 
 
 class UpstartJobPartHandler(handlers.Handler):
+
+    prefixes = ['#upstart-job']
+
     def __init__(self, paths, **_kwargs):
         handlers.Handler.__init__(self, PER_INSTANCE)
         self.upstart_dir = paths.upstart_conf_d
 
-    def list_types(self):
-        return [
-            handlers.type_from_starts_with(UPSTART_PREFIX),
-        ]
-
     def handle_part(self, data, ctype, filename, payload, frequency):
         if ctype in handlers.CONTENT_SIGNALS:
             return
diff --git a/cloudinit/helpers.py b/cloudinit/helpers.py
index 1979cd96..3cc1fb19 100644
--- a/cloudinit/helpers.py
+++ b/cloudinit/helpers.py
@@ -449,4 +449,8 @@ class DefaultingConfigParser(RawConfigParser):
             contents = '\n'.join([header, contents, ''])
         return contents
 
+
+def identity(object):
+    return object
+
 # vi: ts=4 expandtab
diff --git a/cloudinit/log.py b/cloudinit/log.py
index 1d75c9ff..5ae312ba 100644
--- a/cloudinit/log.py
+++ b/cloudinit/log.py
@@ -38,10 +38,18 @@ DEF_CON_FORMAT = '%(asctime)s - %(filename)s[%(levelname)s]: %(message)s'
 logging.Formatter.converter = time.gmtime
 
 
-def setupBasicLogging(level=DEBUG):
+def setupBasicLogging(level=DEBUG, formatter=None):
+    if not formatter:
+        formatter = logging.Formatter(DEF_CON_FORMAT)
     root = logging.getLogger()
+    for handler in root.handlers:
+        if hasattr(handler, 'stream') and hasattr(handler.stream, 'name'):
+            if handler.stream.name == '<stderr>':
+                handler.setLevel(level)
+                return
+    # Didn't have an existing <stderr> handler; create a new handler
     console = logging.StreamHandler(sys.stderr)
-    console.setFormatter(logging.Formatter(DEF_CON_FORMAT))
+    console.setFormatter(formatter)
     console.setLevel(level)
     root.addHandler(console)
     root.setLevel(level)
diff --git a/cloudinit/sources/__init__.py b/cloudinit/sources/__init__.py
index 41fde9ba..a775f1a8 100644
--- a/cloudinit/sources/__init__.py
+++ b/cloudinit/sources/__init__.py
@@ -58,22 +58,27 @@ class InvalidMetaDataException(Exception):
     pass
 
 
-def process_base64_metadata(metadata, key_path=''):
-    """Strip ci-b64 prefix and return metadata with base64-encoded-keys set."""
+def process_instance_metadata(metadata, key_path=''):
+    """Process all instance metadata cleaning it up for persisting as json.
+
+    Strip ci-b64 prefix and catalog any 'base64_encoded_keys' as a list
+
+    @return Dict copy of processed metadata.
+ """ md_copy = copy.deepcopy(metadata) - md_copy['base64-encoded-keys'] = [] + md_copy['base64_encoded_keys'] = [] for key, val in metadata.items(): if key_path: sub_key_path = key_path + '/' + key else: sub_key_path = key if isinstance(val, str) and val.startswith('ci-b64:'): - md_copy['base64-encoded-keys'].append(sub_key_path) + md_copy['base64_encoded_keys'].append(sub_key_path) md_copy[key] = val.replace('ci-b64:', '') if isinstance(val, dict): - return_val = process_base64_metadata(val, sub_key_path) - md_copy['base64-encoded-keys'].extend( - return_val.pop('base64-encoded-keys')) + return_val = process_instance_metadata(val, sub_key_path) + md_copy['base64_encoded_keys'].extend( + return_val.pop('base64_encoded_keys')) md_copy[key] = return_val return md_copy @@ -180,15 +185,24 @@ class DataSource(object): """ self._dirty_cache = True return_value = self._get_data() - json_file = os.path.join(self.paths.run_dir, INSTANCE_JSON_FILE) if not return_value: return return_value + self.persist_instance_data() + return return_value + + def persist_instance_data(self): + """Process and write INSTANCE_JSON_FILE with all instance metadata. + Replace any hyphens with underscores in key names for use in template + processing. + + @return True on successful write, False otherwise. + """ instance_data = { 'ds': { - 'meta-data': self.metadata, - 'user-data': self.get_userdata_raw(), - 'vendor-data': self.get_vendordata_raw()}} + 'meta_data': self.metadata, + 'user_data': self.get_userdata_raw(), + 'vendor_data': self.get_vendordata_raw()}} if hasattr(self, 'network_json'): network_json = getattr(self, 'network_json') if network_json != UNSET: @@ -202,16 +216,17 @@ class DataSource(object): try: # Process content base64encoding unserializable values content = util.json_dumps(instance_data) - # Strip base64: prefix and return base64-encoded-keys - processed_data = process_base64_metadata(json.loads(content)) + # Strip base64: prefix and set base64_encoded_keys list. 
+ processed_data = process_instance_metadata(json.loads(content)) except TypeError as e: LOG.warning('Error persisting instance-data.json: %s', str(e)) - return return_value + return False except UnicodeDecodeError as e: LOG.warning('Error persisting instance-data.json: %s', str(e)) - return return_value + return False + json_file = os.path.join(self.paths.run_dir, INSTANCE_JSON_FILE) write_json(json_file, processed_data, mode=0o600) - return return_value + return True def _get_data(self): """Walk metadata sources, process crawled data and save attributes.""" diff --git a/cloudinit/sources/tests/test_init.py b/cloudinit/sources/tests/test_init.py index 9e939c1e..8299af23 100644 --- a/cloudinit/sources/tests/test_init.py +++ b/cloudinit/sources/tests/test_init.py @@ -20,10 +20,12 @@ class DataSourceTestSubclassNet(DataSource): dsname = 'MyTestSubclass' url_max_wait = 55 - def __init__(self, sys_cfg, distro, paths, custom_userdata=None): + def __init__(self, sys_cfg, distro, paths, custom_userdata=None, + get_data_retval=True): super(DataSourceTestSubclassNet, self).__init__( sys_cfg, distro, paths) self._custom_userdata = custom_userdata + self._get_data_retval = get_data_retval def _get_cloud_name(self): return 'SubclassCloudName' @@ -37,7 +39,7 @@ class DataSourceTestSubclassNet(DataSource): else: self.userdata_raw = 'userdata_raw' self.vendordata_raw = 'vendordata_raw' - return True + return self._get_data_retval class InvalidDataSourceTestSubclassNet(DataSource): @@ -264,7 +266,18 @@ class TestDataSource(CiTestCase): self.assertEqual('fqdnhostname.domain.com', datasource.get_hostname(fqdn=True)) - def test_get_data_write_json_instance_data(self): + def test_get_data_does_not_write_instance_data_on_failure(self): + """get_data does not write INSTANCE_JSON_FILE on get_data False.""" + tmp = self.tmp_dir() + datasource = DataSourceTestSubclassNet( + self.sys_cfg, self.distro, Paths({'run_dir': tmp}), + get_data_retval=False) + self.assertFalse(datasource.get_data()) + json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp) + self.assertFalse( + os.path.exists(json_file), 'Found unexpected file %s' % json_file) + + def test_get_data_writes_json_instance_data_on_success(self): """get_data writes INSTANCE_JSON_FILE to run_dir as readonly root.""" tmp = self.tmp_dir() datasource = DataSourceTestSubclassNet( @@ -273,7 +286,7 @@ class TestDataSource(CiTestCase): json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp) content = util.load_file(json_file) expected = { - 'base64-encoded-keys': [], + 'base64_encoded_keys': [], 'v1': { 'availability-zone': 'myaz', 'cloud-name': 'subclasscloudname', @@ -281,11 +294,12 @@ class TestDataSource(CiTestCase): 'local-hostname': 'test-subclass-hostname', 'region': 'myregion'}, 'ds': { - 'meta-data': {'availability_zone': 'myaz', + 'meta_data': {'availability_zone': 'myaz', 'local-hostname': 'test-subclass-hostname', 'region': 'myregion'}, - 'user-data': 'userdata_raw', - 'vendor-data': 'vendordata_raw'}} + 'user_data': 'userdata_raw', + 'vendor_data': 'vendordata_raw'}} + self.maxDiff = None self.assertEqual(expected, util.load_json(content)) file_stat = os.stat(json_file) self.assertEqual(0o600, stat.S_IMODE(file_stat.st_mode)) @@ -296,7 +310,7 @@ class TestDataSource(CiTestCase): datasource = DataSourceTestSubclassNet( self.sys_cfg, self.distro, Paths({'run_dir': tmp}), custom_userdata={'key1': 'val1', 'key2': {'key2.1': self.paths}}) - self.assertTrue(datasource.get_data()) + datasource.get_data() json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp) content = 
util.load_file(json_file) expected_userdata = { @@ -306,7 +320,40 @@ class TestDataSource(CiTestCase): " 'cloudinit.helpers.Paths'>"}} instance_json = util.load_json(content) self.assertEqual( - expected_userdata, instance_json['ds']['user-data']) + expected_userdata, instance_json['ds']['user_data']) + + def test_persist_instance_data_writes_ec2_metadata_when_set(self): + """When ec2_metadata class attribute is set, persist to json.""" + tmp = self.tmp_dir() + datasource = DataSourceTestSubclassNet( + self.sys_cfg, self.distro, Paths({'run_dir': tmp})) + datasource.ec2_metadata = UNSET + datasource.get_data() + json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp) + instance_data = util.load_json(util.load_file(json_file)) + self.assertNotIn('ec2_metadata', instance_data['ds']) + datasource.ec2_metadata = {'ec2stuff': 'is good'} + datasource.persist_instance_data() + instance_data = util.load_json(util.load_file(json_file)) + self.assertEqual( + {'ec2stuff': 'is good'}, + instance_data['ds']['ec2_metadata']) + + def test_persist_instance_data_writes_network_json_when_set(self): + """When network_data.json class attribute is set, persist to json.""" + tmp = self.tmp_dir() + datasource = DataSourceTestSubclassNet( + self.sys_cfg, self.distro, Paths({'run_dir': tmp})) + datasource.get_data() + json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp) + instance_data = util.load_json(util.load_file(json_file)) + self.assertNotIn('network_json', instance_data['ds']) + datasource.network_json = {'network_json': 'is good'} + datasource.persist_instance_data() + instance_data = util.load_json(util.load_file(json_file)) + self.assertEqual( + {'network_json': 'is good'}, + instance_data['ds']['network_json']) @skipIf(not six.PY3, "json serialization on <= py2.7 handles bytes") def test_get_data_base64encodes_unserializable_bytes(self): @@ -320,11 +367,11 @@ class TestDataSource(CiTestCase): content = util.load_file(json_file) instance_json = util.load_json(content) self.assertEqual( - ['ds/user-data/key2/key2.1'], - instance_json['base64-encoded-keys']) + ['ds/user_data/key2/key2.1'], + instance_json['base64_encoded_keys']) self.assertEqual( {'key1': 'val1', 'key2': {'key2.1': 'EjM='}}, - instance_json['ds']['user-data']) + instance_json['ds']['user_data']) @skipIf(not six.PY2, "json serialization on <= py2.7 handles bytes") def test_get_data_handles_bytes_values(self): @@ -337,10 +384,10 @@ class TestDataSource(CiTestCase): json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp) content = util.load_file(json_file) instance_json = util.load_json(content) - self.assertEqual([], instance_json['base64-encoded-keys']) + self.assertEqual([], instance_json['base64_encoded_keys']) self.assertEqual( {'key1': 'val1', 'key2': {'key2.1': '\x123'}}, - instance_json['ds']['user-data']) + instance_json['ds']['user_data']) @skipIf(not six.PY2, "Only python2 hits UnicodeDecodeErrors on non-utf8") def test_non_utf8_encoding_logs_warning(self): diff --git a/cloudinit/stages.py b/cloudinit/stages.py index 8874d405..ef5c6996 100644 --- a/cloudinit/stages.py +++ b/cloudinit/stages.py @@ -17,10 +17,11 @@ from cloudinit.settings import ( from cloudinit import handlers # Default handlers (used if not overridden) -from cloudinit.handlers import boot_hook as bh_part -from cloudinit.handlers import cloud_config as cc_part -from cloudinit.handlers import shell_script as ss_part -from cloudinit.handlers import upstart_job as up_part +from cloudinit.handlers.boot_hook import BootHookPartHandler +from cloudinit.handlers.cloud_config import 
CloudConfigPartHandler
+from cloudinit.handlers.jinja_template import JinjaTemplatePartHandler
+from cloudinit.handlers.shell_script import ShellScriptPartHandler
+from cloudinit.handlers.upstart_job import UpstartJobPartHandler
 
 from cloudinit.event import EventType
 
@@ -413,12 +414,17 @@ class Init(object):
             'datasource': self.datasource,
         })
         # TODO(harlowja) Hmmm, should we dynamically import these??
+        cloudconfig_handler = CloudConfigPartHandler(**opts)
+        shellscript_handler = ShellScriptPartHandler(**opts)
         def_handlers = [
-            cc_part.CloudConfigPartHandler(**opts),
-            ss_part.ShellScriptPartHandler(**opts),
-            bh_part.BootHookPartHandler(**opts),
-            up_part.UpstartJobPartHandler(**opts),
+            cloudconfig_handler,
+            shellscript_handler,
+            BootHookPartHandler(**opts),
+            UpstartJobPartHandler(**opts),
         ]
+        opts.update(
+            {'sub_handlers': [cloudconfig_handler, shellscript_handler]})
+        def_handlers.append(JinjaTemplatePartHandler(**opts))
         return def_handlers
 
     def _default_userdata_handlers(self):
diff --git a/cloudinit/templater.py b/cloudinit/templater.py
index 7e7acb86..b668674b 100644
--- a/cloudinit/templater.py
+++ b/cloudinit/templater.py
@@ -13,6 +13,7 @@
 import collections
 import re
 
+
 try:
     from Cheetah.Template import Template as CTemplate
     CHEETAH_AVAILABLE = True
@@ -20,23 +21,44 @@ except (ImportError, AttributeError):
     CHEETAH_AVAILABLE = False
 
 try:
-    import jinja2
+    from jinja2.runtime import implements_to_string
     from jinja2 import Template as JTemplate
+    from jinja2 import DebugUndefined as JUndefined
     JINJA_AVAILABLE = True
 except (ImportError, AttributeError):
+    from cloudinit.helpers import identity
+    implements_to_string = identity
     JINJA_AVAILABLE = False
+    JUndefined = object
 
 from cloudinit import log as logging
 from cloudinit import type_utils as tu
 from cloudinit import util
 
+
 LOG = logging.getLogger(__name__)
 TYPE_MATCHER = re.compile(r"##\s*template:(.*)", re.I)
 BASIC_MATCHER = re.compile(r'\$\{([A-Za-z0-9_.]+)\}|\$([A-Za-z0-9_.]+)')
+MISSING_JINJA_PREFIX = u'CI_MISSING_JINJA_VAR/'
+
+
+@implements_to_string  # Needed for python2.7. Otherwise cached super.__str__
+class UndefinedJinjaVariable(JUndefined):
+    """Class used to represent any undefined jinja template variable."""
+
+    def __str__(self):
+        return u'%s%s' % (MISSING_JINJA_PREFIX, self._undefined_name)
+
+    def __sub__(self, other):
+        other = str(other).replace(MISSING_JINJA_PREFIX, '')
+        raise TypeError(
+            'Undefined jinja variable: "{this}-{other}". Jinja tried'
+            ' subtraction. Perhaps you meant "{this}_{other}"?'.format(
+                this=self._undefined_name, other=other))
+
 
 def basic_render(content, params):
-    """This does sumple replacement of bash variable like templates.
+    """This does simple replacement of bash variable like templates.
It identifies patterns like ${a} or $a and can also identify patterns like ${a.b} or $a.b which will look for a key 'b' in the dictionary rooted @@ -82,7 +104,7 @@ def detect_template(text): # keep_trailing_newline is in jinja2 2.7+, not 2.6 add = "\n" if content.endswith("\n") else "" return JTemplate(content, - undefined=jinja2.StrictUndefined, + undefined=UndefinedJinjaVariable, trim_blocks=True).render(**params) + add if text.find("\n") != -1: diff --git a/cloudinit/tests/helpers.py b/cloudinit/tests/helpers.py index 42f56c27..2eb7b0cd 100644 --- a/cloudinit/tests/helpers.py +++ b/cloudinit/tests/helpers.py @@ -32,6 +32,7 @@ from cloudinit import cloud from cloudinit import distros from cloudinit import helpers as ch from cloudinit.sources import DataSourceNone +from cloudinit.templater import JINJA_AVAILABLE from cloudinit import util _real_subp = util.subp @@ -518,6 +519,14 @@ def skipUnlessJsonSchema(): _missing_jsonschema_dep, "No python-jsonschema dependency present.") +def skipUnlessJinja(): + return skipIf(not JINJA_AVAILABLE, "No jinja dependency present.") + + +def skipIfJinja(): + return skipIf(JINJA_AVAILABLE, "Jinja dependency present.") + + # older versions of mock do not have the useful 'assert_not_called' if not hasattr(mock.Mock, 'assert_not_called'): def __mock_assert_not_called(mmock): diff --git a/doc/rtd/topics/capabilities.rst b/doc/rtd/topics/capabilities.rst index 3e2c9e31..2d8e2538 100644 --- a/doc/rtd/topics/capabilities.rst +++ b/doc/rtd/topics/capabilities.rst @@ -16,13 +16,15 @@ User configurability `Cloud-init`_ 's behavior can be configured via user-data. - User-data can be given by the user at instance launch time. + User-data can be given by the user at instance launch time. See + :ref:`user_data_formats` for acceptable user-data content. + This is done via the ``--user-data`` or ``--user-data-file`` argument to ec2-run-instances for example. -* Check your local clients documentation for how to provide a `user-data` - string or `user-data` file for usage by cloud-init on instance creation. +* Check your local client's documentation for how to provide a `user-data` + string or `user-data` file to cloud-init on instance creation. Feature detection @@ -166,6 +168,13 @@ likely be promoted to top-level subcommands when stable. validation is work in progress and supports a subset of cloud-config modules. + * ``cloud-init devel render``: Use cloud-init's jinja template render to + process **#cloud-config** or **custom-scripts**, injecting any variables + from ``/run/cloud-init/instance-data.json``. It accepts a user-data file + containing the jinja template header ``## template: jinja`` and renders + that content with any instance-data.json variables present. + + .. _cli_clean: cloud-init clean diff --git a/doc/rtd/topics/datasources.rst b/doc/rtd/topics/datasources.rst index 83034589..14432e65 100644 --- a/doc/rtd/topics/datasources.rst +++ b/doc/rtd/topics/datasources.rst @@ -18,6 +18,8 @@ single way to access the different cloud systems methods to provide this data through the typical usage of subclasses. +.. _instance_metadata: + instance-data ------------- For reference, cloud-init stores all the metadata, vendordata and userdata @@ -110,6 +112,51 @@ Below is an instance-data.json example from an OpenStack instance: } } + +As of cloud-init v. 18.4, any values present in +``/run/cloud-init/instance-data.json`` can be used in cloud-init user data +scripts or cloud config data. 
This allows consumers to use cloud-init's
+vendor-neutral, standardized metadata keys as well as datasource-specific
+content for any scripts or cloud-config modules they are using.
+
+To use instance-data.json values in scripts and **#cloud-config** files the
+user-data must contain the following header as its first line:
+**## template: jinja**. Cloud-init will source all variables defined in
+``/run/cloud-init/instance-data.json`` and allow scripts or cloud-config files
+to reference those paths. Below are two examples::
+
+  * Cloud config calling home with the ec2 public hostname and
+    availability-zone
+    ```
+    ## template: jinja
+    #cloud-config
+    runcmd:
+        - echo 'EC2 public hostname allocated to instance: {{ ds.meta_data.public_hostname }}' > /tmp/instance_metadata
+        - echo 'EC2 availability zone: {{ v1.availability_zone }}' >> /tmp/instance_metadata
+        - curl -X POST -d '{"hostname": "{{ds.meta_data.public_hostname }}", "availability-zone": "{{ v1.availability_zone }}"}' https://example.com
+    ```
+
+  * Custom user script performing different operations based on region
+    ```
+    ## template: jinja
+    #!/bin/bash
+    {% if v1.region == 'us-east-2' -%}
+    echo 'Installing custom proxies for {{ v1.region }}'
+    sudo apt-get install my-xtra-fast-stack
+    {%- endif %}
+    ...
+
+    ```
+
+.. note::
+  Trying to reference jinja variables that don't exist in
+  instance-data.json will result in warnings in ``/var/log/cloud-init.log``
+  and the following string in your rendered user-data:
+  ``CI_MISSING_JINJA_VAR/<varname>``.
+
+.. note::
+  To save time designing your user-data for a specific cloud's
+  instance-data.json, use the 'render' cloud-init command on an
+  instance booted on your favorite cloud. See :ref:`cli_devel` for more
+  information.
 
 
 Datasource API
diff --git a/doc/rtd/topics/format.rst b/doc/rtd/topics/format.rst
index 1b0ff366..15234d21 100644
--- a/doc/rtd/topics/format.rst
+++ b/doc/rtd/topics/format.rst
@@ -1,6 +1,8 @@
-*******
-Formats
-*******
+.. _user_data_formats:
+
+*****************
+User-Data Formats
+*****************
 
 User data that will be acted upon by cloud-init must be in one of the
 following types.
@@ -65,6 +67,11 @@ Typically used by those who just want to execute a shell script.
 
 Begins with: ``#!`` or ``Content-Type: text/x-shellscript`` when using a MIME
 archive.
 
+.. note::
+   New in cloud-init v. 18.4: User-data scripts can also render cloud instance
+   metadata variables using jinja templating. See
+   :ref:`instance_metadata` for more information.
+
 Example
 -------
@@ -103,12 +110,18 @@ These things include:
 
 - certain ssh keys should be imported
 - *and many more...*
 
-**Note:** The file must be valid yaml syntax.
+.. note::
+   This file must be valid yaml syntax.
 
 See the :ref:`yaml_examples` section for a commented set of examples of
 supported cloud config formats.
 
 Begins with: ``#cloud-config`` or ``Content-Type: text/cloud-config`` when
 using a MIME archive.
 
+.. note::
+   New in cloud-init v. 18.4: Cloud config data can also render cloud instance
+   metadata variables using jinja templating. See
+   :ref:`instance_metadata` for more information.
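To make the documented rendering flow concrete, here is a minimal Python sketch (an editorial aside, not part of the patch) exercising the render_jinja_payload helper this series adds in cloudinit/handlers/jinja_template.py; the payload and instance-data values are hypothetical, and jinja2 must be installed:

```
from cloudinit.handlers.jinja_template import render_jinja_payload

# Hypothetical user-data part and instance-data subset; on a real
# instance the variables come from /run/cloud-init/instance-data.json.
payload = (
    '## template: jinja\n'
    '#!/bin/bash\n'
    'echo "region is {{ v1.region }}"\n')
instance_data = {'v1': {'region': 'us-east-2'}}

# Strips the '## template: jinja' header and substitutes variables;
# keys under v1 are also promoted to top-level aliases (plain 'region').
print(render_jinja_payload(payload, 'part-01', instance_data))
# -> #!/bin/bash
#    echo "region is us-east-2"
```

Any variable absent from instance-data would instead render as the CI_MISSING_JINJA_VAR/ prefix plus the variable name, with a warning logged.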
+ Upstart Job =========== diff --git a/tests/cloud_tests/testcases/base.py b/tests/cloud_tests/testcases/base.py index 696db8dd..27458271 100644 --- a/tests/cloud_tests/testcases/base.py +++ b/tests/cloud_tests/testcases/base.py @@ -168,7 +168,7 @@ class CloudTestCase(unittest.TestCase): ' OS: %s not bionic or newer' % self.os_name) instance_data = json.loads(out) self.assertEqual( - ['ds/user-data'], instance_data['base64-encoded-keys']) + ['ds/user_data'], instance_data['base64_encoded_keys']) ds = instance_data.get('ds', {}) v1_data = instance_data.get('v1', {}) metadata = ds.get('meta-data', {}) @@ -214,8 +214,8 @@ class CloudTestCase(unittest.TestCase): instance_data = json.loads(out) v1_data = instance_data.get('v1', {}) self.assertEqual( - ['ds/user-data', 'ds/vendor-data'], - sorted(instance_data['base64-encoded-keys'])) + ['ds/user_data', 'ds/vendor_data'], + sorted(instance_data['base64_encoded_keys'])) self.assertEqual('nocloud', v1_data['cloud-name']) self.assertIsNone( v1_data['availability-zone'], @@ -249,7 +249,7 @@ class CloudTestCase(unittest.TestCase): instance_data = json.loads(out) v1_data = instance_data.get('v1', {}) self.assertEqual( - ['ds/user-data'], instance_data['base64-encoded-keys']) + ['ds/user_data'], instance_data['base64_encoded_keys']) self.assertEqual('nocloud', v1_data['cloud-name']) self.assertIsNone( v1_data['availability-zone'], diff --git a/tests/unittests/test_builtin_handlers.py b/tests/unittests/test_builtin_handlers.py index 9751ed95..abe820e1 100644 --- a/tests/unittests/test_builtin_handlers.py +++ b/tests/unittests/test_builtin_handlers.py @@ -2,27 +2,34 @@ """Tests of the built-in user data handlers.""" +import copy import os import shutil import tempfile +from textwrap import dedent -try: - from unittest import mock -except ImportError: - import mock -from cloudinit.tests import helpers as test_helpers +from cloudinit.tests.helpers import ( + FilesystemMockingTestCase, CiTestCase, mock, skipUnlessJinja) from cloudinit import handlers from cloudinit import helpers from cloudinit import util -from cloudinit.handlers import upstart_job +from cloudinit.handlers.cloud_config import CloudConfigPartHandler +from cloudinit.handlers.jinja_template import ( + JinjaTemplatePartHandler, convert_jinja_instance_data, + render_jinja_payload) +from cloudinit.handlers.shell_script import ShellScriptPartHandler +from cloudinit.handlers.upstart_job import UpstartJobPartHandler from cloudinit.settings import (PER_ALWAYS, PER_INSTANCE) -class TestBuiltins(test_helpers.FilesystemMockingTestCase): +class TestUpstartJobPartHandler(FilesystemMockingTestCase): + + mpath = 'cloudinit.handlers.upstart_job.' + def test_upstart_frequency_no_out(self): c_root = tempfile.mkdtemp() self.addCleanup(shutil.rmtree, c_root) @@ -32,14 +39,13 @@ class TestBuiltins(test_helpers.FilesystemMockingTestCase): 'cloud_dir': c_root, 'upstart_dir': up_root, }) - freq = PER_ALWAYS - h = upstart_job.UpstartJobPartHandler(paths) + h = UpstartJobPartHandler(paths) # No files should be written out when # the frequency is ! per-instance h.handle_part('', handlers.CONTENT_START, None, None, None) h.handle_part('blah', 'text/upstart-job', - 'test.conf', 'blah', freq) + 'test.conf', 'blah', frequency=PER_ALWAYS) h.handle_part('', handlers.CONTENT_END, None, None, None) self.assertEqual(0, len(os.listdir(up_root))) @@ -48,7 +54,6 @@ class TestBuiltins(test_helpers.FilesystemMockingTestCase): # files should be written out when frequency is ! 
per-instance
         new_root = tempfile.mkdtemp()
         self.addCleanup(shutil.rmtree, new_root)
-        freq = PER_INSTANCE
 
         self.patchOS(new_root)
         self.patchUtils(new_root)
@@ -56,22 +61,297 @@
             'upstart_dir': "/etc/upstart",
         })
 
-        upstart_job.SUITABLE_UPSTART = True
         util.ensure_dir("/run")
         util.ensure_dir("/etc/upstart")
 
-        with mock.patch.object(util, 'subp') as mockobj:
-            h = upstart_job.UpstartJobPartHandler(paths)
-            h.handle_part('', handlers.CONTENT_START,
-                          None, None, None)
-            h.handle_part('blah', 'text/upstart-job',
-                          'test.conf', 'blah', freq)
-            h.handle_part('', handlers.CONTENT_END,
-                          None, None, None)
+        with mock.patch(self.mpath + 'SUITABLE_UPSTART', return_value=True):
+            with mock.patch.object(util, 'subp') as m_subp:
+                h = UpstartJobPartHandler(paths)
+                h.handle_part('', handlers.CONTENT_START,
+                              None, None, None)
+                h.handle_part('blah', 'text/upstart-job',
+                              'test.conf', 'blah', frequency=PER_INSTANCE)
+                h.handle_part('', handlers.CONTENT_END,
+                              None, None, None)
 
-        self.assertEqual(len(os.listdir('/etc/upstart')), 1)
+            self.assertEqual(len(os.listdir('/etc/upstart')), 1)
 
-        mockobj.assert_called_once_with(
+        m_subp.assert_called_once_with(
             ['initctl', 'reload-configuration'], capture=False)
 
+
+class TestJinjaTemplatePartHandler(CiTestCase):
+
+    with_logs = True
+
+    mpath = 'cloudinit.handlers.jinja_template.'
+
+    def setUp(self):
+        super(TestJinjaTemplatePartHandler, self).setUp()
+        self.tmp = self.tmp_dir()
+        self.run_dir = os.path.join(self.tmp, 'run_dir')
+        util.ensure_dir(self.run_dir)
+        self.paths = helpers.Paths({
+            'cloud_dir': self.tmp, 'run_dir': self.run_dir})
+
+    def test_jinja_template_part_handler_defaults(self):
+        """On init, paths are saved and subhandler types are empty."""
+        h = JinjaTemplatePartHandler(self.paths)
+        self.assertEqual(['## template: jinja'], h.prefixes)
+        self.assertEqual(3, h.handler_version)
+        self.assertEqual(self.paths, h.paths)
+        self.assertEqual({}, h.sub_handlers)
+
+    def test_jinja_template_part_handler_looks_up_sub_handler_types(self):
+        """When sub_handlers are passed, init lists types of subhandlers."""
+        script_handler = ShellScriptPartHandler(self.paths)
+        cloudconfig_handler = CloudConfigPartHandler(self.paths)
+        h = JinjaTemplatePartHandler(
+            self.paths, sub_handlers=[script_handler, cloudconfig_handler])
+        self.assertItemsEqual(
+            ['text/cloud-config', 'text/cloud-config-jsonp',
+             'text/x-shellscript'],
+            h.sub_handlers)
+
+    def test_jinja_template_handle_noop_on_content_signals(self):
+        """Perform no part handling when content type is CONTENT_SIGNALS."""
+        script_handler = ShellScriptPartHandler(self.paths)
+
+        h = JinjaTemplatePartHandler(
+            self.paths, sub_handlers=[script_handler])
+        with mock.patch.object(script_handler, 'handle_part') as m_handle_part:
+            h.handle_part(
+                data='data', ctype=handlers.CONTENT_START, filename='part-1',
+                payload='## template: jinja\n#!/bin/bash\necho himom',
+                frequency='freq', headers='headers')
+        m_handle_part.assert_not_called()
+
+    @skipUnlessJinja()
+    def
test_jinja_template_handle_subhandler_v2_with_clean_payload(self): + """Call version 2 subhandler.handle_part with stripped payload.""" + script_handler = ShellScriptPartHandler(self.paths) + self.assertEqual(2, script_handler.handler_version) + + # Create required instance-data.json file + instance_json = os.path.join(self.run_dir, 'instance-data.json') + instance_data = {'topkey': 'echo himom'} + util.write_file(instance_json, util.json_dumps(instance_data)) + h = JinjaTemplatePartHandler( + self.paths, sub_handlers=[script_handler]) + with mock.patch.object(script_handler, 'handle_part') as m_part: + # ctype with leading '!' not in handlers.CONTENT_SIGNALS + h.handle_part( + data='data', ctype="!" + handlers.CONTENT_START, + filename='part01', + payload='## template: jinja \t \n#!/bin/bash\n{{ topkey }}', + frequency='freq', headers='headers') + m_part.assert_called_once_with( + 'data', '!__begin__', 'part01', '#!/bin/bash\necho himom', 'freq') + + @skipUnlessJinja() + def test_jinja_template_handle_subhandler_v3_with_clean_payload(self): + """Call version 3 subhandler.handle_part with stripped payload.""" + cloudcfg_handler = CloudConfigPartHandler(self.paths) + self.assertEqual(3, cloudcfg_handler.handler_version) + + # Create required instance-data.json file + instance_json = os.path.join(self.run_dir, 'instance-data.json') + instance_data = {'topkey': {'sub': 'runcmd: [echo hi]'}} + util.write_file(instance_json, util.json_dumps(instance_data)) + h = JinjaTemplatePartHandler( + self.paths, sub_handlers=[cloudcfg_handler]) + with mock.patch.object(cloudcfg_handler, 'handle_part') as m_part: + # ctype with leading '!' not in handlers.CONTENT_SIGNALS + h.handle_part( + data='data', ctype="!" + handlers.CONTENT_END, + filename='part01', + payload='## template: jinja\n#cloud-config\n{{ topkey.sub }}', + frequency='freq', headers='headers') + m_part.assert_called_once_with( + 'data', '!__end__', 'part01', '#cloud-config\nruncmd: [echo hi]', + 'freq', 'headers') + + def test_jinja_template_handle_errors_on_missing_instance_data_json(self): + """If instance-data is absent, raise an error from handle_part.""" + script_handler = ShellScriptPartHandler(self.paths) + h = JinjaTemplatePartHandler( + self.paths, sub_handlers=[script_handler]) + with self.assertRaises(RuntimeError) as context_manager: + h.handle_part( + data='data', ctype="!" + handlers.CONTENT_START, + filename='part01', + payload='## template: jinja \n#!/bin/bash\necho himom', + frequency='freq', headers='headers') + script_file = os.path.join(script_handler.script_dir, 'part01') + self.assertEqual( + 'Cannot render jinja template vars. Instance data not yet present' + ' at {}/instance-data.json'.format( + self.run_dir), str(context_manager.exception)) + self.assertFalse( + os.path.exists(script_file), + 'Unexpected file created %s' % script_file) + + @skipUnlessJinja() + def test_jinja_template_handle_renders_jinja_content(self): + """When present, render jinja variables from instance-data.json.""" + script_handler = ShellScriptPartHandler(self.paths) + instance_json = os.path.join(self.run_dir, 'instance-data.json') + instance_data = {'topkey': {'subkey': 'echo himom'}} + util.write_file(instance_json, util.json_dumps(instance_data)) + h = JinjaTemplatePartHandler( + self.paths, sub_handlers=[script_handler]) + h.handle_part( + data='data', ctype="!" 
+ handlers.CONTENT_START,
+            filename='part01',
+            payload=(
+                '## template: jinja \t \n'
+                '#!/bin/bash\n'
+                '{{ topkey.subkey|default("nosubkey") }}'),
+            frequency='freq', headers='headers')
+        script_file = os.path.join(script_handler.script_dir, 'part01')
+        self.assertNotIn(
+            'Instance data not yet present at {}/instance-data.json'.format(
+                self.run_dir),
+            self.logs.getvalue())
+        self.assertEqual(
+            '#!/bin/bash\necho himom', util.load_file(script_file))
+
+    @skipUnlessJinja()
+    def test_jinja_template_handle_renders_jinja_content_missing_keys(self):
+        """When specified jinja variable is undefined, log a warning."""
+        script_handler = ShellScriptPartHandler(self.paths)
+        instance_json = os.path.join(self.run_dir, 'instance-data.json')
+        instance_data = {'topkey': {'subkey': 'echo himom'}}
+        util.write_file(instance_json, util.json_dumps(instance_data))
+        h = JinjaTemplatePartHandler(
+            self.paths, sub_handlers=[script_handler])
+        h.handle_part(
+            data='data', ctype="!" + handlers.CONTENT_START,
+            filename='part01',
+            payload='## template: jinja \n#!/bin/bash\n{{ goodtry }}',
+            frequency='freq', headers='headers')
+        script_file = os.path.join(script_handler.script_dir, 'part01')
+        self.assertTrue(
+            os.path.exists(script_file),
+            'Missing expected file %s' % script_file)
+        self.assertIn(
+            "WARNING: Could not render jinja template variables in file"
+            " 'part01': 'goodtry'\n",
+            self.logs.getvalue())
+
+
+class TestConvertJinjaInstanceData(CiTestCase):
+
+    def test_convert_instance_data_hyphens_to_underscores(self):
+        """Replace hyphenated keys with underscores in instance-data."""
+        data = {'hyphenated-key': 'hyphenated-val',
+                'underscore_delim_key': 'underscore_delimited_val'}
+        expected_data = {'hyphenated_key': 'hyphenated-val',
+                         'underscore_delim_key': 'underscore_delimited_val'}
+        self.assertEqual(
+            expected_data,
+            convert_jinja_instance_data(data=data))
+
+    def test_convert_instance_data_promotes_versioned_keys_to_top_level(self):
+        """Any versioned keys are promoted as top-level keys
+
+        This provides any cloud-init standardized keys up at a top-level to
+        allow ease of reference for users. Instead of v1.availability_zone,
+        the name availability_zone can be used in templates.
+        """
+        data = {'ds': {'dskey1': 1, 'dskey2': 2},
+                'v1': {'v1key1': 'v1.1'},
+                'v2': {'v2key1': 'v2.1'}}
+        expected_data = copy.deepcopy(data)
+        expected_data.update({'v1key1': 'v1.1', 'v2key1': 'v2.1'})
+
+        converted_data = convert_jinja_instance_data(data=data)
+        self.assertItemsEqual(
+            ['ds', 'v1', 'v2', 'v1key1', 'v2key1'], converted_data.keys())
+        self.assertEqual(
+            expected_data,
+            converted_data)
+
+    def test_convert_instance_data_most_recent_version_of_promoted_keys(self):
+        """The most-recent versioned key value is promoted to top-level."""
+        data = {'v1': {'key1': 'old v1 key1', 'key2': 'old v1 key2'},
+                'v2': {'key1': 'newer v2 key1', 'key3': 'newer v2 key3'},
+                'v3': {'key1': 'newest v3 key1'}}
+        expected_data = copy.deepcopy(data)
+        expected_data.update(
+            {'key1': 'newest v3 key1', 'key2': 'old v1 key2',
+             'key3': 'newer v2 key3'})
+
+        converted_data = convert_jinja_instance_data(data=data)
+        self.assertEqual(
+            expected_data,
+            converted_data)
+
+    def test_convert_instance_data_decodes_decode_paths(self):
+        """Any decode_paths provided are decoded by convert_instance_data."""
+        data = {'key1': {'subkey1': 'aGkgbW9t'}, 'key2': 'aGkgZGFk'}
+        expected_data = copy.deepcopy(data)
+        expected_data['key1']['subkey1'] = 'hi mom'
+
+        converted_data = convert_jinja_instance_data(
+            data=data, decode_paths=('key1/subkey1',))
+        self.assertEqual(
+            expected_data,
+            converted_data)
+
+
+class TestRenderJinjaPayload(CiTestCase):
+
+    with_logs = True
+
+    @skipUnlessJinja()
+    def test_render_jinja_payload_logs_jinja_vars_on_debug(self):
+        """When debug is True, log jinja variables available."""
+        payload = (
+            '## template: jinja\n#!/bin/sh\necho hi from {{ v1.hostname }}')
+        instance_data = {'v1': {'hostname': 'foo'}, 'instance-id': 'iid'}
+        expected_log = dedent("""\
+            DEBUG: Converted jinja variables
+            {
+             "hostname": "foo",
+             "instance_id": "iid",
+             "v1": {
+              "hostname": "foo"
+             }
+            }
+            """)
+        self.assertEqual(
+            render_jinja_payload(
+                payload=payload, payload_fn='myfile',
+                instance_data=instance_data, debug=True),
+            '#!/bin/sh\necho hi from foo')
+        self.assertEqual(expected_log, self.logs.getvalue())
+
+    @skipUnlessJinja()
+    def test_render_jinja_payload_replaces_missing_variables_and_warns(self):
+        """Warn on missing jinja variables and replace the absent variable."""
+        payload = (
+            '## template: jinja\n#!/bin/sh\necho hi from {{ NOTHERE }}')
+        instance_data = {'v1': {'hostname': 'foo'}, 'instance-id': 'iid'}
+        self.assertEqual(
+            render_jinja_payload(
+                payload=payload, payload_fn='myfile',
+                instance_data=instance_data),
+            '#!/bin/sh\necho hi from CI_MISSING_JINJA_VAR/NOTHERE')
+        expected_log = (
+            'WARNING: Could not render jinja template variables in file'
+            " 'myfile': 'NOTHERE'")
+        self.assertIn(expected_log, self.logs.getvalue())
+
+# vi: ts=4 expandtab
diff --git a/tests/unittests/test_handler/test_handler_etc_hosts.py b/tests/unittests/test_handler/test_handler_etc_hosts.py
index ced05a8d..d854afcb 100644
--- a/tests/unittests/test_handler/test_handler_etc_hosts.py
+++ b/tests/unittests/test_handler/test_handler_etc_hosts.py
@@ -49,6 +49,7 @@ class TestHostsFile(t_help.FilesystemMockingTestCase):
         if '192.168.1.1\tblah.blah.us\tblah' not in contents:
             self.assertIsNone('Default etc/hosts content modified')
 
+    @t_help.skipUnlessJinja()
     def test_write_etc_hosts_suse_template(self):
         cfg = {
             'manage_etc_hosts': 'template',
diff --git a/tests/unittests/test_handler/test_handler_ntp.py b/tests/unittests/test_handler/test_handler_ntp.py
index 6fe3659d..0f22e579 100644
---
a/tests/unittests/test_handler/test_handler_ntp.py +++ b/tests/unittests/test_handler/test_handler_ntp.py @@ -3,6 +3,7 @@ from cloudinit.config import cc_ntp from cloudinit.sources import DataSourceNone from cloudinit import (distros, helpers, cloud, util) + from cloudinit.tests.helpers import ( CiTestCase, FilesystemMockingTestCase, mock, skipUnlessJsonSchema) diff --git a/tests/unittests/test_templating.py b/tests/unittests/test_templating.py index 20c87efa..c36e6eb0 100644 --- a/tests/unittests/test_templating.py +++ b/tests/unittests/test_templating.py @@ -21,6 +21,9 @@ except ImportError: class TestTemplates(test_helpers.CiTestCase): + + with_logs = True + jinja_utf8 = b'It\xe2\x80\x99s not ascii, {{name}}\n' jinja_utf8_rbob = b'It\xe2\x80\x99s not ascii, bob\n'.decode('utf-8') @@ -124,6 +127,13 @@ $a,$b''' self.add_header("jinja", self.jinja_utf8), {"name": "bob"}), self.jinja_utf8_rbob) + def test_jinja_nonascii_render_undefined_variables_to_default_py3(self): + """Test py3 jinja render_to_string with undefined variable default.""" + self.assertEqual( + templater.render_string( + self.add_header("jinja", self.jinja_utf8), {}), + self.jinja_utf8_rbob.replace('bob', 'CI_MISSING_JINJA_VAR/name')) + def test_jinja_nonascii_render_to_file(self): """Test jinja render_to_file of a filename with non-ascii content.""" tmpl_fn = self.tmp_path("j-render-to-file.template") @@ -144,5 +154,18 @@ $a,$b''' result = templater.render_from_file(tmpl_fn, {"name": "bob"}) self.assertEqual(result, self.jinja_utf8_rbob) + @test_helpers.skipIfJinja() + def test_jinja_warns_on_missing_dep_and_uses_basic_renderer(self): + """Test jinja render_from_file will fallback to basic renderer.""" + tmpl_fn = self.tmp_path("j-render-from-file.template") + write_file(tmpl_fn, omode="wb", + content=self.add_header( + "jinja", self.jinja_utf8).encode('utf-8')) + result = templater.render_from_file(tmpl_fn, {"name": "bob"}) + self.assertEqual(result, self.jinja_utf8.decode()) + self.assertIn( + 'WARNING: Jinja not available as the selected renderer for desired' + ' template, reverting to the basic renderer.', + self.logs.getvalue()) # vi: ts=4 expandtab -- cgit v1.2.3 From fc4b966ba928b30b1c586407e752e0b51b1031e8 Mon Sep 17 00:00:00 2001 From: Chad Smith Date: Tue, 25 Sep 2018 21:59:16 +0000 Subject: cli: add cloud-init query subcommand to query instance metadata MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cloud-init caches any cloud metadata crawled during boot in the file /run/cloud-init/instance-data.json. Cloud-init also standardizes some of that metadata across all clouds. The command 'cloud-init query' surfaces a simple CLI to query or format any cached instance metadata so that scripts or end-users do not have to write tools to crawl metadata themselves. Since 'cloud-init query' is runnable by non-root users, redact any sensitive data from instance-data.json and provide a root-readable unredacted instance-data-sensitive.json. Datasources can now define a sensitive_metadata_keys tuple which will redact any matching keys which could contain passwords or credentials from instance-data.json. 
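As an illustrative aside (not part of the commit message), the sketch below shows the dot-path lookup that a call like 'cloud-init query v1.cloud_name' performs, assuming the convert_jinja_instance_data helper this series ships; the sample dict is hypothetical:

```
from cloudinit.handlers.jinja_template import convert_jinja_instance_data

# Hypothetical instance-data: hyphenated keys are renamed with
# underscores so they are addressable as jinja variables and dot-paths.
instance_data = {'v1': {'cloud-name': 'nocloud'}, 'my-var': 'it worked'}
converted = convert_jinja_instance_data(instance_data)

response = converted
for part in 'v1.cloud_name'.split('.'):  # the dot-delimited varname
    response = response[part]
print(response)             # nocloud
print(converted['my_var'])  # it worked
```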
Also add the following standardized 'v1' instance-data.json keys:   - user_data: The base64encoded user-data provided at instance launch   - vendor_data: Any vendor_data provided to the instance at launch   - underscore_delimited versions of existing hyphenated keys:     instance_id, local_hostname, availability_zone, cloud_name --- bash_completion/cloud-init | 4 +- cloudinit/cmd/devel/render.py | 7 +- cloudinit/cmd/main.py | 10 ++ cloudinit/cmd/query.py | 155 ++++++++++++++++++ cloudinit/cmd/tests/test_query.py | 193 +++++++++++++++++++++++ cloudinit/helpers.py | 4 + cloudinit/sources/__init__.py | 76 +++++++-- cloudinit/sources/tests/test_init.py | 130 ++++++++++++--- doc/rtd/index.rst | 1 + doc/rtd/topics/capabilities.rst | 105 ++++++++++--- doc/rtd/topics/datasources.rst | 148 +---------------- doc/rtd/topics/instancedata.rst | 297 +++++++++++++++++++++++++++++++++++ integration-requirements.txt | 3 +- tests/cloud_tests/testcases/base.py | 52 +++--- 14 files changed, 952 insertions(+), 233 deletions(-) create mode 100644 cloudinit/cmd/query.py create mode 100644 cloudinit/cmd/tests/test_query.py create mode 100644 doc/rtd/topics/instancedata.rst (limited to 'cloudinit/sources/tests') diff --git a/bash_completion/cloud-init b/bash_completion/cloud-init index 6d01bf3a..8c25032f 100644 --- a/bash_completion/cloud-init +++ b/bash_completion/cloud-init @@ -10,7 +10,7 @@ _cloudinit_complete() cur_word="${COMP_WORDS[COMP_CWORD]}" prev_word="${COMP_WORDS[COMP_CWORD-1]}" - subcmds="analyze clean collect-logs devel dhclient-hook features init modules single status" + subcmds="analyze clean collect-logs devel dhclient-hook features init modules query single status" base_params="--help --file --version --debug --force" case ${COMP_CWORD} in 1) @@ -40,6 +40,8 @@ _cloudinit_complete() COMPREPLY=($(compgen -W "--help --mode" -- $cur_word)) ;; + query) + COMPREPLY=($(compgen -W "--all --help --instance-data --list-keys --user-data --vendor-data --debug" -- $cur_word));; single) COMPREPLY=($(compgen -W "--help --name --frequency --report" -- $cur_word)) ;; diff --git a/cloudinit/cmd/devel/render.py b/cloudinit/cmd/devel/render.py index e85933db..2ba6b681 100755 --- a/cloudinit/cmd/devel/render.py +++ b/cloudinit/cmd/devel/render.py @@ -9,7 +9,6 @@ import sys from cloudinit.handlers.jinja_template import render_jinja_payload_from_file from cloudinit import log from cloudinit.sources import INSTANCE_JSON_FILE -from cloudinit import util from . 
import addLogHandlerCLI, read_cfg_paths
 
 NAME = 'render'
 
@@ -54,11 +53,7 @@ def handle_args(name, args):
             paths.run_dir, INSTANCE_JSON_FILE)
     else:
         instance_data_fn = args.instance_data
-    try:
-        with open(instance_data_fn) as stream:
-            instance_data = stream.read()
-        instance_data = util.load_json(instance_data)
-    except IOError:
+    if not os.path.exists(instance_data_fn):
         LOG.error('Missing instance-data.json file: %s', instance_data_fn)
         return 1
     try:
diff --git a/cloudinit/cmd/main.py b/cloudinit/cmd/main.py
index 0eee583c..5a437020 100644
--- a/cloudinit/cmd/main.py
+++ b/cloudinit/cmd/main.py
@@ -791,6 +791,10 @@ def main(sysv_args=None):
                                  ' pass to this module'))
     parser_single.set_defaults(action=('single', main_single))
 
+    parser_query = subparsers.add_parser(
+        'query',
+        help='Query standardized instance metadata from the command line.')
+
     parser_dhclient = subparsers.add_parser('dhclient-hook',
                                             help=('run the dhclient hook'
                                                   'to record network info'))
@@ -842,6 +846,12 @@ def main(sysv_args=None):
         clean_parser(parser_clean)
         parser_clean.set_defaults(
             action=('clean', handle_clean_args))
+    elif sysv_args[0] == 'query':
+        from cloudinit.cmd.query import (
+            get_parser as query_parser, handle_args as handle_query_args)
+        query_parser(parser_query)
+        parser_query.set_defaults(
+            action=('render', handle_query_args))
     elif sysv_args[0] == 'status':
         from cloudinit.cmd.status import (
             get_parser as status_parser, handle_status_args)
diff --git a/cloudinit/cmd/query.py b/cloudinit/cmd/query.py
new file mode 100644
index 00000000..7d2d4fe4
--- /dev/null
+++ b/cloudinit/cmd/query.py
@@ -0,0 +1,155 @@
+# This file is part of cloud-init. See LICENSE file for license information.
+
+"""Query standardized instance metadata from the command line."""
+
+import argparse
+import os
+import six
+import sys
+
+from cloudinit.handlers.jinja_template import (
+    convert_jinja_instance_data, render_jinja_payload)
+from cloudinit.cmd.devel import addLogHandlerCLI, read_cfg_paths
+from cloudinit import log
+from cloudinit.sources import (
+    INSTANCE_JSON_FILE, INSTANCE_JSON_SENSITIVE_FILE, REDACT_SENSITIVE_VALUE)
+from cloudinit import util
+
+NAME = 'query'
+LOG = log.getLogger(NAME)
+
+
+def get_parser(parser=None):
+    """Build or extend an arg parser for query utility.
+
+    @param parser: Optional existing ArgumentParser instance representing the
+        query subcommand which will be extended to support the args of
+        this utility.
+
+    @returns: ArgumentParser with proper argument configuration.
+    """
+    if not parser:
+        parser = argparse.ArgumentParser(
+            prog=NAME, description='Query cloud-init instance data')
+    parser.add_argument(
+        '-d', '--debug', action='store_true', default=False,
+        help='Add verbose messages during template render')
+    parser.add_argument(
+        '-i', '--instance-data', type=str,
+        help=('Path to instance-data.json file. Default is /run/cloud-init/%s'
+              % INSTANCE_JSON_FILE))
+    parser.add_argument(
+        '-l', '--list-keys', action='store_true', default=False,
+        help=('List query keys available at the provided instance-data'
+              ' <varname>.'))
+    parser.add_argument(
+        '-u', '--user-data', type=str,
+        help=('Path to user-data file. Default is'
+              ' /var/lib/cloud/instance/user-data.txt'))
+    parser.add_argument(
+        '-v', '--vendor-data', type=str,
+        help=('Path to vendor-data file. Default is'
+              ' /var/lib/cloud/instance/vendor-data.txt'))
+    parser.add_argument(
+        'varname', type=str, nargs='?',
+        help=('A dot-delimited instance data variable to query from'
+              ' instance-data query.
For example: v2.local_hostname')) + parser.add_argument( + '-a', '--all', action='store_true', default=False, dest='dump_all', + help='Dump all available instance-data') + parser.add_argument( + '-f', '--format', type=str, dest='format', + help=('Optionally specify a custom output format string. Any' + ' instance-data variable can be specified between double-curly' + ' braces. For example -f "{{ v2.cloud_name }}"')) + return parser + + +def handle_args(name, args): + """Handle calls to 'cloud-init query' as a subcommand.""" + paths = None + addLogHandlerCLI(LOG, log.DEBUG if args.debug else log.WARNING) + if not any([args.list_keys, args.varname, args.format, args.dump_all]): + LOG.error( + 'Expected one of the options: --all, --format,' + ' --list-keys or varname') + get_parser().print_help() + return 1 + + uid = os.getuid() + if not all([args.instance_data, args.user_data, args.vendor_data]): + paths = read_cfg_paths() + if not args.instance_data: + if uid == 0: + default_json_fn = INSTANCE_JSON_SENSITIVE_FILE + else: + default_json_fn = INSTANCE_JSON_FILE # World readable + instance_data_fn = os.path.join(paths.run_dir, default_json_fn) + else: + instance_data_fn = args.instance_data + if not args.user_data: + user_data_fn = os.path.join(paths.instance_link, 'user-data.txt') + else: + user_data_fn = args.user_data + if not args.vendor_data: + vendor_data_fn = os.path.join(paths.instance_link, 'vendor-data.txt') + else: + vendor_data_fn = args.vendor_data + + try: + instance_json = util.load_file(instance_data_fn) + except IOError: + LOG.error('Missing instance-data.json file: %s', instance_data_fn) + return 1 + + instance_data = util.load_json(instance_json) + if uid != 0: + instance_data['userdata'] = ( + '<%s> file:%s' % (REDACT_SENSITIVE_VALUE, user_data_fn)) + instance_data['vendordata'] = ( + '<%s> file:%s' % (REDACT_SENSITIVE_VALUE, vendor_data_fn)) + else: + instance_data['userdata'] = util.load_file(user_data_fn) + instance_data['vendordata'] = util.load_file(vendor_data_fn) + if args.format: + payload = '## template: jinja\n{fmt}'.format(fmt=args.format) + rendered_payload = render_jinja_payload( + payload=payload, payload_fn='query commandline', + instance_data=instance_data, + debug=True if args.debug else False) + if rendered_payload: + print(rendered_payload) + return 0 + return 1 + + response = convert_jinja_instance_data(instance_data) + if args.varname: + try: + for var in args.varname.split('.'): + response = response[var] + except KeyError: + LOG.error('Undefined instance-data key %s', args.varname) + return 1 + if args.list_keys: + if not isinstance(response, dict): + LOG.error("--list-keys provided but '%s' is not a dict", var) + return 1 + response = '\n'.join(sorted(response.keys())) + elif args.list_keys: + response = '\n'.join(sorted(response.keys())) + if not isinstance(response, six.string_types): + response = util.json_dumps(response) + print(response) + return 0 + + +def main(): + """Tool to query specific instance-data values.""" + parser = get_parser() + sys.exit(handle_args(NAME, parser.parse_args())) + + +if __name__ == '__main__': + main() + +# vi: ts=4 expandtab diff --git a/cloudinit/cmd/tests/test_query.py b/cloudinit/cmd/tests/test_query.py new file mode 100644 index 00000000..fb87c6ab --- /dev/null +++ b/cloudinit/cmd/tests/test_query.py @@ -0,0 +1,193 @@ +# This file is part of cloud-init. See LICENSE file for license information. 
+ +from six import StringIO +from textwrap import dedent +import os + +from collections import namedtuple +from cloudinit.cmd import query +from cloudinit.helpers import Paths +from cloudinit.sources import REDACT_SENSITIVE_VALUE, INSTANCE_JSON_FILE +from cloudinit.tests.helpers import CiTestCase, mock +from cloudinit.util import ensure_dir, write_file + + +class TestQuery(CiTestCase): + + with_logs = True + + args = namedtuple( + 'queryargs', + ('debug dump_all format instance_data list_keys user_data vendor_data' + ' varname')) + + def setUp(self): + super(TestQuery, self).setUp() + self.tmp = self.tmp_dir() + self.instance_data = self.tmp_path('instance-data', dir=self.tmp) + + def test_handle_args_error_on_missing_param(self): + """Error when missing required parameters and print usage.""" + args = self.args( + debug=False, dump_all=False, format=None, instance_data=None, + list_keys=False, user_data=None, vendor_data=None, varname=None) + with mock.patch('sys.stderr', new_callable=StringIO) as m_stderr: + with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout: + self.assertEqual(1, query.handle_args('anyname', args)) + expected_error = ( + 'ERROR: Expected one of the options: --all, --format, --list-keys' + ' or varname\n') + self.assertIn(expected_error, self.logs.getvalue()) + self.assertIn('usage: query', m_stdout.getvalue()) + self.assertIn(expected_error, m_stderr.getvalue()) + + def test_handle_args_error_on_missing_instance_data(self): + """When instance_data file path does not exist, log an error.""" + absent_fn = self.tmp_path('absent', dir=self.tmp) + args = self.args( + debug=False, dump_all=True, format=None, instance_data=absent_fn, + list_keys=False, user_data='ud', vendor_data='vd', varname=None) + with mock.patch('sys.stderr', new_callable=StringIO) as m_stderr: + self.assertEqual(1, query.handle_args('anyname', args)) + self.assertIn( + 'ERROR: Missing instance-data.json file: %s' % absent_fn, + self.logs.getvalue()) + self.assertIn( + 'ERROR: Missing instance-data.json file: %s' % absent_fn, + m_stderr.getvalue()) + + def test_handle_args_defaults_instance_data(self): + """When no instance_data argument, default to configured run_dir.""" + args = self.args( + debug=False, dump_all=True, format=None, instance_data=None, + list_keys=False, user_data=None, vendor_data=None, varname=None) + run_dir = self.tmp_path('run_dir', dir=self.tmp) + ensure_dir(run_dir) + paths = Paths({'run_dir': run_dir}) + self.add_patch('cloudinit.cmd.query.read_cfg_paths', 'm_paths') + self.m_paths.return_value = paths + with mock.patch('sys.stderr', new_callable=StringIO) as m_stderr: + self.assertEqual(1, query.handle_args('anyname', args)) + json_file = os.path.join(run_dir, INSTANCE_JSON_FILE) + self.assertIn( + 'ERROR: Missing instance-data.json file: %s' % json_file, + self.logs.getvalue()) + self.assertIn( + 'ERROR: Missing instance-data.json file: %s' % json_file, + m_stderr.getvalue()) + + def test_handle_args_dumps_all_instance_data(self): + """When --all is specified query will dump all instance data vars.""" + write_file(self.instance_data, '{"my-var": "it worked"}') + args = self.args( + debug=False, dump_all=True, format=None, + instance_data=self.instance_data, list_keys=False, + user_data='ud', vendor_data='vd', varname=None) + with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout: + self.assertEqual(0, query.handle_args('anyname', args)) + self.assertEqual( + '{\n "my_var": "it worked",\n "userdata": "<%s> file:ud",\n' + ' "vendordata": "<%s> 
file:vd"\n}\n' % ( + REDACT_SENSITIVE_VALUE, REDACT_SENSITIVE_VALUE), + m_stdout.getvalue()) + + def test_handle_args_returns_top_level_varname(self): + """When the argument varname is passed, report its value.""" + write_file(self.instance_data, '{"my-var": "it worked"}') + args = self.args( + debug=False, dump_all=True, format=None, + instance_data=self.instance_data, list_keys=False, + user_data='ud', vendor_data='vd', varname='my_var') + with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout: + self.assertEqual(0, query.handle_args('anyname', args)) + self.assertEqual('it worked\n', m_stdout.getvalue()) + + def test_handle_args_returns_nested_varname(self): + """If user_data file is a jinja template render instance-data vars.""" + write_file(self.instance_data, + '{"v1": {"key-2": "value-2"}, "my-var": "it worked"}') + args = self.args( + debug=False, dump_all=False, format=None, + instance_data=self.instance_data, user_data='ud', vendor_data='vd', + list_keys=False, varname='v1.key_2') + with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout: + self.assertEqual(0, query.handle_args('anyname', args)) + self.assertEqual('value-2\n', m_stdout.getvalue()) + + def test_handle_args_returns_standardized_vars_to_top_level_aliases(self): + """Any standardized vars under v# are promoted as top-level aliases.""" + write_file( + self.instance_data, + '{"v1": {"v1_1": "val1.1"}, "v2": {"v2_2": "val2.2"},' + ' "top": "gun"}') + expected = dedent("""\ + { + "top": "gun", + "userdata": " file:ud", + "v1": { + "v1_1": "val1.1" + }, + "v1_1": "val1.1", + "v2": { + "v2_2": "val2.2" + }, + "v2_2": "val2.2", + "vendordata": " file:vd" + } + """) + args = self.args( + debug=False, dump_all=True, format=None, + instance_data=self.instance_data, user_data='ud', vendor_data='vd', + list_keys=False, varname=None) + with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout: + self.assertEqual(0, query.handle_args('anyname', args)) + self.assertEqual(expected, m_stdout.getvalue()) + + def test_handle_args_list_keys_sorts_top_level_keys_when_no_varname(self): + """Sort all top-level keys when only --list-keys provided.""" + write_file( + self.instance_data, + '{"v1": {"v1_1": "val1.1"}, "v2": {"v2_2": "val2.2"},' + ' "top": "gun"}') + expected = 'top\nuserdata\nv1\nv1_1\nv2\nv2_2\nvendordata\n' + args = self.args( + debug=False, dump_all=False, format=None, + instance_data=self.instance_data, list_keys=True, user_data='ud', + vendor_data='vd', varname=None) + with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout: + self.assertEqual(0, query.handle_args('anyname', args)) + self.assertEqual(expected, m_stdout.getvalue()) + + def test_handle_args_list_keys_sorts_nested_keys_when_varname(self): + """Sort all nested keys of varname object when --list-keys provided.""" + write_file( + self.instance_data, + '{"v1": {"v1_1": "val1.1", "v1_2": "val1.2"}, "v2":' + + ' {"v2_2": "val2.2"}, "top": "gun"}') + expected = 'v1_1\nv1_2\n' + args = self.args( + debug=False, dump_all=False, format=None, + instance_data=self.instance_data, list_keys=True, + user_data='ud', vendor_data='vd', varname='v1') + with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout: + self.assertEqual(0, query.handle_args('anyname', args)) + self.assertEqual(expected, m_stdout.getvalue()) + + def test_handle_args_list_keys_errors_when_varname_is_not_a_dict(self): + """Raise an error when --list-keys and varname specify a non-list.""" + write_file( + self.instance_data, + '{"v1": {"v1_1": "val1.1", "v1_2": 
"val1.2"}, "v2": ' + + '{"v2_2": "val2.2"}, "top": "gun"}') + expected_error = "ERROR: --list-keys provided but 'top' is not a dict" + args = self.args( + debug=False, dump_all=False, format=None, + instance_data=self.instance_data, list_keys=True, user_data='ud', + vendor_data='vd', varname='top') + with mock.patch('sys.stderr', new_callable=StringIO) as m_stderr: + with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout: + self.assertEqual(1, query.handle_args('anyname', args)) + self.assertEqual('', m_stdout.getvalue()) + self.assertIn(expected_error, m_stderr.getvalue()) + +# vi: ts=4 expandtab diff --git a/cloudinit/helpers.py b/cloudinit/helpers.py index 3cc1fb19..dcd2645e 100644 --- a/cloudinit/helpers.py +++ b/cloudinit/helpers.py @@ -239,6 +239,10 @@ class ConfigMerger(object): if cc_fn and os.path.isfile(cc_fn): try: i_cfgs.append(util.read_conf(cc_fn)) + except PermissionError: + LOG.debug( + 'Skipped loading cloud-config from %s due to' + ' non-root.', cc_fn) except Exception: util.logexc(LOG, 'Failed loading of cloud-config from %s', cc_fn) diff --git a/cloudinit/sources/__init__.py b/cloudinit/sources/__init__.py index a775f1a8..730e8174 100644 --- a/cloudinit/sources/__init__.py +++ b/cloudinit/sources/__init__.py @@ -38,8 +38,12 @@ DEP_FILESYSTEM = "FILESYSTEM" DEP_NETWORK = "NETWORK" DS_PREFIX = 'DataSource' -# File in which instance meta-data, user-data and vendor-data is written +# File in which public available instance meta-data is written +# security-sensitive key values are redacted from this world-readable file INSTANCE_JSON_FILE = 'instance-data.json' +# security-sensitive key values are present in this root-readable file +INSTANCE_JSON_SENSITIVE_FILE = 'instance-data-sensitive.json' +REDACT_SENSITIVE_VALUE = 'redacted for non-root user' # Key which can be provide a cloud's official product name to cloud-init METADATA_CLOUD_NAME_KEY = 'cloud-name' @@ -58,7 +62,7 @@ class InvalidMetaDataException(Exception): pass -def process_instance_metadata(metadata, key_path=''): +def process_instance_metadata(metadata, key_path='', sensitive_keys=()): """Process all instance metadata cleaning it up for persisting as json. Strip ci-b64 prefix and catalog any 'base64_encoded_keys' as a list @@ -67,22 +71,46 @@ def process_instance_metadata(metadata, key_path=''): """ md_copy = copy.deepcopy(metadata) md_copy['base64_encoded_keys'] = [] + md_copy['sensitive_keys'] = [] for key, val in metadata.items(): if key_path: sub_key_path = key_path + '/' + key else: sub_key_path = key + if key in sensitive_keys or sub_key_path in sensitive_keys: + md_copy['sensitive_keys'].append(sub_key_path) if isinstance(val, str) and val.startswith('ci-b64:'): md_copy['base64_encoded_keys'].append(sub_key_path) md_copy[key] = val.replace('ci-b64:', '') if isinstance(val, dict): - return_val = process_instance_metadata(val, sub_key_path) + return_val = process_instance_metadata( + val, sub_key_path, sensitive_keys) md_copy['base64_encoded_keys'].extend( return_val.pop('base64_encoded_keys')) + md_copy['sensitive_keys'].extend( + return_val.pop('sensitive_keys')) md_copy[key] = return_val return md_copy +def redact_sensitive_keys(metadata, redact_value=REDACT_SENSITIVE_VALUE): + """Redact any sensitive keys from to provided metadata dictionary. + + Replace any keys values listed in 'sensitive_keys' with redact_value. 
+ """ + if not metadata.get('sensitive_keys', []): + return metadata + md_copy = copy.deepcopy(metadata) + for key_path in metadata.get('sensitive_keys'): + path_parts = key_path.split('/') + obj = md_copy + for path in path_parts: + if isinstance(obj[path], dict) and path != path_parts[-1]: + obj = obj[path] + obj[path] = redact_value + return md_copy + + URLParams = namedtuple( 'URLParms', ['max_wait_seconds', 'timeout_seconds', 'num_retries']) @@ -127,6 +155,10 @@ class DataSource(object): _dirty_cache = False + # N-tuple of keypaths or keynames redact from instance-data.json for + # non-root users + sensitive_metadata_keys = ('security-credentials',) + def __init__(self, sys_cfg, distro, paths, ud_proc=None): self.sys_cfg = sys_cfg self.distro = distro @@ -152,12 +184,24 @@ class DataSource(object): def _get_standardized_metadata(self): """Return a dictionary of standardized metadata keys.""" - return {'v1': { - 'local-hostname': self.get_hostname(), - 'instance-id': self.get_instance_id(), - 'cloud-name': self.cloud_name, - 'region': self.region, - 'availability-zone': self.availability_zone}} + local_hostname = self.get_hostname() + instance_id = self.get_instance_id() + availability_zone = self.availability_zone + cloud_name = self.cloud_name + # When adding new standard keys prefer underscore-delimited instead + # of hyphen-delimted to support simple variable references in jinja + # templates. + return { + 'v1': { + 'availability-zone': availability_zone, + 'availability_zone': availability_zone, + 'cloud-name': cloud_name, + 'cloud_name': cloud_name, + 'instance-id': instance_id, + 'instance_id': instance_id, + 'local-hostname': local_hostname, + 'local_hostname': local_hostname, + 'region': self.region}} def clear_cached_attrs(self, attr_defaults=()): """Reset any cached metadata attributes to datasource defaults. @@ -200,9 +244,7 @@ class DataSource(object): """ instance_data = { 'ds': { - 'meta_data': self.metadata, - 'user_data': self.get_userdata_raw(), - 'vendor_data': self.get_vendordata_raw()}} + 'meta_data': self.metadata}} if hasattr(self, 'network_json'): network_json = getattr(self, 'network_json') if network_json != UNSET: @@ -217,7 +259,9 @@ class DataSource(object): # Process content base64encoding unserializable values content = util.json_dumps(instance_data) # Strip base64: prefix and set base64_encoded_keys list. - processed_data = process_instance_metadata(json.loads(content)) + processed_data = process_instance_metadata( + json.loads(content), + sensitive_keys=self.sensitive_metadata_keys) except TypeError as e: LOG.warning('Error persisting instance-data.json: %s', str(e)) return False @@ -225,7 +269,11 @@ class DataSource(object): LOG.warning('Error persisting instance-data.json: %s', str(e)) return False json_file = os.path.join(self.paths.run_dir, INSTANCE_JSON_FILE) - write_json(json_file, processed_data, mode=0o600) + write_json(json_file, processed_data) # World readable + json_sensitive_file = os.path.join(self.paths.run_dir, + INSTANCE_JSON_SENSITIVE_FILE) + write_json(json_sensitive_file, + redact_sensitive_keys(processed_data), mode=0o600) return True def _get_data(self): diff --git a/cloudinit/sources/tests/test_init.py b/cloudinit/sources/tests/test_init.py index 8299af23..6b965750 100644 --- a/cloudinit/sources/tests/test_init.py +++ b/cloudinit/sources/tests/test_init.py @@ -1,5 +1,6 @@ # This file is part of cloud-init. See LICENSE file for license information. 
+import copy import inspect import os import six @@ -9,7 +10,8 @@ from cloudinit.event import EventType from cloudinit.helpers import Paths from cloudinit import importer from cloudinit.sources import ( - INSTANCE_JSON_FILE, DataSource, UNSET) + INSTANCE_JSON_FILE, INSTANCE_JSON_SENSITIVE_FILE, REDACT_SENSITIVE_VALUE, + UNSET, DataSource, redact_sensitive_keys) from cloudinit.tests.helpers import CiTestCase, skipIf, mock from cloudinit.user_data import UserDataProcessor from cloudinit import util @@ -20,20 +22,24 @@ class DataSourceTestSubclassNet(DataSource): dsname = 'MyTestSubclass' url_max_wait = 55 - def __init__(self, sys_cfg, distro, paths, custom_userdata=None, - get_data_retval=True): + def __init__(self, sys_cfg, distro, paths, custom_metadata=None, + custom_userdata=None, get_data_retval=True): super(DataSourceTestSubclassNet, self).__init__( sys_cfg, distro, paths) self._custom_userdata = custom_userdata + self._custom_metadata = custom_metadata self._get_data_retval = get_data_retval def _get_cloud_name(self): return 'SubclassCloudName' def _get_data(self): - self.metadata = {'availability_zone': 'myaz', - 'local-hostname': 'test-subclass-hostname', - 'region': 'myregion'} + if self._custom_metadata: + self.metadata = self._custom_metadata + else: + self.metadata = {'availability_zone': 'myaz', + 'local-hostname': 'test-subclass-hostname', + 'region': 'myregion'} if self._custom_userdata: self.userdata_raw = self._custom_userdata else: @@ -278,7 +284,7 @@ class TestDataSource(CiTestCase): os.path.exists(json_file), 'Found unexpected file %s' % json_file) def test_get_data_writes_json_instance_data_on_success(self): - """get_data writes INSTANCE_JSON_FILE to run_dir as readonly root.""" + """get_data writes INSTANCE_JSON_FILE to run_dir as world readable.""" tmp = self.tmp_dir() datasource = DataSourceTestSubclassNet( self.sys_cfg, self.distro, Paths({'run_dir': tmp})) @@ -287,40 +293,90 @@ class TestDataSource(CiTestCase): content = util.load_file(json_file) expected = { 'base64_encoded_keys': [], + 'sensitive_keys': [], 'v1': { 'availability-zone': 'myaz', + 'availability_zone': 'myaz', 'cloud-name': 'subclasscloudname', + 'cloud_name': 'subclasscloudname', 'instance-id': 'iid-datasource', + 'instance_id': 'iid-datasource', 'local-hostname': 'test-subclass-hostname', + 'local_hostname': 'test-subclass-hostname', 'region': 'myregion'}, 'ds': { 'meta_data': {'availability_zone': 'myaz', 'local-hostname': 'test-subclass-hostname', - 'region': 'myregion'}, - 'user_data': 'userdata_raw', - 'vendor_data': 'vendordata_raw'}} - self.maxDiff = None + 'region': 'myregion'}}} self.assertEqual(expected, util.load_json(content)) file_stat = os.stat(json_file) + self.assertEqual(0o644, stat.S_IMODE(file_stat.st_mode)) + self.assertEqual(expected, util.load_json(content)) + + def test_get_data_writes_json_instance_data_sensitive(self): + """get_data writes INSTANCE_JSON_SENSITIVE_FILE as readonly root.""" + tmp = self.tmp_dir() + datasource = DataSourceTestSubclassNet( + self.sys_cfg, self.distro, Paths({'run_dir': tmp}), + custom_metadata={ + 'availability_zone': 'myaz', + 'local-hostname': 'test-subclass-hostname', + 'region': 'myregion', + 'some': {'security-credentials': { + 'cred1': 'sekret', 'cred2': 'othersekret'}}}) + self.assertEqual( + ('security-credentials',), datasource.sensitive_metadata_keys) + datasource.get_data() + json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp) + sensitive_json_file = self.tmp_path(INSTANCE_JSON_SENSITIVE_FILE, tmp) + redacted = 
util.load_json(util.load_file(json_file))
+        self.assertEqual(
+            {'cred1': 'sekret', 'cred2': 'othersekret'},
+            redacted['ds']['meta_data']['some']['security-credentials'])
+        content = util.load_file(sensitive_json_file)
+        expected = {
+            'base64_encoded_keys': [],
+            'sensitive_keys': ['ds/meta_data/some/security-credentials'],
+            'v1': {
+                'availability-zone': 'myaz',
+                'availability_zone': 'myaz',
+                'cloud-name': 'subclasscloudname',
+                'cloud_name': 'subclasscloudname',
+                'instance-id': 'iid-datasource',
+                'instance_id': 'iid-datasource',
+                'local-hostname': 'test-subclass-hostname',
+                'local_hostname': 'test-subclass-hostname',
+                'region': 'myregion'},
+            'ds': {
+                'meta_data': {
+                    'availability_zone': 'myaz',
+                    'local-hostname': 'test-subclass-hostname',
+                    'region': 'myregion',
+                    'some': {'security-credentials': REDACT_SENSITIVE_VALUE}}}
+        }
+        self.maxDiff = None
+        self.assertEqual(expected, util.load_json(content))
+        file_stat = os.stat(sensitive_json_file)
         self.assertEqual(0o600, stat.S_IMODE(file_stat.st_mode))
+        self.assertEqual(expected, util.load_json(content))
 
     def test_get_data_handles_redacted_unserializable_content(self):
         """get_data warns unserializable content in INSTANCE_JSON_FILE."""
         tmp = self.tmp_dir()
         datasource = DataSourceTestSubclassNet(
             self.sys_cfg, self.distro, Paths({'run_dir': tmp}),
-            custom_userdata={'key1': 'val1', 'key2': {'key2.1': self.paths}})
+            custom_metadata={'key1': 'val1', 'key2': {'key2.1': self.paths}})
         datasource.get_data()
         json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp)
         content = util.load_file(json_file)
-        expected_userdata = {
+        expected_metadata = {
             'key1': 'val1',
             'key2': {
                 'key2.1': "Warning: redacted unserializable type <class 'cloudinit.helpers.Paths'>"}}
         instance_json = util.load_json(content)
         self.assertEqual(
-            expected_userdata, instance_json['ds']['user_data'])
+            expected_metadata, instance_json['ds']['meta_data'])
 
     def test_persist_instance_data_writes_ec2_metadata_when_set(self):
         """When ec2_metadata class attribute is set, persist to json."""
@@ -361,17 +417,17 @@ class TestDataSource(CiTestCase):
         tmp = self.tmp_dir()
         datasource = DataSourceTestSubclassNet(
             self.sys_cfg, self.distro, Paths({'run_dir': tmp}),
-            custom_userdata={'key1': 'val1', 'key2': {'key2.1': b'\x123'}})
+            custom_metadata={'key1': 'val1', 'key2': {'key2.1': b'\x123'}})
         self.assertTrue(datasource.get_data())
         json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp)
         content = util.load_file(json_file)
         instance_json = util.load_json(content)
-        self.assertEqual(
-            ['ds/user_data/key2/key2.1'],
+        self.assertItemsEqual(
+            ['ds/meta_data/key2/key2.1'],
             instance_json['base64_encoded_keys'])
         self.assertEqual(
             {'key1': 'val1', 'key2': {'key2.1': 'EjM='}},
-            instance_json['ds']['user_data'])
+            instance_json['ds']['meta_data'])
 
     @skipIf(not six.PY2, "json serialization on <= py2.7 handles bytes")
     def test_get_data_handles_bytes_values(self):
@@ -379,7 +435,7 @@
         tmp = self.tmp_dir()
         datasource = DataSourceTestSubclassNet(
             self.sys_cfg, self.distro, Paths({'run_dir': tmp}),
-            custom_userdata={'key1': 'val1', 'key2': {'key2.1': b'\x123'}})
+            custom_metadata={'key1': 'val1', 'key2': {'key2.1': b'\x123'}})
         self.assertTrue(datasource.get_data())
         json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp)
         content = util.load_file(json_file)
@@ -387,7 +443,7 @@ class TestDataSource(CiTestCase):
         self.assertEqual([], instance_json['base64_encoded_keys'])
         self.assertEqual(
             {'key1': 'val1', 'key2': {'key2.1': '\x123'}},
-            instance_json['ds']['user_data'])
+            instance_json['ds']['meta_data'])
 
     @skipIf(not 
six.PY2, "Only python2 hits UnicodeDecodeErrors on non-utf8") def test_non_utf8_encoding_logs_warning(self): @@ -395,7 +451,7 @@ class TestDataSource(CiTestCase): tmp = self.tmp_dir() datasource = DataSourceTestSubclassNet( self.sys_cfg, self.distro, Paths({'run_dir': tmp}), - custom_userdata={'key1': 'val1', 'key2': {'key2.1': b'ab\xaadef'}}) + custom_metadata={'key1': 'val1', 'key2': {'key2.1': b'ab\xaadef'}}) self.assertTrue(datasource.get_data()) json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp) self.assertFalse(os.path.exists(json_file)) @@ -509,4 +565,36 @@ class TestDataSource(CiTestCase): self.logs.getvalue()) +class TestRedactSensitiveData(CiTestCase): + + def test_redact_sensitive_data_noop_when_no_sensitive_keys_present(self): + """When sensitive_keys is absent or empty from metadata do nothing.""" + md = {'my': 'data'} + self.assertEqual( + md, redact_sensitive_keys(md, redact_value='redacted')) + md['sensitive_keys'] = [] + self.assertEqual( + md, redact_sensitive_keys(md, redact_value='redacted')) + + def test_redact_sensitive_data_redacts_exact_match_name(self): + """Only exact matched sensitive_keys are redacted from metadata.""" + md = {'sensitive_keys': ['md/secure'], + 'md': {'secure': 's3kr1t', 'insecure': 'publik'}} + secure_md = copy.deepcopy(md) + secure_md['md']['secure'] = 'redacted' + self.assertEqual( + secure_md, + redact_sensitive_keys(md, redact_value='redacted')) + + def test_redact_sensitive_data_does_redacts_with_default_string(self): + """When redact_value is absent, REDACT_SENSITIVE_VALUE is used.""" + md = {'sensitive_keys': ['md/secure'], + 'md': {'secure': 's3kr1t', 'insecure': 'publik'}} + secure_md = copy.deepcopy(md) + secure_md['md']['secure'] = 'redacted for non-root user' + self.assertEqual( + secure_md, + redact_sensitive_keys(md)) + + # vi: ts=4 expandtab diff --git a/doc/rtd/index.rst b/doc/rtd/index.rst index de67f361..20a99a30 100644 --- a/doc/rtd/index.rst +++ b/doc/rtd/index.rst @@ -31,6 +31,7 @@ initialization of a cloud instance. topics/capabilities.rst topics/availability.rst topics/format.rst + topics/instancedata.rst topics/dir_layout.rst topics/examples.rst topics/boot.rst diff --git a/doc/rtd/topics/capabilities.rst b/doc/rtd/topics/capabilities.rst index 2d8e2538..0d8b8947 100644 --- a/doc/rtd/topics/capabilities.rst +++ b/doc/rtd/topics/capabilities.rst @@ -18,7 +18,7 @@ User configurability User-data can be given by the user at instance launch time. See :ref:`user_data_formats` for acceptable user-data content. - + This is done via the ``--user-data`` or ``--user-data-file`` argument to ec2-run-instances for example. @@ -53,10 +53,9 @@ system: % cloud-init --help usage: cloud-init [-h] [--version] [--file FILES] - [--debug] [--force] - {init,modules,single,dhclient-hook,features,analyze,devel,collect-logs,clean,status} - ... + {init,modules,single,query,dhclient-hook,features,analyze,devel,collect-logs,clean,status} + ... 
optional arguments:
   -h, --help            show this help message and exit
@@ -68,17 +67,19 @@ system:
                         your own risk)
 
 Subcommands:
-    {init,modules,single,dhclient-hook,features,analyze,devel,collect-logs,clean,status}
+    {init,modules,single,query,dhclient-hook,features,analyze,devel,collect-logs,clean,status}
     init                initializes cloud-init and performs initial modules
     modules             activates modules using a given configuration key
     single              run a single module
+    query               Query instance metadata from the command line
     dhclient-hook       run the dhclient hook to record network info
     features            list defined features
     analyze             Devel tool: Analyze cloud-init logs and data
     devel               Run development tools
     collect-logs        Collect and tar all cloud-init debug info
-    clean               Remove logs and artifacts so cloud-init can re-run.
-    status              Report cloud-init status or wait on completion.
+    clean               Remove logs and artifacts so cloud-init can re-run
+    status              Report cloud-init status or wait on completion
+
 
 CLI Subcommand details
 ======================
@@ -104,8 +105,8 @@ cloud-init status
 Report whether cloud-init is running, done, disabled or errored. Exits
 non-zero if an error is detected in cloud-init.
 
- * **--long**: Detailed status information.
- * **--wait**: Block until cloud-init completes.
+* **--long**: Detailed status information.
+* **--wait**: Block until cloud-init completes.
 
 .. code-block:: shell-session
 
@@ -143,6 +144,68 @@ Logs collected are:
  * journalctl output
  * /var/lib/cloud/instance/user-data.txt
 
+.. _cli_query:
+
+cloud-init query
+------------------
+Query standardized cloud instance metadata crawled by cloud-init and stored
+in ``/run/cloud-init/instance-data.json``. This is a convenience command-line
+interface to reference any cached configuration metadata that cloud-init
+crawls when booting the instance. See :ref:`instance_metadata` for more info.
+
+* **--all**: Dump all available instance data as json which can be queried.
+* **--instance-data**: Optional path to a different instance-data.json file to
+  source for queries.
+* **--list-keys**: List available query keys from cached instance data.
+
+.. code-block:: shell-session
+
+  # List all top-level query keys available (includes standardized aliases)
+  % cloud-init query --list-keys
+  availability_zone
+  base64_encoded_keys
+  cloud_name
+  ds
+  instance_id
+  local_hostname
+  region
+  v1
+
+* **<varname>**: A dot-delimited variable path into the instance-data.json
+  object.
+
+.. code-block:: shell-session
+
+  # Query cloud-init standardized metadata on any cloud
+  % cloud-init query v1.cloud_name
+  aws  # or openstack, azure, gce etc.
+
+  # Any standardized instance-data under a v# key is aliased as a top-level
+  # key for convenience.
+  % cloud-init query cloud_name
+  aws  # or openstack, azure, gce etc.
+
+  # Query datasource-specific metadata on EC2
+  % cloud-init query ds.meta_data.public_ipv4
+
+* **--format** A string that will use jinja-template syntax to render a
+  string replacing any referenced instance-data variables.
+
+.. code-block:: shell-session
+
+  # Generate a custom hostname fqdn based on instance-id, cloud and region
+  % cloud-init query --format 'custom-{{instance_id}}.{{region}}.{{v1.cloud_name}}.com'
+  custom-i-0e91f69987f37ec74.us-east-2.aws.com
+
+
+.. note::
+  The standardized instance data keys under **v#** are guaranteed not to change
+  behavior or format. If using top-level convenience aliases for any
+  standardized instance data keys, the most recent value (highest **v#**) of
+  that key name is what is reported as the top-level value. So these aliases
+  act as a 'latest'.
+
+
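+The same queries can also be scripted. A minimal sketch (assuming a booted
+instance where the ``cloud-init`` binary is on PATH):
+
+.. code-block:: python
+
+  import json
+  import subprocess
+
+  # 'query --all' dumps the full instance-data object as json.
+  output = subprocess.check_output(['cloud-init', 'query', '--all'])
+  instance_data = json.loads(output)
+  print(instance_data['v1']['cloud_name'])
+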
.. _cli_analyze:
 
 cloud-init analyze
@@ -150,10 +213,10 @@
 Get detailed reports of where cloud-init spends most of its time. See
 :ref:`boot_time_analysis` for more info.
 
- * **blame** Report ordered by most costly operations.
- * **dump** Machine-readable JSON dump of all cloud-init tracked events.
- * **show** show time-ordered report of the cost of operations during each
-   boot stage.
+* **blame** Report ordered by most costly operations.
+* **dump** Machine-readable JSON dump of all cloud-init tracked events.
+* **show** Show time-ordered report of the cost of operations during each
+  boot stage.
 
 .. _cli_devel:
 
@@ -182,8 +245,8 @@ cloud-init clean
 Remove cloud-init artifacts from /var/lib/cloud and optionally reboot the
 machine so cloud-init re-runs all stages as it did on first boot.
 
- * **--logs**: Optionally remove /var/log/cloud-init*log files.
- * **--reboot**: Reboot the system after removing artifacts.
+* **--logs**: Optionally remove /var/log/cloud-init*log files.
+* **--reboot**: Reboot the system after removing artifacts.
 
 .. _cli_init:
 
@@ -195,7 +258,7 @@
 Can be run on the commandline, but is generally gated to run only once
 due to semaphores in **/var/lib/cloud/instance/sem/** and
 **/var/lib/cloud/sem**.
 
- * **--local**: Run *init-local* stage instead of *init*.
+* **--local**: Run *init-local* stage instead of *init*.
 
 .. _cli_modules:
 
@@ -210,8 +273,8 @@ declared to run in various boot stages in the file
 commandline, but each module is gated to run only once due to semaphores
 in ``/var/lib/cloud/``.
 
- * **--mode (init|config|final)**: Run *modules:init*, *modules:config* or
-   *modules:final* cloud-init stages. See :ref:`boot_stages` for more info.
+* **--mode (init|config|final)**: Run *modules:init*, *modules:config* or
+  *modules:final* cloud-init stages. See :ref:`boot_stages` for more info.
 
 .. _cli_single:
 
@@ -221,9 +284,9 @@ Attempt to run a single named cloud config module. The following example
 re-runs the cc_set_hostname module ignoring the module default frequency
 of once-per-instance:
 
- * **--name**: The cloud-config module name to run
- * **--frequency**: Optionally override the declared module frequency
-   with one of (always|once-per-instance|once)
+* **--name**: The cloud-config module name to run
+* **--frequency**: Optionally override the declared module frequency
+  with one of (always|once-per-instance|once)
 
 .. code-block:: shell-session
diff --git a/doc/rtd/topics/datasources.rst b/doc/rtd/topics/datasources.rst
index 14432e65..e34f145c 100644
--- a/doc/rtd/topics/datasources.rst
+++ b/doc/rtd/topics/datasources.rst
@@ -17,146 +17,10 @@
 own way) internally a datasource abstract class was created to allow for a
 single way to access the different cloud systems methods to provide this data
 through the typical usage of subclasses.
 
-
-.. _instance_metadata:
-
-instance-data
--------------
-For reference, cloud-init stores all the metadata, vendordata and userdata
-provided by a cloud in a json blob at ``/run/cloud-init/instance-data.json``.
-While the json contains datasource-specific keys and names, cloud-init will
-maintain a minimal set of standardized keys that will remain stable on any
-cloud. Standardized instance-data keys will be present under a "v1" key.
-Any datasource metadata cloud-init consumes will all be present under the
-"ds" key.
-
-Below is an instance-data.json example from an OpenStack instance:
-
-.. 
sourcecode:: json - - { - "base64-encoded-keys": [ - "ds/meta-data/random_seed", - "ds/user-data" - ], - "ds": { - "ec2_metadata": { - "ami-id": "ami-0000032f", - "ami-launch-index": "0", - "ami-manifest-path": "FIXME", - "block-device-mapping": { - "ami": "vda", - "ephemeral0": "/dev/vdb", - "root": "/dev/vda" - }, - "hostname": "xenial-test.novalocal", - "instance-action": "none", - "instance-id": "i-0006e030", - "instance-type": "m1.small", - "local-hostname": "xenial-test.novalocal", - "local-ipv4": "10.5.0.6", - "placement": { - "availability-zone": "None" - }, - "public-hostname": "xenial-test.novalocal", - "public-ipv4": "10.245.162.145", - "reservation-id": "r-fxm623oa", - "security-groups": "default" - }, - "meta-data": { - "availability_zone": null, - "devices": [], - "hostname": "xenial-test.novalocal", - "instance-id": "3e39d278-0644-4728-9479-678f9212d8f0", - "launch_index": 0, - "local-hostname": "xenial-test.novalocal", - "name": "xenial-test", - "project_id": "e0eb2d2538814...", - "random_seed": "A6yPN...", - "uuid": "3e39d278-0644-4728-9479-678f92..." - }, - "network_json": { - "links": [ - { - "ethernet_mac_address": "fa:16:3e:7d:74:9b", - "id": "tap9ca524d5-6e", - "mtu": 8958, - "type": "ovs", - "vif_id": "9ca524d5-6e5a-4809-936a-6901..." - } - ], - "networks": [ - { - "id": "network0", - "link": "tap9ca524d5-6e", - "network_id": "c6adfc18-9753-42eb-b3ea-18b57e6b837f", - "type": "ipv4_dhcp" - } - ], - "services": [ - { - "address": "10.10.160.2", - "type": "dns" - } - ] - }, - "user-data": "I2Nsb3VkLWNvbmZpZ...", - "vendor-data": null - }, - "v1": { - "availability-zone": null, - "cloud-name": "openstack", - "instance-id": "3e39d278-0644-4728-9479-678f9212d8f0", - "local-hostname": "xenial-test", - "region": null - } - } - - -As of cloud-init v. 18.4, any values present in -``/run/cloud-init/instance-data.json`` can be used in cloud-init user data -scripts or cloud config data. This allows consumers to use cloud-init's -vendor-neutral, standardized metadata keys as well as datasource-specific -content for any scripts or cloud-config modules they are using. - -To use instance-data.json values in scripts and **#config-config** files the -user-data will need to contain the following header as the first line **## template: jinja**. Cloud-init will source all variables defined in -``/run/cloud-init/instance-data.json`` and allow scripts or cloud-config files -to reference those paths. Below are two examples:: - - * Cloud config calling home with the ec2 public hostname and avaliability-zone - ``` - ## template: jinja - #cloud-config - runcmd: - - echo 'EC2 public hostname allocated to instance: {{ ds.meta_data.public_hostname }}' > /tmp/instance_metadata - - echo 'EC2 avaiability zone: {{ v1.availability_zone }}' >> /tmp/instance_metadata - - curl -X POST -d '{"hostname": "{{ds.meta_data.public_hostname }}", "availability-zone": "{{ v1.availability_zone }}"}' https://example.com.com - ``` - - * Custom user script performing different operations based on region - ``` - ## template: jinja - #!/bin/bash - {% if v1.region == 'us-east-2' -%} - echo 'Installing custom proxies for {{ v1.region }} - sudo apt-get install my-xtra-fast-stack - {%- endif %} - ... - - ``` - -.. note:: - Trying to reference jinja variables that don't exist in - instance-data.json will result in warnings in ``/var/log/cloud-init.log`` - and the following string in your rendered user-data: - ``CI_MISSING_JINJA_VAR/``. - -.. 
note::
-   To save time designing your user-data for a specific cloud's
-   instance-data.json, use the 'render' cloud-init command on an
-   instance booted on your favorite cloud. See :ref:`cli_devel` for more
-   information.
+Any metadata processed by cloud-init's datasources is persisted as
+``/run/cloud-init/instance-data.json``. Cloud-init provides tooling
+to quickly introspect some of that data. See :ref:`instance_metadata` for
+more information.
 
 
 Datasource API
@@ -196,14 +60,14 @@ The current interface that a datasource object must provide is the following:
     # or does not exist)
     def device_name_to_device(self, name)
 
-    # gets the locale string this instance should be applying 
+    # gets the locale string this instance should be applying
     # which typically used to adjust the instances locale settings files
     def get_locale(self)
 
     @property
     def availability_zone(self)
 
-    # gets the instance id that was assigned to this instance by the 
+    # gets the instance id that was assigned to this instance by the
     # cloud provider or when said instance id does not exist in the backing
     # metadata this will return 'iid-datasource'
     def get_instance_id(self)
diff --git a/doc/rtd/topics/instancedata.rst b/doc/rtd/topics/instancedata.rst
new file mode 100644
index 00000000..634e1807
--- /dev/null
+++ b/doc/rtd/topics/instancedata.rst
@@ -0,0 +1,297 @@
+.. _instance_metadata:
+
+*****************
+Instance Metadata
+*****************
+
+What is instance data?
+========================
+
+Instance data is the collection of all configuration data that cloud-init
+processes to configure the instance. This configuration typically
+comes from any number of sources:
+
+* cloud-provided metadata services (aka metadata)
+* custom config-drive attached to the instance
+* cloud-config seed files in the booted cloud image or distribution
+* vendordata provided from files or cloud metadata services
+* userdata provided at instance creation
+
+Each cloud provider presents unique configuration metadata in different
+formats to the instance. Cloud-init provides a cache of any crawled metadata
+as well as a versioned set of standardized instance data keys which it makes
+available on all platforms.
+
+Cloud-init produces a simple json object in
+``/run/cloud-init/instance-data.json`` which provides a standardized and
+versioned representation of the metadata it consumes during initial boot. The
+intent is to provide the following benefits to users or scripts on any system
+deployed with cloud-init:
+
+* simple static object to query to obtain an instance's metadata
+* speed: avoid costly network transactions for metadata that is already cached
+  on the filesystem
+* reduce need to recrawl metadata services for static metadata that is already
+  cached
+* leverage cloud-init's best practices for crawling cloud-metadata services
+* avoid rolling unique metadata crawlers on each cloud platform to get
+  metadata configuration values
+
+Cloud-init stores any instance data processed in the following files:
+
+* ``/run/cloud-init/instance-data.json``: world-readable json containing
+  standardized keys, sensitive keys redacted
+* ``/run/cloud-init/instance-data-sensitive.json``: root-readable unredacted
+  json blob
+* ``/var/lib/cloud/instance/user-data.txt``: root-readable sensitive raw
+  userdata
+* ``/var/lib/cloud/instance/vendor-data.txt``: root-readable sensitive raw
+  vendordata
+
+Cloud-init redacts any security-sensitive content from instance-data.json and
+stores ``/run/cloud-init/instance-data.json`` as a world-readable json file.
+Because user-data and vendor-data can contain passwords, both of those files
+are readable only by *root* as well. The *root* user can also read
+``/run/cloud-init/instance-data-sensitive.json``, which contains all instance
+data from instance-data.json as well as the unredacted sensitive content.
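+
+A minimal sketch (hypothetical consumer code, not part of cloud-init itself)
+of reading these files with the Python stdlib, preferring the unredacted copy
+when running as root:
+
+.. code-block:: python
+
+  import json
+  import os
+
+  path = '/run/cloud-init/instance-data.json'
+  if os.geteuid() == 0:
+      # Root can read the unredacted, sensitive variant.
+      path = '/run/cloud-init/instance-data-sensitive.json'
+  with open(path) as stream:
+      instance_data = json.load(stream)
+  print(instance_data['v1']['region'])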
+
+
+Format of instance-data.json
+============================
+
+The instance-data.json and instance-data-sensitive.json files are well-formed
+JSON and record the set of keys and values for any metadata processed by
+cloud-init. Cloud-init standardizes the format for this content so that it
+can be generalized across different cloud platforms.
+
+There are four basic top-level keys:
+
+* **base64_encoded_keys**: A list of forward-slash delimited key paths into
+  the instance-data.json object whose value is base64-encoded for json
+  compatibility. Values at these paths should be decoded to get the original
+  value.
+
+* **sensitive_keys**: A list of forward-slash delimited key paths into
+  the instance-data.json object whose value is considered by the datasource as
+  'security sensitive'. Only the keys listed here will be redacted from
+  instance-data.json for non-root users.
+
+* **ds**: Datasource-specific metadata crawled for the specific cloud
+  platform. It should closely represent the structure of the cloud metadata
+  crawled. The structure of content and details provided are entirely
+  cloud-dependent. Mileage will vary depending on what the cloud exposes.
+  The content exposed under the 'ds' key is currently **experimental** and
+  expected to change slightly in the upcoming cloud-init release.
+
+* **v1**: Standardized cloud-init metadata keys, these keys are guaranteed to
+  exist on all cloud platforms. They will also retain their current behavior
+  and format and will be carried forward even if cloud-init introduces a new
+  version of standardized keys with **v2**.
+
+The standardized keys present:
+
++----------------------+-----------------------------------------------+---------------------------+
+| Key path             | Description                                   | Examples                  |
++======================+===============================================+===========================+
+| v1.cloud_name        | The name of the cloud provided by metadata    | aws, openstack, azure,    |
+|                      | key 'cloud-name' or the cloud-init datasource | configdrive, nocloud,     |
+|                      | name which was discovered.                    | ovf, etc.                 |
++----------------------+-----------------------------------------------+---------------------------+
+| v1.instance_id       | Unique instance_id allocated by the cloud     | i-<uuid>                  |
++----------------------+-----------------------------------------------+---------------------------+
+| v1.local_hostname    | The internal or local hostname of the system  | ip-10-41-41-70,           |
+|                      |                                               | <user-provided-hostname>  |
++----------------------+-----------------------------------------------+---------------------------+
+| v1.region            | The physical region/datacenter in which the   | us-east-2                 |
+|                      | instance is deployed                          |                           |
++----------------------+-----------------------------------------------+---------------------------+
+| v1.availability_zone | The physical availability zone in which the   | us-east-2b, nova, null    |
+|                      | instance is deployed                          |                           |
++----------------------+-----------------------------------------------+---------------------------+
+
+
+Below is an example of ``/run/cloud-init/instance-data.json`` on an EC2
+instance:
+
+.. 
sourcecode:: json
+
+  {
+   "base64_encoded_keys": [],
+   "sensitive_keys": [],
+   "ds": {
+    "meta_data": {
+     "ami-id": "ami-014e1416b628b0cbf",
+     "ami-launch-index": "0",
+     "ami-manifest-path": "(unknown)",
+     "block-device-mapping": {
+      "ami": "/dev/sda1",
+      "ephemeral0": "sdb",
+      "ephemeral1": "sdc",
+      "root": "/dev/sda1"
+     },
+     "hostname": "ip-10-41-41-70.us-east-2.compute.internal",
+     "instance-action": "none",
+     "instance-id": "i-04fa31cfc55aa7976",
+     "instance-type": "t2.micro",
+     "local-hostname": "ip-10-41-41-70.us-east-2.compute.internal",
+     "local-ipv4": "10.41.41.70",
+     "mac": "06:b6:92:dd:9d:24",
+     "metrics": {
+      "vhostmd": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
+     },
+     "network": {
+      "interfaces": {
+       "macs": {
+        "06:b6:92:dd:9d:24": {
+         "device-number": "0",
+         "interface-id": "eni-08c0c9fdb99b6e6f4",
+         "ipv4-associations": {
+          "18.224.22.43": "10.41.41.70"
+         },
+         "local-hostname": "ip-10-41-41-70.us-east-2.compute.internal",
+         "local-ipv4s": "10.41.41.70",
+         "mac": "06:b6:92:dd:9d:24",
+         "owner-id": "437526006925",
+         "public-hostname": "ec2-18-224-22-43.us-east-2.compute.amazonaws.com",
+         "public-ipv4s": "18.224.22.43",
+         "security-group-ids": "sg-828247e9",
+         "security-groups": "Cloud-init integration test secgroup",
+         "subnet-id": "subnet-282f3053",
+         "subnet-ipv4-cidr-block": "10.41.41.0/24",
+         "subnet-ipv6-cidr-blocks": "2600:1f16:b80:ad00::/64",
+         "vpc-id": "vpc-252ef24d",
+         "vpc-ipv4-cidr-block": "10.41.0.0/16",
+         "vpc-ipv4-cidr-blocks": "10.41.0.0/16",
+         "vpc-ipv6-cidr-blocks": "2600:1f16:b80:ad00::/56"
+        }
+       }
+      }
+     },
+     "placement": {
+      "availability-zone": "us-east-2b"
+     },
+     "profile": "default-hvm",
+     "public-hostname": "ec2-18-224-22-43.us-east-2.compute.amazonaws.com",
+     "public-ipv4": "18.224.22.43",
+     "public-keys": {
+      "cloud-init-integration": [
+       "ssh-rsa
+       AAAAB3NzaC1yc2EAAAADAQABAAABAQDSL7uWGj8cgWyIOaspgKdVy0cKJ+UTjfv7jBOjG2H/GN8bJVXy72XAvnhM0dUM+CCs8FOf0YlPX+Frvz2hKInrmRhZVwRSL129PasD12MlI3l44u6IwS1o/W86Q+tkQYEljtqDOo0a+cOsaZkvUNzUyEXUwz/lmYa6G4hMKZH4NBj7nbAAF96wsMCoyNwbWryBnDYUr6wMbjRR1J9Pw7Xh7WRC73wy4Va2YuOgbD3V/5ZrFPLbWZW/7TFXVrql04QVbyei4aiFR5n//GvoqwQDNe58LmbzX/xvxyKJYdny2zXmdAhMxbrpFQsfpkJ9E/H5w0yOdSvnWbUoG5xNGoOB cloud-init-integration"
+      ]
+     },
+     "reservation-id": "r-06ab75e9346f54333",
+     "security-groups": "Cloud-init integration test secgroup",
+     "services": {
+      "domain": "amazonaws.com",
+      "partition": "aws"
+     }
+    }
+   },
+   "v1": {
+    "availability-zone": "us-east-2b",
+    "availability_zone": "us-east-2b",
+    "cloud-name": "aws",
+    "cloud_name": "aws",
+    "instance-id": "i-04fa31cfc55aa7976",
+    "instance_id": "i-04fa31cfc55aa7976",
+    "local-hostname": "ip-10-41-41-70",
+    "local_hostname": "ip-10-41-41-70",
+    "region": "us-east-2"
+   }
+  }
+
+
+Using instance-data
+===================
+
+As of cloud-init v. 18.4, any variables present in
+``/run/cloud-init/instance-data.json`` can be used in:
+
+* User-data scripts
+* Cloud config data
+* Command line interface via **cloud-init query** or
+  **cloud-init devel render**
+
+Many clouds allow users to provide user-data to an instance at
+the time the instance is launched. Cloud-init supports a number of
+:ref:`user_data_formats`.
+
+Both user-data scripts and **#cloud-config** data support jinja template
+rendering. When the first line of the provided user-data begins with
+**## template: jinja**, cloud-init will use jinja to render that file.
+Because cloud-config modules are run as the *root* user, the variables are
+sourced from instance-data-sensitive.json and surfaced as dot-delimited
+jinja template variables.
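+
+For illustration, a rough sketch of that rendering step (this drives the
+``jinja2`` package directly; cloud-init's actual renderer additionally
+reports missing variables with ``CI_MISSING_JINJA_VAR`` warnings):
+
+.. code-block:: python
+
+  import json
+
+  from jinja2 import Template
+
+  with open('/run/cloud-init/instance-data.json') as stream:
+      instance_data = json.load(stream)
+  user_data = "#!/bin/sh\necho 'cloud: {{ v1.cloud_name }}'"
+  print(Template(user_data).render(**instance_data))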
+
+
+Below are some examples of providing these types of user-data:
+
+* Cloud config calling home with the ec2 public hostname and availability-zone
+
+.. code-block:: shell-session
+
+  ## template: jinja
+  #cloud-config
+  runcmd:
+      - echo 'EC2 public hostname allocated to instance: {{
+        ds.meta_data.public_hostname }}' > /tmp/instance_metadata
+      - echo 'EC2 availability zone: {{ v1.availability_zone }}' >>
+        /tmp/instance_metadata
+      - curl -X POST -d '{"hostname": "{{ds.meta_data.public_hostname }}",
+        "availability-zone": "{{ v1.availability_zone }}"}'
+        https://example.com
+
+* Custom user-data script performing different operations based on region
+
+.. code-block:: shell-session
+
+  ## template: jinja
+  #!/bin/bash
+  {% if v1.region == 'us-east-2' -%}
+  echo 'Installing custom proxies for {{ v1.region }}'
+  sudo apt-get install my-xtra-fast-stack
+  {%- endif %}
+  ...
+
+.. note::
+  Trying to reference jinja variables that don't exist in
+  instance-data.json will result in warnings in ``/var/log/cloud-init.log``
+  and the following string in your rendered user-data:
+  ``CI_MISSING_JINJA_VAR/<var_name>``.
+
+Cloud-init also surfaces a commandline tool **cloud-init query** which can
+assist developers or scripts with obtaining instance metadata easily. See
+:ref:`cli_query` for more information.
+
+To cut down on keystrokes on the command line, cloud-init also provides
+top-level key aliases for any standardized ``v#`` keys present. The preceding
+``v1`` is not required of ``v1.var_name``. These aliases represent the value
+of the highest versioned standard key. For example, the top-level
+``cloud_name`` value will be taken from ``v2.cloud_name`` if both ``v1`` and
+``v2`` keys are present in instance-data.json.
+The **query** command also publishes ``userdata`` and ``vendordata`` keys to
+the root user which will contain the decoded user and vendor data provided to
+this instance. Non-root users referencing userdata or vendordata keys will
+see only redacted values.
+
+.. code-block:: shell-session
+
+  # List all top-level instance-data keys available
+  % cloud-init query --list-keys
+
+  # Find your EC2 ami-id
+  % cloud-init query ds.meta_data.ami_id
+
+  # Format your cloud_name and region using jinja template syntax
+  % cloud-init query --format 'cloud: {{ v1.cloud_name }} myregion: {{ v1.region }}'
+
+.. note::
+  To save time designing a user-data template for a specific cloud's
+  instance-data.json, use the 'render' cloud-init command on an
+  instance booted on your favorite cloud. See :ref:`cli_devel` for more
+  information.
+
+.. vi: textwidth=78
diff --git a/integration-requirements.txt b/integration-requirements.txt
index f80cb942..880d9886 100644
--- a/integration-requirements.txt
+++ b/integration-requirements.txt
@@ -5,16 +5,17 @@
 # the packages/pkg-deps.json file as well.
# +unittest2 # ec2 backend boto3==1.5.9 # ssh communication paramiko==2.4.1 + # lxd backend # 04/03/2018: enables use of lxd 3.0 git+https://github.com/lxc/pylxd.git@4b8ab1802f9aee4eb29cf7b119dae0aa47150779 - # finds latest image information git+https://git.launchpad.net/simplestreams diff --git a/tests/cloud_tests/testcases/base.py b/tests/cloud_tests/testcases/base.py index 27458271..c5457968 100644 --- a/tests/cloud_tests/testcases/base.py +++ b/tests/cloud_tests/testcases/base.py @@ -5,15 +5,15 @@ import crypt import json import re -import unittest +import unittest2 from cloudinit import util as c_util -SkipTest = unittest.SkipTest +SkipTest = unittest2.SkipTest -class CloudTestCase(unittest.TestCase): +class CloudTestCase(unittest2.TestCase): """Base test class for verifiers.""" # data gets populated in get_suite.setUpClass @@ -167,8 +167,9 @@ class CloudTestCase(unittest.TestCase): 'Skipping instance-data.json test.' ' OS: %s not bionic or newer' % self.os_name) instance_data = json.loads(out) - self.assertEqual( - ['ds/user_data'], instance_data['base64_encoded_keys']) + self.assertItemsEqual( + [], + instance_data['base64_encoded_keys']) ds = instance_data.get('ds', {}) v1_data = instance_data.get('v1', {}) metadata = ds.get('meta-data', {}) @@ -187,10 +188,10 @@ class CloudTestCase(unittest.TestCase): metadata.get('placement', {}).get('availability-zone'), 'Could not determine EC2 Availability zone placement') self.assertIsNotNone( - v1_data['availability-zone'], 'expected ec2 availability-zone') - self.assertEqual('aws', v1_data['cloud-name']) - self.assertIn('i-', v1_data['instance-id']) - self.assertIn('ip-', v1_data['local-hostname']) + v1_data['availability_zone'], 'expected ec2 availability_zone') + self.assertEqual('aws', v1_data['cloud_name']) + self.assertIn('i-', v1_data['instance_id']) + self.assertIn('ip-', v1_data['local_hostname']) self.assertIsNotNone(v1_data['region'], 'expected ec2 region') def test_instance_data_json_lxd(self): @@ -213,16 +214,14 @@ class CloudTestCase(unittest.TestCase): ' OS: %s not bionic or newer' % self.os_name) instance_data = json.loads(out) v1_data = instance_data.get('v1', {}) - self.assertEqual( - ['ds/user_data', 'ds/vendor_data'], - sorted(instance_data['base64_encoded_keys'])) - self.assertEqual('nocloud', v1_data['cloud-name']) + self.assertItemsEqual([], sorted(instance_data['base64_encoded_keys'])) + self.assertEqual('nocloud', v1_data['cloud_name']) self.assertIsNone( - v1_data['availability-zone'], - 'found unexpected lxd availability-zone %s' % - v1_data['availability-zone']) - self.assertIn('cloud-test', v1_data['instance-id']) - self.assertIn('cloud-test', v1_data['local-hostname']) + v1_data['availability_zone'], + 'found unexpected lxd availability_zone %s' % + v1_data['availability_zone']) + self.assertIn('cloud-test', v1_data['instance_id']) + self.assertIn('cloud-test', v1_data['local_hostname']) self.assertIsNone( v1_data['region'], 'found unexpected lxd region %s' % v1_data['region']) @@ -248,18 +247,17 @@ class CloudTestCase(unittest.TestCase): ' OS: %s not bionic or newer' % self.os_name) instance_data = json.loads(out) v1_data = instance_data.get('v1', {}) - self.assertEqual( - ['ds/user_data'], instance_data['base64_encoded_keys']) - self.assertEqual('nocloud', v1_data['cloud-name']) + self.assertItemsEqual([], instance_data['base64_encoded_keys']) + self.assertEqual('nocloud', v1_data['cloud_name']) self.assertIsNone( - v1_data['availability-zone'], - 'found unexpected kvm availability-zone %s' % - 
v1_data['availability-zone']) + v1_data['availability_zone'], + 'found unexpected kvm availability_zone %s' % + v1_data['availability_zone']) self.assertIsNotNone( re.match(r'[\da-f]{8}(-[\da-f]{4}){3}-[\da-f]{12}', - v1_data['instance-id']), - 'kvm instance-id is not a UUID: %s' % v1_data['instance-id']) - self.assertIn('ubuntu', v1_data['local-hostname']) + v1_data['instance_id']), + 'kvm instance_id is not a UUID: %s' % v1_data['instance_id']) + self.assertIn('ubuntu', v1_data['local_hostname']) self.assertIsNone( v1_data['region'], 'found unexpected lxd region %s' % v1_data['region']) -- cgit v1.2.3 From fd873fa83fb5ca9c6d5f22a7bec6a308448c77ff Mon Sep 17 00:00:00 2001 From: Chad Smith Date: Wed, 26 Sep 2018 21:18:24 +0000 Subject: docs: surface experimental doc in instance-data.json --- cloudinit/sources/__init__.py | 9 +++++++-- cloudinit/sources/tests/test_init.py | 6 ++++-- 2 files changed, 11 insertions(+), 4 deletions(-) (limited to 'cloudinit/sources/tests') diff --git a/cloudinit/sources/__init__.py b/cloudinit/sources/__init__.py index 730e8174..5ac98826 100644 --- a/cloudinit/sources/__init__.py +++ b/cloudinit/sources/__init__.py @@ -38,6 +38,11 @@ DEP_FILESYSTEM = "FILESYSTEM" DEP_NETWORK = "NETWORK" DS_PREFIX = 'DataSource' +EXPERIMENTAL_TEXT = ( + "EXPERIMENTAL: The structure and format of content scoped under the 'ds'" + " key may change in subsequent releases of cloud-init.") + + # File in which public available instance meta-data is written # security-sensitive key values are redacted from this world-readable file INSTANCE_JSON_FILE = 'instance-data.json' @@ -243,8 +248,8 @@ class DataSource(object): @return True on successful write, False otherwise. """ instance_data = { - 'ds': { - 'meta_data': self.metadata}} + 'ds': {'_doc': EXPERIMENTAL_TEXT, + 'meta_data': self.metadata}} if hasattr(self, 'network_json'): network_json = getattr(self, 'network_json') if network_json != UNSET: diff --git a/cloudinit/sources/tests/test_init.py b/cloudinit/sources/tests/test_init.py index 6b965750..8082019e 100644 --- a/cloudinit/sources/tests/test_init.py +++ b/cloudinit/sources/tests/test_init.py @@ -10,8 +10,8 @@ from cloudinit.event import EventType from cloudinit.helpers import Paths from cloudinit import importer from cloudinit.sources import ( - INSTANCE_JSON_FILE, INSTANCE_JSON_SENSITIVE_FILE, REDACT_SENSITIVE_VALUE, - UNSET, DataSource, redact_sensitive_keys) + EXPERIMENTAL_TEXT, INSTANCE_JSON_FILE, INSTANCE_JSON_SENSITIVE_FILE, + REDACT_SENSITIVE_VALUE, UNSET, DataSource, redact_sensitive_keys) from cloudinit.tests.helpers import CiTestCase, skipIf, mock from cloudinit.user_data import UserDataProcessor from cloudinit import util @@ -305,6 +305,7 @@ class TestDataSource(CiTestCase): 'local_hostname': 'test-subclass-hostname', 'region': 'myregion'}, 'ds': { + '_doc': EXPERIMENTAL_TEXT, 'meta_data': {'availability_zone': 'myaz', 'local-hostname': 'test-subclass-hostname', 'region': 'myregion'}}} @@ -348,6 +349,7 @@ class TestDataSource(CiTestCase): 'local_hostname': 'test-subclass-hostname', 'region': 'myregion'}, 'ds': { + '_doc': EXPERIMENTAL_TEXT, 'meta_data': { 'availability_zone': 'myaz', 'local-hostname': 'test-subclass-hostname', -- cgit v1.2.3