From fc4b966ba928b30b1c586407e752e0b51b1031e8 Mon Sep 17 00:00:00 2001 From: Chad Smith Date: Tue, 25 Sep 2018 21:59:16 +0000 Subject: cli: add cloud-init query subcommand to query instance metadata MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cloud-init caches any cloud metadata crawled during boot in the file /run/cloud-init/instance-data.json. Cloud-init also standardizes some of that metadata across all clouds. The command 'cloud-init query' surfaces a simple CLI to query or format any cached instance metadata so that scripts or end-users do not have to write tools to crawl metadata themselves. Since 'cloud-init query' is runnable by non-root users, redact any sensitive data from instance-data.json and provide a root-readable unredacted instance-data-sensitive.json. Datasources can now define a sensitive_metadata_keys tuple which will redact any matching keys which could contain passwords or credentials from instance-data.json. Also add the following standardized 'v1' instance-data.json keys:   - user_data: The base64encoded user-data provided at instance launch   - vendor_data: Any vendor_data provided to the instance at launch   - underscore_delimited versions of existing hyphenated keys:     instance_id, local_hostname, availability_zone, cloud_name --- bash_completion/cloud-init | 4 +- cloudinit/cmd/devel/render.py | 7 +- cloudinit/cmd/main.py | 10 ++ cloudinit/cmd/query.py | 155 ++++++++++++++++++ cloudinit/cmd/tests/test_query.py | 193 +++++++++++++++++++++++ cloudinit/helpers.py | 4 + cloudinit/sources/__init__.py | 76 +++++++-- cloudinit/sources/tests/test_init.py | 130 ++++++++++++--- doc/rtd/index.rst | 1 + doc/rtd/topics/capabilities.rst | 105 ++++++++++--- doc/rtd/topics/datasources.rst | 148 +---------------- doc/rtd/topics/instancedata.rst | 297 +++++++++++++++++++++++++++++++++++ integration-requirements.txt | 3 +- tests/cloud_tests/testcases/base.py | 52 +++--- 14 files changed, 952 insertions(+), 233 deletions(-) create mode 100644 cloudinit/cmd/query.py create mode 100644 cloudinit/cmd/tests/test_query.py create mode 100644 doc/rtd/topics/instancedata.rst diff --git a/bash_completion/cloud-init b/bash_completion/cloud-init index 6d01bf3a..8c25032f 100644 --- a/bash_completion/cloud-init +++ b/bash_completion/cloud-init @@ -10,7 +10,7 @@ _cloudinit_complete() cur_word="${COMP_WORDS[COMP_CWORD]}" prev_word="${COMP_WORDS[COMP_CWORD-1]}" - subcmds="analyze clean collect-logs devel dhclient-hook features init modules single status" + subcmds="analyze clean collect-logs devel dhclient-hook features init modules query single status" base_params="--help --file --version --debug --force" case ${COMP_CWORD} in 1) @@ -40,6 +40,8 @@ _cloudinit_complete() COMPREPLY=($(compgen -W "--help --mode" -- $cur_word)) ;; + query) + COMPREPLY=($(compgen -W "--all --help --instance-data --list-keys --user-data --vendor-data --debug" -- $cur_word));; single) COMPREPLY=($(compgen -W "--help --name --frequency --report" -- $cur_word)) ;; diff --git a/cloudinit/cmd/devel/render.py b/cloudinit/cmd/devel/render.py index e85933db..2ba6b681 100755 --- a/cloudinit/cmd/devel/render.py +++ b/cloudinit/cmd/devel/render.py @@ -9,7 +9,6 @@ import sys from cloudinit.handlers.jinja_template import render_jinja_payload_from_file from cloudinit import log from cloudinit.sources import INSTANCE_JSON_FILE -from cloudinit import util from . import addLogHandlerCLI, read_cfg_paths NAME = 'render' @@ -54,11 +53,7 @@ def handle_args(name, args): paths.run_dir, INSTANCE_JSON_FILE) else: instance_data_fn = args.instance_data - try: - with open(instance_data_fn) as stream: - instance_data = stream.read() - instance_data = util.load_json(instance_data) - except IOError: + if not os.path.exists(instance_data_fn): LOG.error('Missing instance-data.json file: %s', instance_data_fn) return 1 try: diff --git a/cloudinit/cmd/main.py b/cloudinit/cmd/main.py index 0eee583c..5a437020 100644 --- a/cloudinit/cmd/main.py +++ b/cloudinit/cmd/main.py @@ -791,6 +791,10 @@ def main(sysv_args=None): ' pass to this module')) parser_single.set_defaults(action=('single', main_single)) + parser_query = subparsers.add_parser( + 'query', + help='Query standardized instance metadata from the command line.') + parser_dhclient = subparsers.add_parser('dhclient-hook', help=('run the dhclient hook' 'to record network info')) @@ -842,6 +846,12 @@ def main(sysv_args=None): clean_parser(parser_clean) parser_clean.set_defaults( action=('clean', handle_clean_args)) + elif sysv_args[0] == 'query': + from cloudinit.cmd.query import ( + get_parser as query_parser, handle_args as handle_query_args) + query_parser(parser_query) + parser_query.set_defaults( + action=('render', handle_query_args)) elif sysv_args[0] == 'status': from cloudinit.cmd.status import ( get_parser as status_parser, handle_status_args) diff --git a/cloudinit/cmd/query.py b/cloudinit/cmd/query.py new file mode 100644 index 00000000..7d2d4fe4 --- /dev/null +++ b/cloudinit/cmd/query.py @@ -0,0 +1,155 @@ +# This file is part of cloud-init. See LICENSE file for license information. + +"""Query standardized instance metadata from the command line.""" + +import argparse +import os +import six +import sys + +from cloudinit.handlers.jinja_template import ( + convert_jinja_instance_data, render_jinja_payload) +from cloudinit.cmd.devel import addLogHandlerCLI, read_cfg_paths +from cloudinit import log +from cloudinit.sources import ( + INSTANCE_JSON_FILE, INSTANCE_JSON_SENSITIVE_FILE, REDACT_SENSITIVE_VALUE) +from cloudinit import util + +NAME = 'query' +LOG = log.getLogger(NAME) + + +def get_parser(parser=None): + """Build or extend an arg parser for query utility. + + @param parser: Optional existing ArgumentParser instance representing the + query subcommand which will be extended to support the args of + this utility. + + @returns: ArgumentParser with proper argument configuration. + """ + if not parser: + parser = argparse.ArgumentParser( + prog=NAME, description='Query cloud-init instance data') + parser.add_argument( + '-d', '--debug', action='store_true', default=False, + help='Add verbose messages during template render') + parser.add_argument( + '-i', '--instance-data', type=str, + help=('Path to instance-data.json file. Default is /run/cloud-init/%s' + % INSTANCE_JSON_FILE)) + parser.add_argument( + '-l', '--list-keys', action='store_true', default=False, + help=('List query keys available at the provided instance-data' + ' .')) + parser.add_argument( + '-u', '--user-data', type=str, + help=('Path to user-data file. Default is' + ' /var/lib/cloud/instance/user-data.txt')) + parser.add_argument( + '-v', '--vendor-data', type=str, + help=('Path to vendor-data file. Default is' + ' /var/lib/cloud/instance/vendor-data.txt')) + parser.add_argument( + 'varname', type=str, nargs='?', + help=('A dot-delimited instance data variable to query from' + ' instance-data query. For example: v2.local_hostname')) + parser.add_argument( + '-a', '--all', action='store_true', default=False, dest='dump_all', + help='Dump all available instance-data') + parser.add_argument( + '-f', '--format', type=str, dest='format', + help=('Optionally specify a custom output format string. Any' + ' instance-data variable can be specified between double-curly' + ' braces. For example -f "{{ v2.cloud_name }}"')) + return parser + + +def handle_args(name, args): + """Handle calls to 'cloud-init query' as a subcommand.""" + paths = None + addLogHandlerCLI(LOG, log.DEBUG if args.debug else log.WARNING) + if not any([args.list_keys, args.varname, args.format, args.dump_all]): + LOG.error( + 'Expected one of the options: --all, --format,' + ' --list-keys or varname') + get_parser().print_help() + return 1 + + uid = os.getuid() + if not all([args.instance_data, args.user_data, args.vendor_data]): + paths = read_cfg_paths() + if not args.instance_data: + if uid == 0: + default_json_fn = INSTANCE_JSON_SENSITIVE_FILE + else: + default_json_fn = INSTANCE_JSON_FILE # World readable + instance_data_fn = os.path.join(paths.run_dir, default_json_fn) + else: + instance_data_fn = args.instance_data + if not args.user_data: + user_data_fn = os.path.join(paths.instance_link, 'user-data.txt') + else: + user_data_fn = args.user_data + if not args.vendor_data: + vendor_data_fn = os.path.join(paths.instance_link, 'vendor-data.txt') + else: + vendor_data_fn = args.vendor_data + + try: + instance_json = util.load_file(instance_data_fn) + except IOError: + LOG.error('Missing instance-data.json file: %s', instance_data_fn) + return 1 + + instance_data = util.load_json(instance_json) + if uid != 0: + instance_data['userdata'] = ( + '<%s> file:%s' % (REDACT_SENSITIVE_VALUE, user_data_fn)) + instance_data['vendordata'] = ( + '<%s> file:%s' % (REDACT_SENSITIVE_VALUE, vendor_data_fn)) + else: + instance_data['userdata'] = util.load_file(user_data_fn) + instance_data['vendordata'] = util.load_file(vendor_data_fn) + if args.format: + payload = '## template: jinja\n{fmt}'.format(fmt=args.format) + rendered_payload = render_jinja_payload( + payload=payload, payload_fn='query commandline', + instance_data=instance_data, + debug=True if args.debug else False) + if rendered_payload: + print(rendered_payload) + return 0 + return 1 + + response = convert_jinja_instance_data(instance_data) + if args.varname: + try: + for var in args.varname.split('.'): + response = response[var] + except KeyError: + LOG.error('Undefined instance-data key %s', args.varname) + return 1 + if args.list_keys: + if not isinstance(response, dict): + LOG.error("--list-keys provided but '%s' is not a dict", var) + return 1 + response = '\n'.join(sorted(response.keys())) + elif args.list_keys: + response = '\n'.join(sorted(response.keys())) + if not isinstance(response, six.string_types): + response = util.json_dumps(response) + print(response) + return 0 + + +def main(): + """Tool to query specific instance-data values.""" + parser = get_parser() + sys.exit(handle_args(NAME, parser.parse_args())) + + +if __name__ == '__main__': + main() + +# vi: ts=4 expandtab diff --git a/cloudinit/cmd/tests/test_query.py b/cloudinit/cmd/tests/test_query.py new file mode 100644 index 00000000..fb87c6ab --- /dev/null +++ b/cloudinit/cmd/tests/test_query.py @@ -0,0 +1,193 @@ +# This file is part of cloud-init. See LICENSE file for license information. + +from six import StringIO +from textwrap import dedent +import os + +from collections import namedtuple +from cloudinit.cmd import query +from cloudinit.helpers import Paths +from cloudinit.sources import REDACT_SENSITIVE_VALUE, INSTANCE_JSON_FILE +from cloudinit.tests.helpers import CiTestCase, mock +from cloudinit.util import ensure_dir, write_file + + +class TestQuery(CiTestCase): + + with_logs = True + + args = namedtuple( + 'queryargs', + ('debug dump_all format instance_data list_keys user_data vendor_data' + ' varname')) + + def setUp(self): + super(TestQuery, self).setUp() + self.tmp = self.tmp_dir() + self.instance_data = self.tmp_path('instance-data', dir=self.tmp) + + def test_handle_args_error_on_missing_param(self): + """Error when missing required parameters and print usage.""" + args = self.args( + debug=False, dump_all=False, format=None, instance_data=None, + list_keys=False, user_data=None, vendor_data=None, varname=None) + with mock.patch('sys.stderr', new_callable=StringIO) as m_stderr: + with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout: + self.assertEqual(1, query.handle_args('anyname', args)) + expected_error = ( + 'ERROR: Expected one of the options: --all, --format, --list-keys' + ' or varname\n') + self.assertIn(expected_error, self.logs.getvalue()) + self.assertIn('usage: query', m_stdout.getvalue()) + self.assertIn(expected_error, m_stderr.getvalue()) + + def test_handle_args_error_on_missing_instance_data(self): + """When instance_data file path does not exist, log an error.""" + absent_fn = self.tmp_path('absent', dir=self.tmp) + args = self.args( + debug=False, dump_all=True, format=None, instance_data=absent_fn, + list_keys=False, user_data='ud', vendor_data='vd', varname=None) + with mock.patch('sys.stderr', new_callable=StringIO) as m_stderr: + self.assertEqual(1, query.handle_args('anyname', args)) + self.assertIn( + 'ERROR: Missing instance-data.json file: %s' % absent_fn, + self.logs.getvalue()) + self.assertIn( + 'ERROR: Missing instance-data.json file: %s' % absent_fn, + m_stderr.getvalue()) + + def test_handle_args_defaults_instance_data(self): + """When no instance_data argument, default to configured run_dir.""" + args = self.args( + debug=False, dump_all=True, format=None, instance_data=None, + list_keys=False, user_data=None, vendor_data=None, varname=None) + run_dir = self.tmp_path('run_dir', dir=self.tmp) + ensure_dir(run_dir) + paths = Paths({'run_dir': run_dir}) + self.add_patch('cloudinit.cmd.query.read_cfg_paths', 'm_paths') + self.m_paths.return_value = paths + with mock.patch('sys.stderr', new_callable=StringIO) as m_stderr: + self.assertEqual(1, query.handle_args('anyname', args)) + json_file = os.path.join(run_dir, INSTANCE_JSON_FILE) + self.assertIn( + 'ERROR: Missing instance-data.json file: %s' % json_file, + self.logs.getvalue()) + self.assertIn( + 'ERROR: Missing instance-data.json file: %s' % json_file, + m_stderr.getvalue()) + + def test_handle_args_dumps_all_instance_data(self): + """When --all is specified query will dump all instance data vars.""" + write_file(self.instance_data, '{"my-var": "it worked"}') + args = self.args( + debug=False, dump_all=True, format=None, + instance_data=self.instance_data, list_keys=False, + user_data='ud', vendor_data='vd', varname=None) + with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout: + self.assertEqual(0, query.handle_args('anyname', args)) + self.assertEqual( + '{\n "my_var": "it worked",\n "userdata": "<%s> file:ud",\n' + ' "vendordata": "<%s> file:vd"\n}\n' % ( + REDACT_SENSITIVE_VALUE, REDACT_SENSITIVE_VALUE), + m_stdout.getvalue()) + + def test_handle_args_returns_top_level_varname(self): + """When the argument varname is passed, report its value.""" + write_file(self.instance_data, '{"my-var": "it worked"}') + args = self.args( + debug=False, dump_all=True, format=None, + instance_data=self.instance_data, list_keys=False, + user_data='ud', vendor_data='vd', varname='my_var') + with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout: + self.assertEqual(0, query.handle_args('anyname', args)) + self.assertEqual('it worked\n', m_stdout.getvalue()) + + def test_handle_args_returns_nested_varname(self): + """If user_data file is a jinja template render instance-data vars.""" + write_file(self.instance_data, + '{"v1": {"key-2": "value-2"}, "my-var": "it worked"}') + args = self.args( + debug=False, dump_all=False, format=None, + instance_data=self.instance_data, user_data='ud', vendor_data='vd', + list_keys=False, varname='v1.key_2') + with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout: + self.assertEqual(0, query.handle_args('anyname', args)) + self.assertEqual('value-2\n', m_stdout.getvalue()) + + def test_handle_args_returns_standardized_vars_to_top_level_aliases(self): + """Any standardized vars under v# are promoted as top-level aliases.""" + write_file( + self.instance_data, + '{"v1": {"v1_1": "val1.1"}, "v2": {"v2_2": "val2.2"},' + ' "top": "gun"}') + expected = dedent("""\ + { + "top": "gun", + "userdata": " file:ud", + "v1": { + "v1_1": "val1.1" + }, + "v1_1": "val1.1", + "v2": { + "v2_2": "val2.2" + }, + "v2_2": "val2.2", + "vendordata": " file:vd" + } + """) + args = self.args( + debug=False, dump_all=True, format=None, + instance_data=self.instance_data, user_data='ud', vendor_data='vd', + list_keys=False, varname=None) + with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout: + self.assertEqual(0, query.handle_args('anyname', args)) + self.assertEqual(expected, m_stdout.getvalue()) + + def test_handle_args_list_keys_sorts_top_level_keys_when_no_varname(self): + """Sort all top-level keys when only --list-keys provided.""" + write_file( + self.instance_data, + '{"v1": {"v1_1": "val1.1"}, "v2": {"v2_2": "val2.2"},' + ' "top": "gun"}') + expected = 'top\nuserdata\nv1\nv1_1\nv2\nv2_2\nvendordata\n' + args = self.args( + debug=False, dump_all=False, format=None, + instance_data=self.instance_data, list_keys=True, user_data='ud', + vendor_data='vd', varname=None) + with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout: + self.assertEqual(0, query.handle_args('anyname', args)) + self.assertEqual(expected, m_stdout.getvalue()) + + def test_handle_args_list_keys_sorts_nested_keys_when_varname(self): + """Sort all nested keys of varname object when --list-keys provided.""" + write_file( + self.instance_data, + '{"v1": {"v1_1": "val1.1", "v1_2": "val1.2"}, "v2":' + + ' {"v2_2": "val2.2"}, "top": "gun"}') + expected = 'v1_1\nv1_2\n' + args = self.args( + debug=False, dump_all=False, format=None, + instance_data=self.instance_data, list_keys=True, + user_data='ud', vendor_data='vd', varname='v1') + with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout: + self.assertEqual(0, query.handle_args('anyname', args)) + self.assertEqual(expected, m_stdout.getvalue()) + + def test_handle_args_list_keys_errors_when_varname_is_not_a_dict(self): + """Raise an error when --list-keys and varname specify a non-list.""" + write_file( + self.instance_data, + '{"v1": {"v1_1": "val1.1", "v1_2": "val1.2"}, "v2": ' + + '{"v2_2": "val2.2"}, "top": "gun"}') + expected_error = "ERROR: --list-keys provided but 'top' is not a dict" + args = self.args( + debug=False, dump_all=False, format=None, + instance_data=self.instance_data, list_keys=True, user_data='ud', + vendor_data='vd', varname='top') + with mock.patch('sys.stderr', new_callable=StringIO) as m_stderr: + with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout: + self.assertEqual(1, query.handle_args('anyname', args)) + self.assertEqual('', m_stdout.getvalue()) + self.assertIn(expected_error, m_stderr.getvalue()) + +# vi: ts=4 expandtab diff --git a/cloudinit/helpers.py b/cloudinit/helpers.py index 3cc1fb19..dcd2645e 100644 --- a/cloudinit/helpers.py +++ b/cloudinit/helpers.py @@ -239,6 +239,10 @@ class ConfigMerger(object): if cc_fn and os.path.isfile(cc_fn): try: i_cfgs.append(util.read_conf(cc_fn)) + except PermissionError: + LOG.debug( + 'Skipped loading cloud-config from %s due to' + ' non-root.', cc_fn) except Exception: util.logexc(LOG, 'Failed loading of cloud-config from %s', cc_fn) diff --git a/cloudinit/sources/__init__.py b/cloudinit/sources/__init__.py index a775f1a8..730e8174 100644 --- a/cloudinit/sources/__init__.py +++ b/cloudinit/sources/__init__.py @@ -38,8 +38,12 @@ DEP_FILESYSTEM = "FILESYSTEM" DEP_NETWORK = "NETWORK" DS_PREFIX = 'DataSource' -# File in which instance meta-data, user-data and vendor-data is written +# File in which public available instance meta-data is written +# security-sensitive key values are redacted from this world-readable file INSTANCE_JSON_FILE = 'instance-data.json' +# security-sensitive key values are present in this root-readable file +INSTANCE_JSON_SENSITIVE_FILE = 'instance-data-sensitive.json' +REDACT_SENSITIVE_VALUE = 'redacted for non-root user' # Key which can be provide a cloud's official product name to cloud-init METADATA_CLOUD_NAME_KEY = 'cloud-name' @@ -58,7 +62,7 @@ class InvalidMetaDataException(Exception): pass -def process_instance_metadata(metadata, key_path=''): +def process_instance_metadata(metadata, key_path='', sensitive_keys=()): """Process all instance metadata cleaning it up for persisting as json. Strip ci-b64 prefix and catalog any 'base64_encoded_keys' as a list @@ -67,22 +71,46 @@ def process_instance_metadata(metadata, key_path=''): """ md_copy = copy.deepcopy(metadata) md_copy['base64_encoded_keys'] = [] + md_copy['sensitive_keys'] = [] for key, val in metadata.items(): if key_path: sub_key_path = key_path + '/' + key else: sub_key_path = key + if key in sensitive_keys or sub_key_path in sensitive_keys: + md_copy['sensitive_keys'].append(sub_key_path) if isinstance(val, str) and val.startswith('ci-b64:'): md_copy['base64_encoded_keys'].append(sub_key_path) md_copy[key] = val.replace('ci-b64:', '') if isinstance(val, dict): - return_val = process_instance_metadata(val, sub_key_path) + return_val = process_instance_metadata( + val, sub_key_path, sensitive_keys) md_copy['base64_encoded_keys'].extend( return_val.pop('base64_encoded_keys')) + md_copy['sensitive_keys'].extend( + return_val.pop('sensitive_keys')) md_copy[key] = return_val return md_copy +def redact_sensitive_keys(metadata, redact_value=REDACT_SENSITIVE_VALUE): + """Redact any sensitive keys from to provided metadata dictionary. + + Replace any keys values listed in 'sensitive_keys' with redact_value. + """ + if not metadata.get('sensitive_keys', []): + return metadata + md_copy = copy.deepcopy(metadata) + for key_path in metadata.get('sensitive_keys'): + path_parts = key_path.split('/') + obj = md_copy + for path in path_parts: + if isinstance(obj[path], dict) and path != path_parts[-1]: + obj = obj[path] + obj[path] = redact_value + return md_copy + + URLParams = namedtuple( 'URLParms', ['max_wait_seconds', 'timeout_seconds', 'num_retries']) @@ -127,6 +155,10 @@ class DataSource(object): _dirty_cache = False + # N-tuple of keypaths or keynames redact from instance-data.json for + # non-root users + sensitive_metadata_keys = ('security-credentials',) + def __init__(self, sys_cfg, distro, paths, ud_proc=None): self.sys_cfg = sys_cfg self.distro = distro @@ -152,12 +184,24 @@ class DataSource(object): def _get_standardized_metadata(self): """Return a dictionary of standardized metadata keys.""" - return {'v1': { - 'local-hostname': self.get_hostname(), - 'instance-id': self.get_instance_id(), - 'cloud-name': self.cloud_name, - 'region': self.region, - 'availability-zone': self.availability_zone}} + local_hostname = self.get_hostname() + instance_id = self.get_instance_id() + availability_zone = self.availability_zone + cloud_name = self.cloud_name + # When adding new standard keys prefer underscore-delimited instead + # of hyphen-delimted to support simple variable references in jinja + # templates. + return { + 'v1': { + 'availability-zone': availability_zone, + 'availability_zone': availability_zone, + 'cloud-name': cloud_name, + 'cloud_name': cloud_name, + 'instance-id': instance_id, + 'instance_id': instance_id, + 'local-hostname': local_hostname, + 'local_hostname': local_hostname, + 'region': self.region}} def clear_cached_attrs(self, attr_defaults=()): """Reset any cached metadata attributes to datasource defaults. @@ -200,9 +244,7 @@ class DataSource(object): """ instance_data = { 'ds': { - 'meta_data': self.metadata, - 'user_data': self.get_userdata_raw(), - 'vendor_data': self.get_vendordata_raw()}} + 'meta_data': self.metadata}} if hasattr(self, 'network_json'): network_json = getattr(self, 'network_json') if network_json != UNSET: @@ -217,7 +259,9 @@ class DataSource(object): # Process content base64encoding unserializable values content = util.json_dumps(instance_data) # Strip base64: prefix and set base64_encoded_keys list. - processed_data = process_instance_metadata(json.loads(content)) + processed_data = process_instance_metadata( + json.loads(content), + sensitive_keys=self.sensitive_metadata_keys) except TypeError as e: LOG.warning('Error persisting instance-data.json: %s', str(e)) return False @@ -225,7 +269,11 @@ class DataSource(object): LOG.warning('Error persisting instance-data.json: %s', str(e)) return False json_file = os.path.join(self.paths.run_dir, INSTANCE_JSON_FILE) - write_json(json_file, processed_data, mode=0o600) + write_json(json_file, processed_data) # World readable + json_sensitive_file = os.path.join(self.paths.run_dir, + INSTANCE_JSON_SENSITIVE_FILE) + write_json(json_sensitive_file, + redact_sensitive_keys(processed_data), mode=0o600) return True def _get_data(self): diff --git a/cloudinit/sources/tests/test_init.py b/cloudinit/sources/tests/test_init.py index 8299af23..6b965750 100644 --- a/cloudinit/sources/tests/test_init.py +++ b/cloudinit/sources/tests/test_init.py @@ -1,5 +1,6 @@ # This file is part of cloud-init. See LICENSE file for license information. +import copy import inspect import os import six @@ -9,7 +10,8 @@ from cloudinit.event import EventType from cloudinit.helpers import Paths from cloudinit import importer from cloudinit.sources import ( - INSTANCE_JSON_FILE, DataSource, UNSET) + INSTANCE_JSON_FILE, INSTANCE_JSON_SENSITIVE_FILE, REDACT_SENSITIVE_VALUE, + UNSET, DataSource, redact_sensitive_keys) from cloudinit.tests.helpers import CiTestCase, skipIf, mock from cloudinit.user_data import UserDataProcessor from cloudinit import util @@ -20,20 +22,24 @@ class DataSourceTestSubclassNet(DataSource): dsname = 'MyTestSubclass' url_max_wait = 55 - def __init__(self, sys_cfg, distro, paths, custom_userdata=None, - get_data_retval=True): + def __init__(self, sys_cfg, distro, paths, custom_metadata=None, + custom_userdata=None, get_data_retval=True): super(DataSourceTestSubclassNet, self).__init__( sys_cfg, distro, paths) self._custom_userdata = custom_userdata + self._custom_metadata = custom_metadata self._get_data_retval = get_data_retval def _get_cloud_name(self): return 'SubclassCloudName' def _get_data(self): - self.metadata = {'availability_zone': 'myaz', - 'local-hostname': 'test-subclass-hostname', - 'region': 'myregion'} + if self._custom_metadata: + self.metadata = self._custom_metadata + else: + self.metadata = {'availability_zone': 'myaz', + 'local-hostname': 'test-subclass-hostname', + 'region': 'myregion'} if self._custom_userdata: self.userdata_raw = self._custom_userdata else: @@ -278,7 +284,7 @@ class TestDataSource(CiTestCase): os.path.exists(json_file), 'Found unexpected file %s' % json_file) def test_get_data_writes_json_instance_data_on_success(self): - """get_data writes INSTANCE_JSON_FILE to run_dir as readonly root.""" + """get_data writes INSTANCE_JSON_FILE to run_dir as world readable.""" tmp = self.tmp_dir() datasource = DataSourceTestSubclassNet( self.sys_cfg, self.distro, Paths({'run_dir': tmp})) @@ -287,40 +293,90 @@ class TestDataSource(CiTestCase): content = util.load_file(json_file) expected = { 'base64_encoded_keys': [], + 'sensitive_keys': [], 'v1': { 'availability-zone': 'myaz', + 'availability_zone': 'myaz', 'cloud-name': 'subclasscloudname', + 'cloud_name': 'subclasscloudname', 'instance-id': 'iid-datasource', + 'instance_id': 'iid-datasource', 'local-hostname': 'test-subclass-hostname', + 'local_hostname': 'test-subclass-hostname', 'region': 'myregion'}, 'ds': { 'meta_data': {'availability_zone': 'myaz', 'local-hostname': 'test-subclass-hostname', - 'region': 'myregion'}, - 'user_data': 'userdata_raw', - 'vendor_data': 'vendordata_raw'}} - self.maxDiff = None + 'region': 'myregion'}}} self.assertEqual(expected, util.load_json(content)) file_stat = os.stat(json_file) + self.assertEqual(0o644, stat.S_IMODE(file_stat.st_mode)) + self.assertEqual(expected, util.load_json(content)) + + def test_get_data_writes_json_instance_data_sensitive(self): + """get_data writes INSTANCE_JSON_SENSITIVE_FILE as readonly root.""" + tmp = self.tmp_dir() + datasource = DataSourceTestSubclassNet( + self.sys_cfg, self.distro, Paths({'run_dir': tmp}), + custom_metadata={ + 'availability_zone': 'myaz', + 'local-hostname': 'test-subclass-hostname', + 'region': 'myregion', + 'some': {'security-credentials': { + 'cred1': 'sekret', 'cred2': 'othersekret'}}}) + self.assertEqual( + ('security-credentials',), datasource.sensitive_metadata_keys) + datasource.get_data() + json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp) + sensitive_json_file = self.tmp_path(INSTANCE_JSON_SENSITIVE_FILE, tmp) + redacted = util.load_json(util.load_file(json_file)) + self.assertEqual( + {'cred1': 'sekret', 'cred2': 'othersekret'}, + redacted['ds']['meta_data']['some']['security-credentials']) + content = util.load_file(sensitive_json_file) + expected = { + 'base64_encoded_keys': [], + 'sensitive_keys': ['ds/meta_data/some/security-credentials'], + 'v1': { + 'availability-zone': 'myaz', + 'availability_zone': 'myaz', + 'cloud-name': 'subclasscloudname', + 'cloud_name': 'subclasscloudname', + 'instance-id': 'iid-datasource', + 'instance_id': 'iid-datasource', + 'local-hostname': 'test-subclass-hostname', + 'local_hostname': 'test-subclass-hostname', + 'region': 'myregion'}, + 'ds': { + 'meta_data': { + 'availability_zone': 'myaz', + 'local-hostname': 'test-subclass-hostname', + 'region': 'myregion', + 'some': {'security-credentials': REDACT_SENSITIVE_VALUE}}} + } + self.maxDiff = None + self.assertEqual(expected, util.load_json(content)) + file_stat = os.stat(sensitive_json_file) self.assertEqual(0o600, stat.S_IMODE(file_stat.st_mode)) + self.assertEqual(expected, util.load_json(content)) def test_get_data_handles_redacted_unserializable_content(self): """get_data warns unserializable content in INSTANCE_JSON_FILE.""" tmp = self.tmp_dir() datasource = DataSourceTestSubclassNet( self.sys_cfg, self.distro, Paths({'run_dir': tmp}), - custom_userdata={'key1': 'val1', 'key2': {'key2.1': self.paths}}) + custom_metadata={'key1': 'val1', 'key2': {'key2.1': self.paths}}) datasource.get_data() json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp) content = util.load_file(json_file) - expected_userdata = { + expected_metadata = { 'key1': 'val1', 'key2': { 'key2.1': "Warning: redacted unserializable type "}} instance_json = util.load_json(content) self.assertEqual( - expected_userdata, instance_json['ds']['user_data']) + expected_metadata, instance_json['ds']['meta_data']) def test_persist_instance_data_writes_ec2_metadata_when_set(self): """When ec2_metadata class attribute is set, persist to json.""" @@ -361,17 +417,17 @@ class TestDataSource(CiTestCase): tmp = self.tmp_dir() datasource = DataSourceTestSubclassNet( self.sys_cfg, self.distro, Paths({'run_dir': tmp}), - custom_userdata={'key1': 'val1', 'key2': {'key2.1': b'\x123'}}) + custom_metadata={'key1': 'val1', 'key2': {'key2.1': b'\x123'}}) self.assertTrue(datasource.get_data()) json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp) content = util.load_file(json_file) instance_json = util.load_json(content) - self.assertEqual( - ['ds/user_data/key2/key2.1'], + self.assertItemsEqual( + ['ds/meta_data/key2/key2.1'], instance_json['base64_encoded_keys']) self.assertEqual( {'key1': 'val1', 'key2': {'key2.1': 'EjM='}}, - instance_json['ds']['user_data']) + instance_json['ds']['meta_data']) @skipIf(not six.PY2, "json serialization on <= py2.7 handles bytes") def test_get_data_handles_bytes_values(self): @@ -379,7 +435,7 @@ class TestDataSource(CiTestCase): tmp = self.tmp_dir() datasource = DataSourceTestSubclassNet( self.sys_cfg, self.distro, Paths({'run_dir': tmp}), - custom_userdata={'key1': 'val1', 'key2': {'key2.1': b'\x123'}}) + custom_metadata={'key1': 'val1', 'key2': {'key2.1': b'\x123'}}) self.assertTrue(datasource.get_data()) json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp) content = util.load_file(json_file) @@ -387,7 +443,7 @@ class TestDataSource(CiTestCase): self.assertEqual([], instance_json['base64_encoded_keys']) self.assertEqual( {'key1': 'val1', 'key2': {'key2.1': '\x123'}}, - instance_json['ds']['user_data']) + instance_json['ds']['meta_data']) @skipIf(not six.PY2, "Only python2 hits UnicodeDecodeErrors on non-utf8") def test_non_utf8_encoding_logs_warning(self): @@ -395,7 +451,7 @@ class TestDataSource(CiTestCase): tmp = self.tmp_dir() datasource = DataSourceTestSubclassNet( self.sys_cfg, self.distro, Paths({'run_dir': tmp}), - custom_userdata={'key1': 'val1', 'key2': {'key2.1': b'ab\xaadef'}}) + custom_metadata={'key1': 'val1', 'key2': {'key2.1': b'ab\xaadef'}}) self.assertTrue(datasource.get_data()) json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp) self.assertFalse(os.path.exists(json_file)) @@ -509,4 +565,36 @@ class TestDataSource(CiTestCase): self.logs.getvalue()) +class TestRedactSensitiveData(CiTestCase): + + def test_redact_sensitive_data_noop_when_no_sensitive_keys_present(self): + """When sensitive_keys is absent or empty from metadata do nothing.""" + md = {'my': 'data'} + self.assertEqual( + md, redact_sensitive_keys(md, redact_value='redacted')) + md['sensitive_keys'] = [] + self.assertEqual( + md, redact_sensitive_keys(md, redact_value='redacted')) + + def test_redact_sensitive_data_redacts_exact_match_name(self): + """Only exact matched sensitive_keys are redacted from metadata.""" + md = {'sensitive_keys': ['md/secure'], + 'md': {'secure': 's3kr1t', 'insecure': 'publik'}} + secure_md = copy.deepcopy(md) + secure_md['md']['secure'] = 'redacted' + self.assertEqual( + secure_md, + redact_sensitive_keys(md, redact_value='redacted')) + + def test_redact_sensitive_data_does_redacts_with_default_string(self): + """When redact_value is absent, REDACT_SENSITIVE_VALUE is used.""" + md = {'sensitive_keys': ['md/secure'], + 'md': {'secure': 's3kr1t', 'insecure': 'publik'}} + secure_md = copy.deepcopy(md) + secure_md['md']['secure'] = 'redacted for non-root user' + self.assertEqual( + secure_md, + redact_sensitive_keys(md)) + + # vi: ts=4 expandtab diff --git a/doc/rtd/index.rst b/doc/rtd/index.rst index de67f361..20a99a30 100644 --- a/doc/rtd/index.rst +++ b/doc/rtd/index.rst @@ -31,6 +31,7 @@ initialization of a cloud instance. topics/capabilities.rst topics/availability.rst topics/format.rst + topics/instancedata.rst topics/dir_layout.rst topics/examples.rst topics/boot.rst diff --git a/doc/rtd/topics/capabilities.rst b/doc/rtd/topics/capabilities.rst index 2d8e2538..0d8b8947 100644 --- a/doc/rtd/topics/capabilities.rst +++ b/doc/rtd/topics/capabilities.rst @@ -18,7 +18,7 @@ User configurability User-data can be given by the user at instance launch time. See :ref:`user_data_formats` for acceptable user-data content. - + This is done via the ``--user-data`` or ``--user-data-file`` argument to ec2-run-instances for example. @@ -53,10 +53,9 @@ system: % cloud-init --help usage: cloud-init [-h] [--version] [--file FILES] - [--debug] [--force] - {init,modules,single,dhclient-hook,features,analyze,devel,collect-logs,clean,status} - ... + {init,modules,single,query,dhclient-hook,features,analyze,devel,collect-logs,clean,status} + ... optional arguments: -h, --help show this help message and exit @@ -68,17 +67,19 @@ system: your own risk) Subcommands: - {init,modules,single,dhclient-hook,features,analyze,devel,collect-logs,clean,status} + {init,modules,single,query,dhclient-hook,features,analyze,devel,collect-logs,clean,status} init initializes cloud-init and performs initial modules modules activates modules using a given configuration key single run a single module + query Query instance metadata from the command line dhclient-hook run the dhclient hookto record network info features list defined features analyze Devel tool: Analyze cloud-init logs and data devel Run development tools collect-logs Collect and tar all cloud-init debug info - clean Remove logs and artifacts so cloud-init can re-run. - status Report cloud-init status or wait on completion. + clean Remove logs and artifacts so cloud-init can re-run + status Report cloud-init status or wait on completion + CLI Subcommand details ====================== @@ -104,8 +105,8 @@ cloud-init status Report whether cloud-init is running, done, disabled or errored. Exits non-zero if an error is detected in cloud-init. - * **--long**: Detailed status information. - * **--wait**: Block until cloud-init completes. +* **--long**: Detailed status information. +* **--wait**: Block until cloud-init completes. .. code-block:: shell-session @@ -143,6 +144,68 @@ Logs collected are: * journalctl output * /var/lib/cloud/instance/user-data.txt +.. _cli_query: + +cloud-init query +------------------ +Query standardized cloud instance metadata crawled by cloud-init and stored +in ``/run/cloud-init/instance-data.json``. This is a convenience command-line +interface to reference any cached configuration metadata that cloud-init +crawls when booting the instance. See :ref:`instance_metadata` for more info. + +* **--all**: Dump all available instance data as json which can be queried. +* **--instance-data**: Optional path to a different instance-data.json file to + source for queries. +* **--list-keys**: List available query keys from cached instance data. + +.. code-block:: shell-session + + # List all top-level query keys available (includes standardized aliases) + % cloud-init query --list-keys + availability_zone + base64_encoded_keys + cloud_name + ds + instance_id + local_hostname + region + v1 + +* ****: A dot-delimited variable path into the instance-data.json + object. + +.. code-block:: shell-session + + # Query cloud-init standardized metadata on any cloud + % cloud-init query v1.cloud_name + aws # or openstack, azure, gce etc. + + # Any standardized instance-data under a key is aliased as a top-level + # key for convenience. + % cloud-init query cloud_name + aws # or openstack, azure, gce etc. + + # Query datasource-specific metadata on EC2 + % cloud-init query ds.meta_data.public_ipv4 + +* **--format** A string that will use jinja-template syntax to render a string + replacing + +.. code-block:: shell-session + + # Generate a custom hostname fqdn based on instance-id, cloud and region + % cloud-init query --format 'custom-{{instance_id}}.{{region}}.{{v1.cloud_name}}.com' + custom-i-0e91f69987f37ec74.us-east-2.aws.com + + +.. note:: + The standardized instance data keys under **v#** are guaranteed not to change + behavior or format. If using top-level convenience aliases for any + standardized instance data keys, the most value (highest **v#**) of that key + name is what is reported as the top-level value. So these aliases act as a + 'latest'. + + .. _cli_analyze: cloud-init analyze @@ -150,10 +213,10 @@ cloud-init analyze Get detailed reports of where cloud-init spends most of its time. See :ref:`boot_time_analysis` for more info. - * **blame** Report ordered by most costly operations. - * **dump** Machine-readable JSON dump of all cloud-init tracked events. - * **show** show time-ordered report of the cost of operations during each - boot stage. +* **blame** Report ordered by most costly operations. +* **dump** Machine-readable JSON dump of all cloud-init tracked events. +* **show** show time-ordered report of the cost of operations during each + boot stage. .. _cli_devel: @@ -182,8 +245,8 @@ cloud-init clean Remove cloud-init artifacts from /var/lib/cloud and optionally reboot the machine to so cloud-init re-runs all stages as it did on first boot. - * **--logs**: Optionally remove /var/log/cloud-init*log files. - * **--reboot**: Reboot the system after removing artifacts. +* **--logs**: Optionally remove /var/log/cloud-init*log files. +* **--reboot**: Reboot the system after removing artifacts. .. _cli_init: @@ -195,7 +258,7 @@ Can be run on the commandline, but is generally gated to run only once due to semaphores in **/var/lib/cloud/instance/sem/** and **/var/lib/cloud/sem**. - * **--local**: Run *init-local* stage instead of *init*. +* **--local**: Run *init-local* stage instead of *init*. .. _cli_modules: @@ -210,8 +273,8 @@ declared to run in various boot stages in the file commandline, but each module is gated to run only once due to semaphores in ``/var/lib/cloud/``. - * **--mode (init|config|final)**: Run *modules:init*, *modules:config* or - *modules:final* cloud-init stages. See :ref:`boot_stages` for more info. +* **--mode (init|config|final)**: Run *modules:init*, *modules:config* or + *modules:final* cloud-init stages. See :ref:`boot_stages` for more info. .. _cli_single: @@ -221,9 +284,9 @@ Attempt to run a single named cloud config module. The following example re-runs the cc_set_hostname module ignoring the module default frequency of once-per-instance: - * **--name**: The cloud-config module name to run - * **--frequency**: Optionally override the declared module frequency - with one of (always|once-per-instance|once) +* **--name**: The cloud-config module name to run +* **--frequency**: Optionally override the declared module frequency + with one of (always|once-per-instance|once) .. code-block:: shell-session diff --git a/doc/rtd/topics/datasources.rst b/doc/rtd/topics/datasources.rst index 14432e65..e34f145c 100644 --- a/doc/rtd/topics/datasources.rst +++ b/doc/rtd/topics/datasources.rst @@ -17,146 +17,10 @@ own way) internally a datasource abstract class was created to allow for a single way to access the different cloud systems methods to provide this data through the typical usage of subclasses. - -.. _instance_metadata: - -instance-data -------------- -For reference, cloud-init stores all the metadata, vendordata and userdata -provided by a cloud in a json blob at ``/run/cloud-init/instance-data.json``. -While the json contains datasource-specific keys and names, cloud-init will -maintain a minimal set of standardized keys that will remain stable on any -cloud. Standardized instance-data keys will be present under a "v1" key. -Any datasource metadata cloud-init consumes will all be present under the -"ds" key. - -Below is an instance-data.json example from an OpenStack instance: - -.. sourcecode:: json - - { - "base64-encoded-keys": [ - "ds/meta-data/random_seed", - "ds/user-data" - ], - "ds": { - "ec2_metadata": { - "ami-id": "ami-0000032f", - "ami-launch-index": "0", - "ami-manifest-path": "FIXME", - "block-device-mapping": { - "ami": "vda", - "ephemeral0": "/dev/vdb", - "root": "/dev/vda" - }, - "hostname": "xenial-test.novalocal", - "instance-action": "none", - "instance-id": "i-0006e030", - "instance-type": "m1.small", - "local-hostname": "xenial-test.novalocal", - "local-ipv4": "10.5.0.6", - "placement": { - "availability-zone": "None" - }, - "public-hostname": "xenial-test.novalocal", - "public-ipv4": "10.245.162.145", - "reservation-id": "r-fxm623oa", - "security-groups": "default" - }, - "meta-data": { - "availability_zone": null, - "devices": [], - "hostname": "xenial-test.novalocal", - "instance-id": "3e39d278-0644-4728-9479-678f9212d8f0", - "launch_index": 0, - "local-hostname": "xenial-test.novalocal", - "name": "xenial-test", - "project_id": "e0eb2d2538814...", - "random_seed": "A6yPN...", - "uuid": "3e39d278-0644-4728-9479-678f92..." - }, - "network_json": { - "links": [ - { - "ethernet_mac_address": "fa:16:3e:7d:74:9b", - "id": "tap9ca524d5-6e", - "mtu": 8958, - "type": "ovs", - "vif_id": "9ca524d5-6e5a-4809-936a-6901..." - } - ], - "networks": [ - { - "id": "network0", - "link": "tap9ca524d5-6e", - "network_id": "c6adfc18-9753-42eb-b3ea-18b57e6b837f", - "type": "ipv4_dhcp" - } - ], - "services": [ - { - "address": "10.10.160.2", - "type": "dns" - } - ] - }, - "user-data": "I2Nsb3VkLWNvbmZpZ...", - "vendor-data": null - }, - "v1": { - "availability-zone": null, - "cloud-name": "openstack", - "instance-id": "3e39d278-0644-4728-9479-678f9212d8f0", - "local-hostname": "xenial-test", - "region": null - } - } - - -As of cloud-init v. 18.4, any values present in -``/run/cloud-init/instance-data.json`` can be used in cloud-init user data -scripts or cloud config data. This allows consumers to use cloud-init's -vendor-neutral, standardized metadata keys as well as datasource-specific -content for any scripts or cloud-config modules they are using. - -To use instance-data.json values in scripts and **#config-config** files the -user-data will need to contain the following header as the first line **## template: jinja**. Cloud-init will source all variables defined in -``/run/cloud-init/instance-data.json`` and allow scripts or cloud-config files -to reference those paths. Below are two examples:: - - * Cloud config calling home with the ec2 public hostname and avaliability-zone - ``` - ## template: jinja - #cloud-config - runcmd: - - echo 'EC2 public hostname allocated to instance: {{ ds.meta_data.public_hostname }}' > /tmp/instance_metadata - - echo 'EC2 avaiability zone: {{ v1.availability_zone }}' >> /tmp/instance_metadata - - curl -X POST -d '{"hostname": "{{ds.meta_data.public_hostname }}", "availability-zone": "{{ v1.availability_zone }}"}' https://example.com.com - ``` - - * Custom user script performing different operations based on region - ``` - ## template: jinja - #!/bin/bash - {% if v1.region == 'us-east-2' -%} - echo 'Installing custom proxies for {{ v1.region }} - sudo apt-get install my-xtra-fast-stack - {%- endif %} - ... - - ``` - -.. note:: - Trying to reference jinja variables that don't exist in - instance-data.json will result in warnings in ``/var/log/cloud-init.log`` - and the following string in your rendered user-data: - ``CI_MISSING_JINJA_VAR/``. - -.. note:: - To save time designing your user-data for a specific cloud's - instance-data.json, use the 'render' cloud-init command on an - instance booted on your favorite cloud. See :ref:`cli_devel` for more - information. +Any metadata processed by cloud-init's datasources is persisted as +``/run/cloud0-init/instance-data.json``. Cloud-init provides tooling +to quickly introspect some of that data. See :ref:`instance_metadata` for +more information. Datasource API @@ -196,14 +60,14 @@ The current interface that a datasource object must provide is the following: # or does not exist) def device_name_to_device(self, name) - # gets the locale string this instance should be applying + # gets the locale string this instance should be applying # which typically used to adjust the instances locale settings files def get_locale(self) @property def availability_zone(self) - # gets the instance id that was assigned to this instance by the + # gets the instance id that was assigned to this instance by the # cloud provider or when said instance id does not exist in the backing # metadata this will return 'iid-datasource' def get_instance_id(self) diff --git a/doc/rtd/topics/instancedata.rst b/doc/rtd/topics/instancedata.rst new file mode 100644 index 00000000..634e1807 --- /dev/null +++ b/doc/rtd/topics/instancedata.rst @@ -0,0 +1,297 @@ +.. _instance_metadata: + +***************** +Instance Metadata +***************** + +What is a instance data? +======================== + +Instance data is the collection of all configuration data that cloud-init +processes to configure the instance. This configuration typically +comes from any number of sources: + +* cloud-provided metadata services (aka metadata) +* custom config-drive attached to the instance +* cloud-config seed files in the booted cloud image or distribution +* vendordata provided from files or cloud metadata services +* userdata provided at instance creation + +Each cloud provider presents unique configuration metadata in different +formats to the instance. Cloud-init provides a cache of any crawled metadata +as well as a versioned set of standardized instance data keys which it makes +available on all platforms. + +Cloud-init produces a simple json object in +``/run/cloud-init/instance-data.json`` which represents standardized and +versioned representation of the metadata it consumes during initial boot. The +intent is to provide the following benefits to users or scripts on any system +deployed with cloud-init: + +* simple static object to query to obtain a instance's metadata +* speed: avoid costly network transactions for metadata that is already cached + on the filesytem +* reduce need to recrawl metadata services for static metadata that is already + cached +* leverage cloud-init's best practices for crawling cloud-metadata services +* avoid rolling unique metadata crawlers on each cloud platform to get + metadata configuration values + +Cloud-init stores any instance data processed in the following files: + +* ``/run/cloud-init/instance-data.json``: world-readable json containing + standardized keys, sensitive keys redacted +* ``/run/cloud-init/instance-data-sensitive.json``: root-readable unredacted + json blob +* ``/var/lib/cloud/instance/user-data.txt``: root-readable sensitive raw + userdata +* ``/var/lib/cloud/instance/vendor-data.txt``: root-readable sensitive raw + vendordata + +Cloud-init redacts any security sensitive content from instance-data.json, +stores ``/run/cloud-init/instance-data.json`` as a world-readable json file. +Because user-data and vendor-data can contain passwords both of these files +are readonly for *root* as well. The *root* user can also read +``/run/cloud-init/instance-data-sensitive.json`` which is all instance data +from instance-data.json as well as unredacted sensitive content. + + +Format of instance-data.json +============================ + +The instance-data.json and instance-data-sensitive.json files are well-formed +JSON and record the set of keys and values for any metadata processed by +cloud-init. Cloud-init standardizes the format for this content so that it +can be generalized across different cloud platforms. + +There are three basic top-level keys: + +* **base64_encoded_keys**: A list of forward-slash delimited key paths into + the instance-data.json object whose value is base64encoded for json + compatibility. Values at these paths should be decoded to get the original + value. + +* **sensitive_keys**: A list of forward-slash delimited key paths into + the instance-data.json object whose value is considered by the datasource as + 'security sensitive'. Only the keys listed here will be redacted from + instance-data.json for non-root users. + +* **ds**: Datasource-specific metadata crawled for the specific cloud + platform. It should closely represent the structure of the cloud metadata + crawled. The structure of content and details provided are entirely + cloud-dependent. Mileage will vary depending on what the cloud exposes. + The content exposed under the 'ds' key is currently **experimental** and + expected to change slightly in the upcoming cloud-init release. + +* **v1**: Standardized cloud-init metadata keys, these keys are guaranteed to + exist on all cloud platforms. They will also retain their current behavior + and format and will be carried forward even if cloud-init introduces a new + version of standardized keys with **v2**. + +The standardized keys present: + ++----------------------+-----------------------------------------------+---------------------------+ +| Key path | Description | Examples | ++======================+===============================================+===========================+ +| v1.cloud_name | The name of the cloud provided by metadata | aws, openstack, azure, | +| | key 'cloud-name' or the cloud-init datasource | configdrive, nocloud, | +| | name which was discovered. | ovf, etc. | ++----------------------+-----------------------------------------------+---------------------------+ +| v1.instance_id | Unique instance_id allocated by the cloud | i- | ++----------------------+-----------------------------------------------+---------------------------+ +| v1.local_hostname | The internal or local hostname of the system | ip-10-41-41-70, | +| | | | ++----------------------+-----------------------------------------------+---------------------------+ +| v1.region | The physical region/datacenter in which the | us-east-2 | +| | instance is deployed | | ++----------------------+-----------------------------------------------+---------------------------+ +| v1.availability_zone | The physical availability zone in which the | us-east-2b, nova, null | +| | instance is deployed | | ++----------------------+-----------------------------------------------+---------------------------+ + + +Below is an example of ``/run/cloud-init/instance_data.json`` on an EC2 +instance: + +.. sourcecode:: json + + { + "base64_encoded_keys": [], + "sensitive_keys": [], + "ds": { + "meta_data": { + "ami-id": "ami-014e1416b628b0cbf", + "ami-launch-index": "0", + "ami-manifest-path": "(unknown)", + "block-device-mapping": { + "ami": "/dev/sda1", + "ephemeral0": "sdb", + "ephemeral1": "sdc", + "root": "/dev/sda1" + }, + "hostname": "ip-10-41-41-70.us-east-2.compute.internal", + "instance-action": "none", + "instance-id": "i-04fa31cfc55aa7976", + "instance-type": "t2.micro", + "local-hostname": "ip-10-41-41-70.us-east-2.compute.internal", + "local-ipv4": "10.41.41.70", + "mac": "06:b6:92:dd:9d:24", + "metrics": { + "vhostmd": "" + }, + "network": { + "interfaces": { + "macs": { + "06:b6:92:dd:9d:24": { + "device-number": "0", + "interface-id": "eni-08c0c9fdb99b6e6f4", + "ipv4-associations": { + "18.224.22.43": "10.41.41.70" + }, + "local-hostname": "ip-10-41-41-70.us-east-2.compute.internal", + "local-ipv4s": "10.41.41.70", + "mac": "06:b6:92:dd:9d:24", + "owner-id": "437526006925", + "public-hostname": "ec2-18-224-22-43.us-east-2.compute.amazonaws.com", + "public-ipv4s": "18.224.22.43", + "security-group-ids": "sg-828247e9", + "security-groups": "Cloud-init integration test secgroup", + "subnet-id": "subnet-282f3053", + "subnet-ipv4-cidr-block": "10.41.41.0/24", + "subnet-ipv6-cidr-blocks": "2600:1f16:b80:ad00::/64", + "vpc-id": "vpc-252ef24d", + "vpc-ipv4-cidr-block": "10.41.0.0/16", + "vpc-ipv4-cidr-blocks": "10.41.0.0/16", + "vpc-ipv6-cidr-blocks": "2600:1f16:b80:ad00::/56" + } + } + } + }, + "placement": { + "availability-zone": "us-east-2b" + }, + "profile": "default-hvm", + "public-hostname": "ec2-18-224-22-43.us-east-2.compute.amazonaws.com", + "public-ipv4": "18.224.22.43", + "public-keys": { + "cloud-init-integration": [ + "ssh-rsa + AAAAB3NzaC1yc2EAAAADAQABAAABAQDSL7uWGj8cgWyIOaspgKdVy0cKJ+UTjfv7jBOjG2H/GN8bJVXy72XAvnhM0dUM+CCs8FOf0YlPX+Frvz2hKInrmRhZVwRSL129PasD12MlI3l44u6IwS1o/W86Q+tkQYEljtqDOo0a+cOsaZkvUNzUyEXUwz/lmYa6G4hMKZH4NBj7nbAAF96wsMCoyNwbWryBnDYUr6wMbjRR1J9Pw7Xh7WRC73wy4Va2YuOgbD3V/5ZrFPLbWZW/7TFXVrql04QVbyei4aiFR5n//GvoqwQDNe58LmbzX/xvxyKJYdny2zXmdAhMxbrpFQsfpkJ9E/H5w0yOdSvnWbUoG5xNGoOB + cloud-init-integration" + ] + }, + "reservation-id": "r-06ab75e9346f54333", + "security-groups": "Cloud-init integration test secgroup", + "services": { + "domain": "amazonaws.com", + "partition": "aws" + } + } + }, + "v1": { + "availability-zone": "us-east-2b", + "availability_zone": "us-east-2b", + "cloud-name": "aws", + "cloud_name": "aws", + "instance-id": "i-04fa31cfc55aa7976", + "instance_id": "i-04fa31cfc55aa7976", + "local-hostname": "ip-10-41-41-70", + "local_hostname": "ip-10-41-41-70", + "region": "us-east-2" + } + } + + +Using instance-data +=================== + +As of cloud-init v. 18.4, any variables present in +``/run/cloud-init/instance-data.json`` can be used in: + +* User-data scripts +* Cloud config data +* Command line interface via **cloud-init query** or + **cloud-init devel render** + +Many clouds allow users to provide user-data to an instance at +the time the instance is launched. Cloud-init supports a number of +:ref:`user_data_formats`. + +Both user-data scripts and **#cloud-config** data support jinja template +rendering. +When the first line of the provided user-data begins with, +**## template: jinja** cloud-init will use jinja to render that file. +Any instance-data-sensitive.json variables are surfaced as dot-delimited +jinja template variables because cloud-config modules are run as 'root' +user. + + +Below are some examples of providing these types of user-data: + +* Cloud config calling home with the ec2 public hostname and avaliability-zone + +.. code-block:: shell-session + + ## template: jinja + #cloud-config + runcmd: + - echo 'EC2 public hostname allocated to instance: {{ + ds.meta_data.public_hostname }}' > /tmp/instance_metadata + - echo 'EC2 avaiability zone: {{ v1.availability_zone }}' >> + /tmp/instance_metadata + - curl -X POST -d '{"hostname": "{{ds.meta_data.public_hostname }}", + "availability-zone": "{{ v1.availability_zone }}"}' + https://example.com + +* Custom user-data script performing different operations based on region + +.. code-block:: shell-session + + ## template: jinja + #!/bin/bash + {% if v1.region == 'us-east-2' -%} + echo 'Installing custom proxies for {{ v1.region }} + sudo apt-get install my-xtra-fast-stack + {%- endif %} + ... + +.. note:: + Trying to reference jinja variables that don't exist in + instance-data.json will result in warnings in ``/var/log/cloud-init.log`` + and the following string in your rendered user-data: + ``CI_MISSING_JINJA_VAR/``. + +Cloud-init also surfaces a commandline tool **cloud-init query** which can +assist developers or scripts with obtaining instance metadata easily. See +:ref:`cli_query` for more information. + +To cut down on keystrokes on the command line, cloud-init also provides +top-level key aliases for any standardized ``v#`` keys present. The preceding +``v1`` is not required of ``v1.var_name`` These aliases will represent the +value of the highest versioned standard key. For example, ``cloud_name`` +value will be ``v2.cloud_name`` if both ``v1`` and ``v2`` keys are present in +instance-data.json. +The **query** command also publishes ``userdata`` and ``vendordata`` keys to +the root user which will contain the decoded user and vendor data provided to +this instance. Non-root users referencing userdata or vendordata keys will +see only redacted values. + +.. code-block:: shell-session + + # List all top-level instance-data keys available + % cloud-init query --list-keys + + # Find your EC2 ami-id + % cloud-init query ds.metadata.ami_id + + # Format your cloud_name and region using jinja template syntax + % cloud-init query --format 'cloud: {{ v1.cloud_name }} myregion: {{ + % v1.region }}' + +.. note:: + To save time designing a user-data template for a specific cloud's + instance-data.json, use the 'render' cloud-init command on an + instance booted on your favorite cloud. See :ref:`cli_devel` for more + information. + +.. vi: textwidth=78 diff --git a/integration-requirements.txt b/integration-requirements.txt index f80cb942..880d9886 100644 --- a/integration-requirements.txt +++ b/integration-requirements.txt @@ -5,16 +5,17 @@ # the packages/pkg-deps.json file as well. # +unittest2 # ec2 backend boto3==1.5.9 # ssh communication paramiko==2.4.1 + # lxd backend # 04/03/2018: enables use of lxd 3.0 git+https://github.com/lxc/pylxd.git@4b8ab1802f9aee4eb29cf7b119dae0aa47150779 - # finds latest image information git+https://git.launchpad.net/simplestreams diff --git a/tests/cloud_tests/testcases/base.py b/tests/cloud_tests/testcases/base.py index 27458271..c5457968 100644 --- a/tests/cloud_tests/testcases/base.py +++ b/tests/cloud_tests/testcases/base.py @@ -5,15 +5,15 @@ import crypt import json import re -import unittest +import unittest2 from cloudinit import util as c_util -SkipTest = unittest.SkipTest +SkipTest = unittest2.SkipTest -class CloudTestCase(unittest.TestCase): +class CloudTestCase(unittest2.TestCase): """Base test class for verifiers.""" # data gets populated in get_suite.setUpClass @@ -167,8 +167,9 @@ class CloudTestCase(unittest.TestCase): 'Skipping instance-data.json test.' ' OS: %s not bionic or newer' % self.os_name) instance_data = json.loads(out) - self.assertEqual( - ['ds/user_data'], instance_data['base64_encoded_keys']) + self.assertItemsEqual( + [], + instance_data['base64_encoded_keys']) ds = instance_data.get('ds', {}) v1_data = instance_data.get('v1', {}) metadata = ds.get('meta-data', {}) @@ -187,10 +188,10 @@ class CloudTestCase(unittest.TestCase): metadata.get('placement', {}).get('availability-zone'), 'Could not determine EC2 Availability zone placement') self.assertIsNotNone( - v1_data['availability-zone'], 'expected ec2 availability-zone') - self.assertEqual('aws', v1_data['cloud-name']) - self.assertIn('i-', v1_data['instance-id']) - self.assertIn('ip-', v1_data['local-hostname']) + v1_data['availability_zone'], 'expected ec2 availability_zone') + self.assertEqual('aws', v1_data['cloud_name']) + self.assertIn('i-', v1_data['instance_id']) + self.assertIn('ip-', v1_data['local_hostname']) self.assertIsNotNone(v1_data['region'], 'expected ec2 region') def test_instance_data_json_lxd(self): @@ -213,16 +214,14 @@ class CloudTestCase(unittest.TestCase): ' OS: %s not bionic or newer' % self.os_name) instance_data = json.loads(out) v1_data = instance_data.get('v1', {}) - self.assertEqual( - ['ds/user_data', 'ds/vendor_data'], - sorted(instance_data['base64_encoded_keys'])) - self.assertEqual('nocloud', v1_data['cloud-name']) + self.assertItemsEqual([], sorted(instance_data['base64_encoded_keys'])) + self.assertEqual('nocloud', v1_data['cloud_name']) self.assertIsNone( - v1_data['availability-zone'], - 'found unexpected lxd availability-zone %s' % - v1_data['availability-zone']) - self.assertIn('cloud-test', v1_data['instance-id']) - self.assertIn('cloud-test', v1_data['local-hostname']) + v1_data['availability_zone'], + 'found unexpected lxd availability_zone %s' % + v1_data['availability_zone']) + self.assertIn('cloud-test', v1_data['instance_id']) + self.assertIn('cloud-test', v1_data['local_hostname']) self.assertIsNone( v1_data['region'], 'found unexpected lxd region %s' % v1_data['region']) @@ -248,18 +247,17 @@ class CloudTestCase(unittest.TestCase): ' OS: %s not bionic or newer' % self.os_name) instance_data = json.loads(out) v1_data = instance_data.get('v1', {}) - self.assertEqual( - ['ds/user_data'], instance_data['base64_encoded_keys']) - self.assertEqual('nocloud', v1_data['cloud-name']) + self.assertItemsEqual([], instance_data['base64_encoded_keys']) + self.assertEqual('nocloud', v1_data['cloud_name']) self.assertIsNone( - v1_data['availability-zone'], - 'found unexpected kvm availability-zone %s' % - v1_data['availability-zone']) + v1_data['availability_zone'], + 'found unexpected kvm availability_zone %s' % + v1_data['availability_zone']) self.assertIsNotNone( re.match(r'[\da-f]{8}(-[\da-f]{4}){3}-[\da-f]{12}', - v1_data['instance-id']), - 'kvm instance-id is not a UUID: %s' % v1_data['instance-id']) - self.assertIn('ubuntu', v1_data['local-hostname']) + v1_data['instance_id']), + 'kvm instance_id is not a UUID: %s' % v1_data['instance_id']) + self.assertIn('ubuntu', v1_data['local_hostname']) self.assertIsNone( v1_data['region'], 'found unexpected lxd region %s' % v1_data['region']) -- cgit v1.2.3