From a8dcad9ac62bb1d2a4f7489960395bad6cac9382 Mon Sep 17 00:00:00 2001 From: Scott Moser Date: Wed, 5 Sep 2018 16:02:25 +0000 Subject: tests: Disallow use of util.subp except for where needed. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In many cases, cloud-init uses 'util.subp' to run a subprocess. This is not really desirable in our unit tests as it makes the tests dependent upon existance of those utilities. The change here is to modify the base test case class (CiTestCase) to raise exception any time subp is called. Then, fix all callers. For cases where subp is necessary or actually desired, we can use it via   a.) context hander CiTestCase.allow_subp(value)   b.) class level self.allowed_subp = value Both cases the value is a list of acceptable executable names that will be called (essentially argv[0]). Some cleanups in AltCloud were done as the code was being updated. --- cloudinit/cmd/tests/test_status.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'cloudinit/cmd/tests') diff --git a/cloudinit/cmd/tests/test_status.py b/cloudinit/cmd/tests/test_status.py index 37a89936..aded8580 100644 --- a/cloudinit/cmd/tests/test_status.py +++ b/cloudinit/cmd/tests/test_status.py @@ -39,7 +39,8 @@ class TestStatus(CiTestCase): ensure_file(self.disable_file) # Create the ignored disable file (is_disabled, reason) = wrap_and_call( 'cloudinit.cmd.status', - {'uses_systemd': False}, + {'uses_systemd': False, + 'get_cmdline': "root=/dev/my-root not-important"}, status._is_cloudinit_disabled, self.disable_file, self.paths) self.assertFalse( is_disabled, 'expected enabled cloud-init on sysvinit') @@ -50,7 +51,8 @@ class TestStatus(CiTestCase): ensure_file(self.disable_file) # Create observed disable file (is_disabled, reason) = wrap_and_call( 'cloudinit.cmd.status', - {'uses_systemd': True}, + {'uses_systemd': True, + 'get_cmdline': "root=/dev/my-root not-important"}, status._is_cloudinit_disabled, self.disable_file, self.paths) self.assertTrue(is_disabled, 'expected disabled cloud-init') self.assertEqual( -- cgit v1.2.3 From c714651c1988a17f457426de63cdb8514d5a81b4 Mon Sep 17 00:00:00 2001 From: Robert Schweikert Date: Mon, 17 Sep 2018 13:07:39 +0000 Subject: Fall back to root:root on syslog permissions if other options fail. If the user has removed the default configuration file or does not set the syslog_fix_perms config option the user still ends up with a warning on SUSE distributions. Add root:root to the default builtin config. --- cloudinit/cmd/tests/test_main.py | 4 +++- cloudinit/settings.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'cloudinit/cmd/tests') diff --git a/cloudinit/cmd/tests/test_main.py b/cloudinit/cmd/tests/test_main.py index e2c54ae8..a1e534fb 100644 --- a/cloudinit/cmd/tests/test_main.py +++ b/cloudinit/cmd/tests/test_main.py @@ -125,7 +125,9 @@ class TestMain(FilesystemMockingTestCase): updated_cfg.update( {'def_log_file': '/var/log/cloud-init.log', 'log_cfgs': [], - 'syslog_fix_perms': ['syslog:adm', 'root:adm', 'root:wheel'], + 'syslog_fix_perms': [ + 'syslog:adm', 'root:adm', 'root:wheel', 'root:root' + ], 'vendor_data': {'enabled': True, 'prefix': []}}) updated_cfg.pop('system_info') diff --git a/cloudinit/settings.py b/cloudinit/settings.py index ea367cb7..b1ebaade 100644 --- a/cloudinit/settings.py +++ b/cloudinit/settings.py @@ -44,7 +44,7 @@ CFG_BUILTIN = { ], 'def_log_file': '/var/log/cloud-init.log', 'log_cfgs': [], - 'syslog_fix_perms': ['syslog:adm', 'root:adm', 'root:wheel'], + 'syslog_fix_perms': ['syslog:adm', 'root:adm', 'root:wheel', 'root:root'], 'system_info': { 'paths': { 'cloud_dir': '/var/lib/cloud', -- cgit v1.2.3 From fc4b966ba928b30b1c586407e752e0b51b1031e8 Mon Sep 17 00:00:00 2001 From: Chad Smith Date: Tue, 25 Sep 2018 21:59:16 +0000 Subject: cli: add cloud-init query subcommand to query instance metadata MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cloud-init caches any cloud metadata crawled during boot in the file /run/cloud-init/instance-data.json. Cloud-init also standardizes some of that metadata across all clouds. The command 'cloud-init query' surfaces a simple CLI to query or format any cached instance metadata so that scripts or end-users do not have to write tools to crawl metadata themselves. Since 'cloud-init query' is runnable by non-root users, redact any sensitive data from instance-data.json and provide a root-readable unredacted instance-data-sensitive.json. Datasources can now define a sensitive_metadata_keys tuple which will redact any matching keys which could contain passwords or credentials from instance-data.json. Also add the following standardized 'v1' instance-data.json keys:   - user_data: The base64encoded user-data provided at instance launch   - vendor_data: Any vendor_data provided to the instance at launch   - underscore_delimited versions of existing hyphenated keys:     instance_id, local_hostname, availability_zone, cloud_name --- bash_completion/cloud-init | 4 +- cloudinit/cmd/devel/render.py | 7 +- cloudinit/cmd/main.py | 10 ++ cloudinit/cmd/query.py | 155 ++++++++++++++++++ cloudinit/cmd/tests/test_query.py | 193 +++++++++++++++++++++++ cloudinit/helpers.py | 4 + cloudinit/sources/__init__.py | 76 +++++++-- cloudinit/sources/tests/test_init.py | 130 ++++++++++++--- doc/rtd/index.rst | 1 + doc/rtd/topics/capabilities.rst | 105 ++++++++++--- doc/rtd/topics/datasources.rst | 148 +---------------- doc/rtd/topics/instancedata.rst | 297 +++++++++++++++++++++++++++++++++++ integration-requirements.txt | 3 +- tests/cloud_tests/testcases/base.py | 52 +++--- 14 files changed, 952 insertions(+), 233 deletions(-) create mode 100644 cloudinit/cmd/query.py create mode 100644 cloudinit/cmd/tests/test_query.py create mode 100644 doc/rtd/topics/instancedata.rst (limited to 'cloudinit/cmd/tests') diff --git a/bash_completion/cloud-init b/bash_completion/cloud-init index 6d01bf3a..8c25032f 100644 --- a/bash_completion/cloud-init +++ b/bash_completion/cloud-init @@ -10,7 +10,7 @@ _cloudinit_complete() cur_word="${COMP_WORDS[COMP_CWORD]}" prev_word="${COMP_WORDS[COMP_CWORD-1]}" - subcmds="analyze clean collect-logs devel dhclient-hook features init modules single status" + subcmds="analyze clean collect-logs devel dhclient-hook features init modules query single status" base_params="--help --file --version --debug --force" case ${COMP_CWORD} in 1) @@ -40,6 +40,8 @@ _cloudinit_complete() COMPREPLY=($(compgen -W "--help --mode" -- $cur_word)) ;; + query) + COMPREPLY=($(compgen -W "--all --help --instance-data --list-keys --user-data --vendor-data --debug" -- $cur_word));; single) COMPREPLY=($(compgen -W "--help --name --frequency --report" -- $cur_word)) ;; diff --git a/cloudinit/cmd/devel/render.py b/cloudinit/cmd/devel/render.py index e85933db..2ba6b681 100755 --- a/cloudinit/cmd/devel/render.py +++ b/cloudinit/cmd/devel/render.py @@ -9,7 +9,6 @@ import sys from cloudinit.handlers.jinja_template import render_jinja_payload_from_file from cloudinit import log from cloudinit.sources import INSTANCE_JSON_FILE -from cloudinit import util from . import addLogHandlerCLI, read_cfg_paths NAME = 'render' @@ -54,11 +53,7 @@ def handle_args(name, args): paths.run_dir, INSTANCE_JSON_FILE) else: instance_data_fn = args.instance_data - try: - with open(instance_data_fn) as stream: - instance_data = stream.read() - instance_data = util.load_json(instance_data) - except IOError: + if not os.path.exists(instance_data_fn): LOG.error('Missing instance-data.json file: %s', instance_data_fn) return 1 try: diff --git a/cloudinit/cmd/main.py b/cloudinit/cmd/main.py index 0eee583c..5a437020 100644 --- a/cloudinit/cmd/main.py +++ b/cloudinit/cmd/main.py @@ -791,6 +791,10 @@ def main(sysv_args=None): ' pass to this module')) parser_single.set_defaults(action=('single', main_single)) + parser_query = subparsers.add_parser( + 'query', + help='Query standardized instance metadata from the command line.') + parser_dhclient = subparsers.add_parser('dhclient-hook', help=('run the dhclient hook' 'to record network info')) @@ -842,6 +846,12 @@ def main(sysv_args=None): clean_parser(parser_clean) parser_clean.set_defaults( action=('clean', handle_clean_args)) + elif sysv_args[0] == 'query': + from cloudinit.cmd.query import ( + get_parser as query_parser, handle_args as handle_query_args) + query_parser(parser_query) + parser_query.set_defaults( + action=('render', handle_query_args)) elif sysv_args[0] == 'status': from cloudinit.cmd.status import ( get_parser as status_parser, handle_status_args) diff --git a/cloudinit/cmd/query.py b/cloudinit/cmd/query.py new file mode 100644 index 00000000..7d2d4fe4 --- /dev/null +++ b/cloudinit/cmd/query.py @@ -0,0 +1,155 @@ +# This file is part of cloud-init. See LICENSE file for license information. + +"""Query standardized instance metadata from the command line.""" + +import argparse +import os +import six +import sys + +from cloudinit.handlers.jinja_template import ( + convert_jinja_instance_data, render_jinja_payload) +from cloudinit.cmd.devel import addLogHandlerCLI, read_cfg_paths +from cloudinit import log +from cloudinit.sources import ( + INSTANCE_JSON_FILE, INSTANCE_JSON_SENSITIVE_FILE, REDACT_SENSITIVE_VALUE) +from cloudinit import util + +NAME = 'query' +LOG = log.getLogger(NAME) + + +def get_parser(parser=None): + """Build or extend an arg parser for query utility. + + @param parser: Optional existing ArgumentParser instance representing the + query subcommand which will be extended to support the args of + this utility. + + @returns: ArgumentParser with proper argument configuration. + """ + if not parser: + parser = argparse.ArgumentParser( + prog=NAME, description='Query cloud-init instance data') + parser.add_argument( + '-d', '--debug', action='store_true', default=False, + help='Add verbose messages during template render') + parser.add_argument( + '-i', '--instance-data', type=str, + help=('Path to instance-data.json file. Default is /run/cloud-init/%s' + % INSTANCE_JSON_FILE)) + parser.add_argument( + '-l', '--list-keys', action='store_true', default=False, + help=('List query keys available at the provided instance-data' + ' .')) + parser.add_argument( + '-u', '--user-data', type=str, + help=('Path to user-data file. Default is' + ' /var/lib/cloud/instance/user-data.txt')) + parser.add_argument( + '-v', '--vendor-data', type=str, + help=('Path to vendor-data file. Default is' + ' /var/lib/cloud/instance/vendor-data.txt')) + parser.add_argument( + 'varname', type=str, nargs='?', + help=('A dot-delimited instance data variable to query from' + ' instance-data query. For example: v2.local_hostname')) + parser.add_argument( + '-a', '--all', action='store_true', default=False, dest='dump_all', + help='Dump all available instance-data') + parser.add_argument( + '-f', '--format', type=str, dest='format', + help=('Optionally specify a custom output format string. Any' + ' instance-data variable can be specified between double-curly' + ' braces. For example -f "{{ v2.cloud_name }}"')) + return parser + + +def handle_args(name, args): + """Handle calls to 'cloud-init query' as a subcommand.""" + paths = None + addLogHandlerCLI(LOG, log.DEBUG if args.debug else log.WARNING) + if not any([args.list_keys, args.varname, args.format, args.dump_all]): + LOG.error( + 'Expected one of the options: --all, --format,' + ' --list-keys or varname') + get_parser().print_help() + return 1 + + uid = os.getuid() + if not all([args.instance_data, args.user_data, args.vendor_data]): + paths = read_cfg_paths() + if not args.instance_data: + if uid == 0: + default_json_fn = INSTANCE_JSON_SENSITIVE_FILE + else: + default_json_fn = INSTANCE_JSON_FILE # World readable + instance_data_fn = os.path.join(paths.run_dir, default_json_fn) + else: + instance_data_fn = args.instance_data + if not args.user_data: + user_data_fn = os.path.join(paths.instance_link, 'user-data.txt') + else: + user_data_fn = args.user_data + if not args.vendor_data: + vendor_data_fn = os.path.join(paths.instance_link, 'vendor-data.txt') + else: + vendor_data_fn = args.vendor_data + + try: + instance_json = util.load_file(instance_data_fn) + except IOError: + LOG.error('Missing instance-data.json file: %s', instance_data_fn) + return 1 + + instance_data = util.load_json(instance_json) + if uid != 0: + instance_data['userdata'] = ( + '<%s> file:%s' % (REDACT_SENSITIVE_VALUE, user_data_fn)) + instance_data['vendordata'] = ( + '<%s> file:%s' % (REDACT_SENSITIVE_VALUE, vendor_data_fn)) + else: + instance_data['userdata'] = util.load_file(user_data_fn) + instance_data['vendordata'] = util.load_file(vendor_data_fn) + if args.format: + payload = '## template: jinja\n{fmt}'.format(fmt=args.format) + rendered_payload = render_jinja_payload( + payload=payload, payload_fn='query commandline', + instance_data=instance_data, + debug=True if args.debug else False) + if rendered_payload: + print(rendered_payload) + return 0 + return 1 + + response = convert_jinja_instance_data(instance_data) + if args.varname: + try: + for var in args.varname.split('.'): + response = response[var] + except KeyError: + LOG.error('Undefined instance-data key %s', args.varname) + return 1 + if args.list_keys: + if not isinstance(response, dict): + LOG.error("--list-keys provided but '%s' is not a dict", var) + return 1 + response = '\n'.join(sorted(response.keys())) + elif args.list_keys: + response = '\n'.join(sorted(response.keys())) + if not isinstance(response, six.string_types): + response = util.json_dumps(response) + print(response) + return 0 + + +def main(): + """Tool to query specific instance-data values.""" + parser = get_parser() + sys.exit(handle_args(NAME, parser.parse_args())) + + +if __name__ == '__main__': + main() + +# vi: ts=4 expandtab diff --git a/cloudinit/cmd/tests/test_query.py b/cloudinit/cmd/tests/test_query.py new file mode 100644 index 00000000..fb87c6ab --- /dev/null +++ b/cloudinit/cmd/tests/test_query.py @@ -0,0 +1,193 @@ +# This file is part of cloud-init. See LICENSE file for license information. + +from six import StringIO +from textwrap import dedent +import os + +from collections import namedtuple +from cloudinit.cmd import query +from cloudinit.helpers import Paths +from cloudinit.sources import REDACT_SENSITIVE_VALUE, INSTANCE_JSON_FILE +from cloudinit.tests.helpers import CiTestCase, mock +from cloudinit.util import ensure_dir, write_file + + +class TestQuery(CiTestCase): + + with_logs = True + + args = namedtuple( + 'queryargs', + ('debug dump_all format instance_data list_keys user_data vendor_data' + ' varname')) + + def setUp(self): + super(TestQuery, self).setUp() + self.tmp = self.tmp_dir() + self.instance_data = self.tmp_path('instance-data', dir=self.tmp) + + def test_handle_args_error_on_missing_param(self): + """Error when missing required parameters and print usage.""" + args = self.args( + debug=False, dump_all=False, format=None, instance_data=None, + list_keys=False, user_data=None, vendor_data=None, varname=None) + with mock.patch('sys.stderr', new_callable=StringIO) as m_stderr: + with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout: + self.assertEqual(1, query.handle_args('anyname', args)) + expected_error = ( + 'ERROR: Expected one of the options: --all, --format, --list-keys' + ' or varname\n') + self.assertIn(expected_error, self.logs.getvalue()) + self.assertIn('usage: query', m_stdout.getvalue()) + self.assertIn(expected_error, m_stderr.getvalue()) + + def test_handle_args_error_on_missing_instance_data(self): + """When instance_data file path does not exist, log an error.""" + absent_fn = self.tmp_path('absent', dir=self.tmp) + args = self.args( + debug=False, dump_all=True, format=None, instance_data=absent_fn, + list_keys=False, user_data='ud', vendor_data='vd', varname=None) + with mock.patch('sys.stderr', new_callable=StringIO) as m_stderr: + self.assertEqual(1, query.handle_args('anyname', args)) + self.assertIn( + 'ERROR: Missing instance-data.json file: %s' % absent_fn, + self.logs.getvalue()) + self.assertIn( + 'ERROR: Missing instance-data.json file: %s' % absent_fn, + m_stderr.getvalue()) + + def test_handle_args_defaults_instance_data(self): + """When no instance_data argument, default to configured run_dir.""" + args = self.args( + debug=False, dump_all=True, format=None, instance_data=None, + list_keys=False, user_data=None, vendor_data=None, varname=None) + run_dir = self.tmp_path('run_dir', dir=self.tmp) + ensure_dir(run_dir) + paths = Paths({'run_dir': run_dir}) + self.add_patch('cloudinit.cmd.query.read_cfg_paths', 'm_paths') + self.m_paths.return_value = paths + with mock.patch('sys.stderr', new_callable=StringIO) as m_stderr: + self.assertEqual(1, query.handle_args('anyname', args)) + json_file = os.path.join(run_dir, INSTANCE_JSON_FILE) + self.assertIn( + 'ERROR: Missing instance-data.json file: %s' % json_file, + self.logs.getvalue()) + self.assertIn( + 'ERROR: Missing instance-data.json file: %s' % json_file, + m_stderr.getvalue()) + + def test_handle_args_dumps_all_instance_data(self): + """When --all is specified query will dump all instance data vars.""" + write_file(self.instance_data, '{"my-var": "it worked"}') + args = self.args( + debug=False, dump_all=True, format=None, + instance_data=self.instance_data, list_keys=False, + user_data='ud', vendor_data='vd', varname=None) + with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout: + self.assertEqual(0, query.handle_args('anyname', args)) + self.assertEqual( + '{\n "my_var": "it worked",\n "userdata": "<%s> file:ud",\n' + ' "vendordata": "<%s> file:vd"\n}\n' % ( + REDACT_SENSITIVE_VALUE, REDACT_SENSITIVE_VALUE), + m_stdout.getvalue()) + + def test_handle_args_returns_top_level_varname(self): + """When the argument varname is passed, report its value.""" + write_file(self.instance_data, '{"my-var": "it worked"}') + args = self.args( + debug=False, dump_all=True, format=None, + instance_data=self.instance_data, list_keys=False, + user_data='ud', vendor_data='vd', varname='my_var') + with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout: + self.assertEqual(0, query.handle_args('anyname', args)) + self.assertEqual('it worked\n', m_stdout.getvalue()) + + def test_handle_args_returns_nested_varname(self): + """If user_data file is a jinja template render instance-data vars.""" + write_file(self.instance_data, + '{"v1": {"key-2": "value-2"}, "my-var": "it worked"}') + args = self.args( + debug=False, dump_all=False, format=None, + instance_data=self.instance_data, user_data='ud', vendor_data='vd', + list_keys=False, varname='v1.key_2') + with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout: + self.assertEqual(0, query.handle_args('anyname', args)) + self.assertEqual('value-2\n', m_stdout.getvalue()) + + def test_handle_args_returns_standardized_vars_to_top_level_aliases(self): + """Any standardized vars under v# are promoted as top-level aliases.""" + write_file( + self.instance_data, + '{"v1": {"v1_1": "val1.1"}, "v2": {"v2_2": "val2.2"},' + ' "top": "gun"}') + expected = dedent("""\ + { + "top": "gun", + "userdata": " file:ud", + "v1": { + "v1_1": "val1.1" + }, + "v1_1": "val1.1", + "v2": { + "v2_2": "val2.2" + }, + "v2_2": "val2.2", + "vendordata": " file:vd" + } + """) + args = self.args( + debug=False, dump_all=True, format=None, + instance_data=self.instance_data, user_data='ud', vendor_data='vd', + list_keys=False, varname=None) + with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout: + self.assertEqual(0, query.handle_args('anyname', args)) + self.assertEqual(expected, m_stdout.getvalue()) + + def test_handle_args_list_keys_sorts_top_level_keys_when_no_varname(self): + """Sort all top-level keys when only --list-keys provided.""" + write_file( + self.instance_data, + '{"v1": {"v1_1": "val1.1"}, "v2": {"v2_2": "val2.2"},' + ' "top": "gun"}') + expected = 'top\nuserdata\nv1\nv1_1\nv2\nv2_2\nvendordata\n' + args = self.args( + debug=False, dump_all=False, format=None, + instance_data=self.instance_data, list_keys=True, user_data='ud', + vendor_data='vd', varname=None) + with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout: + self.assertEqual(0, query.handle_args('anyname', args)) + self.assertEqual(expected, m_stdout.getvalue()) + + def test_handle_args_list_keys_sorts_nested_keys_when_varname(self): + """Sort all nested keys of varname object when --list-keys provided.""" + write_file( + self.instance_data, + '{"v1": {"v1_1": "val1.1", "v1_2": "val1.2"}, "v2":' + + ' {"v2_2": "val2.2"}, "top": "gun"}') + expected = 'v1_1\nv1_2\n' + args = self.args( + debug=False, dump_all=False, format=None, + instance_data=self.instance_data, list_keys=True, + user_data='ud', vendor_data='vd', varname='v1') + with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout: + self.assertEqual(0, query.handle_args('anyname', args)) + self.assertEqual(expected, m_stdout.getvalue()) + + def test_handle_args_list_keys_errors_when_varname_is_not_a_dict(self): + """Raise an error when --list-keys and varname specify a non-list.""" + write_file( + self.instance_data, + '{"v1": {"v1_1": "val1.1", "v1_2": "val1.2"}, "v2": ' + + '{"v2_2": "val2.2"}, "top": "gun"}') + expected_error = "ERROR: --list-keys provided but 'top' is not a dict" + args = self.args( + debug=False, dump_all=False, format=None, + instance_data=self.instance_data, list_keys=True, user_data='ud', + vendor_data='vd', varname='top') + with mock.patch('sys.stderr', new_callable=StringIO) as m_stderr: + with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout: + self.assertEqual(1, query.handle_args('anyname', args)) + self.assertEqual('', m_stdout.getvalue()) + self.assertIn(expected_error, m_stderr.getvalue()) + +# vi: ts=4 expandtab diff --git a/cloudinit/helpers.py b/cloudinit/helpers.py index 3cc1fb19..dcd2645e 100644 --- a/cloudinit/helpers.py +++ b/cloudinit/helpers.py @@ -239,6 +239,10 @@ class ConfigMerger(object): if cc_fn and os.path.isfile(cc_fn): try: i_cfgs.append(util.read_conf(cc_fn)) + except PermissionError: + LOG.debug( + 'Skipped loading cloud-config from %s due to' + ' non-root.', cc_fn) except Exception: util.logexc(LOG, 'Failed loading of cloud-config from %s', cc_fn) diff --git a/cloudinit/sources/__init__.py b/cloudinit/sources/__init__.py index a775f1a8..730e8174 100644 --- a/cloudinit/sources/__init__.py +++ b/cloudinit/sources/__init__.py @@ -38,8 +38,12 @@ DEP_FILESYSTEM = "FILESYSTEM" DEP_NETWORK = "NETWORK" DS_PREFIX = 'DataSource' -# File in which instance meta-data, user-data and vendor-data is written +# File in which public available instance meta-data is written +# security-sensitive key values are redacted from this world-readable file INSTANCE_JSON_FILE = 'instance-data.json' +# security-sensitive key values are present in this root-readable file +INSTANCE_JSON_SENSITIVE_FILE = 'instance-data-sensitive.json' +REDACT_SENSITIVE_VALUE = 'redacted for non-root user' # Key which can be provide a cloud's official product name to cloud-init METADATA_CLOUD_NAME_KEY = 'cloud-name' @@ -58,7 +62,7 @@ class InvalidMetaDataException(Exception): pass -def process_instance_metadata(metadata, key_path=''): +def process_instance_metadata(metadata, key_path='', sensitive_keys=()): """Process all instance metadata cleaning it up for persisting as json. Strip ci-b64 prefix and catalog any 'base64_encoded_keys' as a list @@ -67,22 +71,46 @@ def process_instance_metadata(metadata, key_path=''): """ md_copy = copy.deepcopy(metadata) md_copy['base64_encoded_keys'] = [] + md_copy['sensitive_keys'] = [] for key, val in metadata.items(): if key_path: sub_key_path = key_path + '/' + key else: sub_key_path = key + if key in sensitive_keys or sub_key_path in sensitive_keys: + md_copy['sensitive_keys'].append(sub_key_path) if isinstance(val, str) and val.startswith('ci-b64:'): md_copy['base64_encoded_keys'].append(sub_key_path) md_copy[key] = val.replace('ci-b64:', '') if isinstance(val, dict): - return_val = process_instance_metadata(val, sub_key_path) + return_val = process_instance_metadata( + val, sub_key_path, sensitive_keys) md_copy['base64_encoded_keys'].extend( return_val.pop('base64_encoded_keys')) + md_copy['sensitive_keys'].extend( + return_val.pop('sensitive_keys')) md_copy[key] = return_val return md_copy +def redact_sensitive_keys(metadata, redact_value=REDACT_SENSITIVE_VALUE): + """Redact any sensitive keys from to provided metadata dictionary. + + Replace any keys values listed in 'sensitive_keys' with redact_value. + """ + if not metadata.get('sensitive_keys', []): + return metadata + md_copy = copy.deepcopy(metadata) + for key_path in metadata.get('sensitive_keys'): + path_parts = key_path.split('/') + obj = md_copy + for path in path_parts: + if isinstance(obj[path], dict) and path != path_parts[-1]: + obj = obj[path] + obj[path] = redact_value + return md_copy + + URLParams = namedtuple( 'URLParms', ['max_wait_seconds', 'timeout_seconds', 'num_retries']) @@ -127,6 +155,10 @@ class DataSource(object): _dirty_cache = False + # N-tuple of keypaths or keynames redact from instance-data.json for + # non-root users + sensitive_metadata_keys = ('security-credentials',) + def __init__(self, sys_cfg, distro, paths, ud_proc=None): self.sys_cfg = sys_cfg self.distro = distro @@ -152,12 +184,24 @@ class DataSource(object): def _get_standardized_metadata(self): """Return a dictionary of standardized metadata keys.""" - return {'v1': { - 'local-hostname': self.get_hostname(), - 'instance-id': self.get_instance_id(), - 'cloud-name': self.cloud_name, - 'region': self.region, - 'availability-zone': self.availability_zone}} + local_hostname = self.get_hostname() + instance_id = self.get_instance_id() + availability_zone = self.availability_zone + cloud_name = self.cloud_name + # When adding new standard keys prefer underscore-delimited instead + # of hyphen-delimted to support simple variable references in jinja + # templates. + return { + 'v1': { + 'availability-zone': availability_zone, + 'availability_zone': availability_zone, + 'cloud-name': cloud_name, + 'cloud_name': cloud_name, + 'instance-id': instance_id, + 'instance_id': instance_id, + 'local-hostname': local_hostname, + 'local_hostname': local_hostname, + 'region': self.region}} def clear_cached_attrs(self, attr_defaults=()): """Reset any cached metadata attributes to datasource defaults. @@ -200,9 +244,7 @@ class DataSource(object): """ instance_data = { 'ds': { - 'meta_data': self.metadata, - 'user_data': self.get_userdata_raw(), - 'vendor_data': self.get_vendordata_raw()}} + 'meta_data': self.metadata}} if hasattr(self, 'network_json'): network_json = getattr(self, 'network_json') if network_json != UNSET: @@ -217,7 +259,9 @@ class DataSource(object): # Process content base64encoding unserializable values content = util.json_dumps(instance_data) # Strip base64: prefix and set base64_encoded_keys list. - processed_data = process_instance_metadata(json.loads(content)) + processed_data = process_instance_metadata( + json.loads(content), + sensitive_keys=self.sensitive_metadata_keys) except TypeError as e: LOG.warning('Error persisting instance-data.json: %s', str(e)) return False @@ -225,7 +269,11 @@ class DataSource(object): LOG.warning('Error persisting instance-data.json: %s', str(e)) return False json_file = os.path.join(self.paths.run_dir, INSTANCE_JSON_FILE) - write_json(json_file, processed_data, mode=0o600) + write_json(json_file, processed_data) # World readable + json_sensitive_file = os.path.join(self.paths.run_dir, + INSTANCE_JSON_SENSITIVE_FILE) + write_json(json_sensitive_file, + redact_sensitive_keys(processed_data), mode=0o600) return True def _get_data(self): diff --git a/cloudinit/sources/tests/test_init.py b/cloudinit/sources/tests/test_init.py index 8299af23..6b965750 100644 --- a/cloudinit/sources/tests/test_init.py +++ b/cloudinit/sources/tests/test_init.py @@ -1,5 +1,6 @@ # This file is part of cloud-init. See LICENSE file for license information. +import copy import inspect import os import six @@ -9,7 +10,8 @@ from cloudinit.event import EventType from cloudinit.helpers import Paths from cloudinit import importer from cloudinit.sources import ( - INSTANCE_JSON_FILE, DataSource, UNSET) + INSTANCE_JSON_FILE, INSTANCE_JSON_SENSITIVE_FILE, REDACT_SENSITIVE_VALUE, + UNSET, DataSource, redact_sensitive_keys) from cloudinit.tests.helpers import CiTestCase, skipIf, mock from cloudinit.user_data import UserDataProcessor from cloudinit import util @@ -20,20 +22,24 @@ class DataSourceTestSubclassNet(DataSource): dsname = 'MyTestSubclass' url_max_wait = 55 - def __init__(self, sys_cfg, distro, paths, custom_userdata=None, - get_data_retval=True): + def __init__(self, sys_cfg, distro, paths, custom_metadata=None, + custom_userdata=None, get_data_retval=True): super(DataSourceTestSubclassNet, self).__init__( sys_cfg, distro, paths) self._custom_userdata = custom_userdata + self._custom_metadata = custom_metadata self._get_data_retval = get_data_retval def _get_cloud_name(self): return 'SubclassCloudName' def _get_data(self): - self.metadata = {'availability_zone': 'myaz', - 'local-hostname': 'test-subclass-hostname', - 'region': 'myregion'} + if self._custom_metadata: + self.metadata = self._custom_metadata + else: + self.metadata = {'availability_zone': 'myaz', + 'local-hostname': 'test-subclass-hostname', + 'region': 'myregion'} if self._custom_userdata: self.userdata_raw = self._custom_userdata else: @@ -278,7 +284,7 @@ class TestDataSource(CiTestCase): os.path.exists(json_file), 'Found unexpected file %s' % json_file) def test_get_data_writes_json_instance_data_on_success(self): - """get_data writes INSTANCE_JSON_FILE to run_dir as readonly root.""" + """get_data writes INSTANCE_JSON_FILE to run_dir as world readable.""" tmp = self.tmp_dir() datasource = DataSourceTestSubclassNet( self.sys_cfg, self.distro, Paths({'run_dir': tmp})) @@ -287,40 +293,90 @@ class TestDataSource(CiTestCase): content = util.load_file(json_file) expected = { 'base64_encoded_keys': [], + 'sensitive_keys': [], 'v1': { 'availability-zone': 'myaz', + 'availability_zone': 'myaz', 'cloud-name': 'subclasscloudname', + 'cloud_name': 'subclasscloudname', 'instance-id': 'iid-datasource', + 'instance_id': 'iid-datasource', 'local-hostname': 'test-subclass-hostname', + 'local_hostname': 'test-subclass-hostname', 'region': 'myregion'}, 'ds': { 'meta_data': {'availability_zone': 'myaz', 'local-hostname': 'test-subclass-hostname', - 'region': 'myregion'}, - 'user_data': 'userdata_raw', - 'vendor_data': 'vendordata_raw'}} - self.maxDiff = None + 'region': 'myregion'}}} self.assertEqual(expected, util.load_json(content)) file_stat = os.stat(json_file) + self.assertEqual(0o644, stat.S_IMODE(file_stat.st_mode)) + self.assertEqual(expected, util.load_json(content)) + + def test_get_data_writes_json_instance_data_sensitive(self): + """get_data writes INSTANCE_JSON_SENSITIVE_FILE as readonly root.""" + tmp = self.tmp_dir() + datasource = DataSourceTestSubclassNet( + self.sys_cfg, self.distro, Paths({'run_dir': tmp}), + custom_metadata={ + 'availability_zone': 'myaz', + 'local-hostname': 'test-subclass-hostname', + 'region': 'myregion', + 'some': {'security-credentials': { + 'cred1': 'sekret', 'cred2': 'othersekret'}}}) + self.assertEqual( + ('security-credentials',), datasource.sensitive_metadata_keys) + datasource.get_data() + json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp) + sensitive_json_file = self.tmp_path(INSTANCE_JSON_SENSITIVE_FILE, tmp) + redacted = util.load_json(util.load_file(json_file)) + self.assertEqual( + {'cred1': 'sekret', 'cred2': 'othersekret'}, + redacted['ds']['meta_data']['some']['security-credentials']) + content = util.load_file(sensitive_json_file) + expected = { + 'base64_encoded_keys': [], + 'sensitive_keys': ['ds/meta_data/some/security-credentials'], + 'v1': { + 'availability-zone': 'myaz', + 'availability_zone': 'myaz', + 'cloud-name': 'subclasscloudname', + 'cloud_name': 'subclasscloudname', + 'instance-id': 'iid-datasource', + 'instance_id': 'iid-datasource', + 'local-hostname': 'test-subclass-hostname', + 'local_hostname': 'test-subclass-hostname', + 'region': 'myregion'}, + 'ds': { + 'meta_data': { + 'availability_zone': 'myaz', + 'local-hostname': 'test-subclass-hostname', + 'region': 'myregion', + 'some': {'security-credentials': REDACT_SENSITIVE_VALUE}}} + } + self.maxDiff = None + self.assertEqual(expected, util.load_json(content)) + file_stat = os.stat(sensitive_json_file) self.assertEqual(0o600, stat.S_IMODE(file_stat.st_mode)) + self.assertEqual(expected, util.load_json(content)) def test_get_data_handles_redacted_unserializable_content(self): """get_data warns unserializable content in INSTANCE_JSON_FILE.""" tmp = self.tmp_dir() datasource = DataSourceTestSubclassNet( self.sys_cfg, self.distro, Paths({'run_dir': tmp}), - custom_userdata={'key1': 'val1', 'key2': {'key2.1': self.paths}}) + custom_metadata={'key1': 'val1', 'key2': {'key2.1': self.paths}}) datasource.get_data() json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp) content = util.load_file(json_file) - expected_userdata = { + expected_metadata = { 'key1': 'val1', 'key2': { 'key2.1': "Warning: redacted unserializable type "}} instance_json = util.load_json(content) self.assertEqual( - expected_userdata, instance_json['ds']['user_data']) + expected_metadata, instance_json['ds']['meta_data']) def test_persist_instance_data_writes_ec2_metadata_when_set(self): """When ec2_metadata class attribute is set, persist to json.""" @@ -361,17 +417,17 @@ class TestDataSource(CiTestCase): tmp = self.tmp_dir() datasource = DataSourceTestSubclassNet( self.sys_cfg, self.distro, Paths({'run_dir': tmp}), - custom_userdata={'key1': 'val1', 'key2': {'key2.1': b'\x123'}}) + custom_metadata={'key1': 'val1', 'key2': {'key2.1': b'\x123'}}) self.assertTrue(datasource.get_data()) json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp) content = util.load_file(json_file) instance_json = util.load_json(content) - self.assertEqual( - ['ds/user_data/key2/key2.1'], + self.assertItemsEqual( + ['ds/meta_data/key2/key2.1'], instance_json['base64_encoded_keys']) self.assertEqual( {'key1': 'val1', 'key2': {'key2.1': 'EjM='}}, - instance_json['ds']['user_data']) + instance_json['ds']['meta_data']) @skipIf(not six.PY2, "json serialization on <= py2.7 handles bytes") def test_get_data_handles_bytes_values(self): @@ -379,7 +435,7 @@ class TestDataSource(CiTestCase): tmp = self.tmp_dir() datasource = DataSourceTestSubclassNet( self.sys_cfg, self.distro, Paths({'run_dir': tmp}), - custom_userdata={'key1': 'val1', 'key2': {'key2.1': b'\x123'}}) + custom_metadata={'key1': 'val1', 'key2': {'key2.1': b'\x123'}}) self.assertTrue(datasource.get_data()) json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp) content = util.load_file(json_file) @@ -387,7 +443,7 @@ class TestDataSource(CiTestCase): self.assertEqual([], instance_json['base64_encoded_keys']) self.assertEqual( {'key1': 'val1', 'key2': {'key2.1': '\x123'}}, - instance_json['ds']['user_data']) + instance_json['ds']['meta_data']) @skipIf(not six.PY2, "Only python2 hits UnicodeDecodeErrors on non-utf8") def test_non_utf8_encoding_logs_warning(self): @@ -395,7 +451,7 @@ class TestDataSource(CiTestCase): tmp = self.tmp_dir() datasource = DataSourceTestSubclassNet( self.sys_cfg, self.distro, Paths({'run_dir': tmp}), - custom_userdata={'key1': 'val1', 'key2': {'key2.1': b'ab\xaadef'}}) + custom_metadata={'key1': 'val1', 'key2': {'key2.1': b'ab\xaadef'}}) self.assertTrue(datasource.get_data()) json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp) self.assertFalse(os.path.exists(json_file)) @@ -509,4 +565,36 @@ class TestDataSource(CiTestCase): self.logs.getvalue()) +class TestRedactSensitiveData(CiTestCase): + + def test_redact_sensitive_data_noop_when_no_sensitive_keys_present(self): + """When sensitive_keys is absent or empty from metadata do nothing.""" + md = {'my': 'data'} + self.assertEqual( + md, redact_sensitive_keys(md, redact_value='redacted')) + md['sensitive_keys'] = [] + self.assertEqual( + md, redact_sensitive_keys(md, redact_value='redacted')) + + def test_redact_sensitive_data_redacts_exact_match_name(self): + """Only exact matched sensitive_keys are redacted from metadata.""" + md = {'sensitive_keys': ['md/secure'], + 'md': {'secure': 's3kr1t', 'insecure': 'publik'}} + secure_md = copy.deepcopy(md) + secure_md['md']['secure'] = 'redacted' + self.assertEqual( + secure_md, + redact_sensitive_keys(md, redact_value='redacted')) + + def test_redact_sensitive_data_does_redacts_with_default_string(self): + """When redact_value is absent, REDACT_SENSITIVE_VALUE is used.""" + md = {'sensitive_keys': ['md/secure'], + 'md': {'secure': 's3kr1t', 'insecure': 'publik'}} + secure_md = copy.deepcopy(md) + secure_md['md']['secure'] = 'redacted for non-root user' + self.assertEqual( + secure_md, + redact_sensitive_keys(md)) + + # vi: ts=4 expandtab diff --git a/doc/rtd/index.rst b/doc/rtd/index.rst index de67f361..20a99a30 100644 --- a/doc/rtd/index.rst +++ b/doc/rtd/index.rst @@ -31,6 +31,7 @@ initialization of a cloud instance. topics/capabilities.rst topics/availability.rst topics/format.rst + topics/instancedata.rst topics/dir_layout.rst topics/examples.rst topics/boot.rst diff --git a/doc/rtd/topics/capabilities.rst b/doc/rtd/topics/capabilities.rst index 2d8e2538..0d8b8947 100644 --- a/doc/rtd/topics/capabilities.rst +++ b/doc/rtd/topics/capabilities.rst @@ -18,7 +18,7 @@ User configurability User-data can be given by the user at instance launch time. See :ref:`user_data_formats` for acceptable user-data content. - + This is done via the ``--user-data`` or ``--user-data-file`` argument to ec2-run-instances for example. @@ -53,10 +53,9 @@ system: % cloud-init --help usage: cloud-init [-h] [--version] [--file FILES] - [--debug] [--force] - {init,modules,single,dhclient-hook,features,analyze,devel,collect-logs,clean,status} - ... + {init,modules,single,query,dhclient-hook,features,analyze,devel,collect-logs,clean,status} + ... optional arguments: -h, --help show this help message and exit @@ -68,17 +67,19 @@ system: your own risk) Subcommands: - {init,modules,single,dhclient-hook,features,analyze,devel,collect-logs,clean,status} + {init,modules,single,query,dhclient-hook,features,analyze,devel,collect-logs,clean,status} init initializes cloud-init and performs initial modules modules activates modules using a given configuration key single run a single module + query Query instance metadata from the command line dhclient-hook run the dhclient hookto record network info features list defined features analyze Devel tool: Analyze cloud-init logs and data devel Run development tools collect-logs Collect and tar all cloud-init debug info - clean Remove logs and artifacts so cloud-init can re-run. - status Report cloud-init status or wait on completion. + clean Remove logs and artifacts so cloud-init can re-run + status Report cloud-init status or wait on completion + CLI Subcommand details ====================== @@ -104,8 +105,8 @@ cloud-init status Report whether cloud-init is running, done, disabled or errored. Exits non-zero if an error is detected in cloud-init. - * **--long**: Detailed status information. - * **--wait**: Block until cloud-init completes. +* **--long**: Detailed status information. +* **--wait**: Block until cloud-init completes. .. code-block:: shell-session @@ -143,6 +144,68 @@ Logs collected are: * journalctl output * /var/lib/cloud/instance/user-data.txt +.. _cli_query: + +cloud-init query +------------------ +Query standardized cloud instance metadata crawled by cloud-init and stored +in ``/run/cloud-init/instance-data.json``. This is a convenience command-line +interface to reference any cached configuration metadata that cloud-init +crawls when booting the instance. See :ref:`instance_metadata` for more info. + +* **--all**: Dump all available instance data as json which can be queried. +* **--instance-data**: Optional path to a different instance-data.json file to + source for queries. +* **--list-keys**: List available query keys from cached instance data. + +.. code-block:: shell-session + + # List all top-level query keys available (includes standardized aliases) + % cloud-init query --list-keys + availability_zone + base64_encoded_keys + cloud_name + ds + instance_id + local_hostname + region + v1 + +* ****: A dot-delimited variable path into the instance-data.json + object. + +.. code-block:: shell-session + + # Query cloud-init standardized metadata on any cloud + % cloud-init query v1.cloud_name + aws # or openstack, azure, gce etc. + + # Any standardized instance-data under a key is aliased as a top-level + # key for convenience. + % cloud-init query cloud_name + aws # or openstack, azure, gce etc. + + # Query datasource-specific metadata on EC2 + % cloud-init query ds.meta_data.public_ipv4 + +* **--format** A string that will use jinja-template syntax to render a string + replacing + +.. code-block:: shell-session + + # Generate a custom hostname fqdn based on instance-id, cloud and region + % cloud-init query --format 'custom-{{instance_id}}.{{region}}.{{v1.cloud_name}}.com' + custom-i-0e91f69987f37ec74.us-east-2.aws.com + + +.. note:: + The standardized instance data keys under **v#** are guaranteed not to change + behavior or format. If using top-level convenience aliases for any + standardized instance data keys, the most value (highest **v#**) of that key + name is what is reported as the top-level value. So these aliases act as a + 'latest'. + + .. _cli_analyze: cloud-init analyze @@ -150,10 +213,10 @@ cloud-init analyze Get detailed reports of where cloud-init spends most of its time. See :ref:`boot_time_analysis` for more info. - * **blame** Report ordered by most costly operations. - * **dump** Machine-readable JSON dump of all cloud-init tracked events. - * **show** show time-ordered report of the cost of operations during each - boot stage. +* **blame** Report ordered by most costly operations. +* **dump** Machine-readable JSON dump of all cloud-init tracked events. +* **show** show time-ordered report of the cost of operations during each + boot stage. .. _cli_devel: @@ -182,8 +245,8 @@ cloud-init clean Remove cloud-init artifacts from /var/lib/cloud and optionally reboot the machine to so cloud-init re-runs all stages as it did on first boot. - * **--logs**: Optionally remove /var/log/cloud-init*log files. - * **--reboot**: Reboot the system after removing artifacts. +* **--logs**: Optionally remove /var/log/cloud-init*log files. +* **--reboot**: Reboot the system after removing artifacts. .. _cli_init: @@ -195,7 +258,7 @@ Can be run on the commandline, but is generally gated to run only once due to semaphores in **/var/lib/cloud/instance/sem/** and **/var/lib/cloud/sem**. - * **--local**: Run *init-local* stage instead of *init*. +* **--local**: Run *init-local* stage instead of *init*. .. _cli_modules: @@ -210,8 +273,8 @@ declared to run in various boot stages in the file commandline, but each module is gated to run only once due to semaphores in ``/var/lib/cloud/``. - * **--mode (init|config|final)**: Run *modules:init*, *modules:config* or - *modules:final* cloud-init stages. See :ref:`boot_stages` for more info. +* **--mode (init|config|final)**: Run *modules:init*, *modules:config* or + *modules:final* cloud-init stages. See :ref:`boot_stages` for more info. .. _cli_single: @@ -221,9 +284,9 @@ Attempt to run a single named cloud config module. The following example re-runs the cc_set_hostname module ignoring the module default frequency of once-per-instance: - * **--name**: The cloud-config module name to run - * **--frequency**: Optionally override the declared module frequency - with one of (always|once-per-instance|once) +* **--name**: The cloud-config module name to run +* **--frequency**: Optionally override the declared module frequency + with one of (always|once-per-instance|once) .. code-block:: shell-session diff --git a/doc/rtd/topics/datasources.rst b/doc/rtd/topics/datasources.rst index 14432e65..e34f145c 100644 --- a/doc/rtd/topics/datasources.rst +++ b/doc/rtd/topics/datasources.rst @@ -17,146 +17,10 @@ own way) internally a datasource abstract class was created to allow for a single way to access the different cloud systems methods to provide this data through the typical usage of subclasses. - -.. _instance_metadata: - -instance-data -------------- -For reference, cloud-init stores all the metadata, vendordata and userdata -provided by a cloud in a json blob at ``/run/cloud-init/instance-data.json``. -While the json contains datasource-specific keys and names, cloud-init will -maintain a minimal set of standardized keys that will remain stable on any -cloud. Standardized instance-data keys will be present under a "v1" key. -Any datasource metadata cloud-init consumes will all be present under the -"ds" key. - -Below is an instance-data.json example from an OpenStack instance: - -.. sourcecode:: json - - { - "base64-encoded-keys": [ - "ds/meta-data/random_seed", - "ds/user-data" - ], - "ds": { - "ec2_metadata": { - "ami-id": "ami-0000032f", - "ami-launch-index": "0", - "ami-manifest-path": "FIXME", - "block-device-mapping": { - "ami": "vda", - "ephemeral0": "/dev/vdb", - "root": "/dev/vda" - }, - "hostname": "xenial-test.novalocal", - "instance-action": "none", - "instance-id": "i-0006e030", - "instance-type": "m1.small", - "local-hostname": "xenial-test.novalocal", - "local-ipv4": "10.5.0.6", - "placement": { - "availability-zone": "None" - }, - "public-hostname": "xenial-test.novalocal", - "public-ipv4": "10.245.162.145", - "reservation-id": "r-fxm623oa", - "security-groups": "default" - }, - "meta-data": { - "availability_zone": null, - "devices": [], - "hostname": "xenial-test.novalocal", - "instance-id": "3e39d278-0644-4728-9479-678f9212d8f0", - "launch_index": 0, - "local-hostname": "xenial-test.novalocal", - "name": "xenial-test", - "project_id": "e0eb2d2538814...", - "random_seed": "A6yPN...", - "uuid": "3e39d278-0644-4728-9479-678f92..." - }, - "network_json": { - "links": [ - { - "ethernet_mac_address": "fa:16:3e:7d:74:9b", - "id": "tap9ca524d5-6e", - "mtu": 8958, - "type": "ovs", - "vif_id": "9ca524d5-6e5a-4809-936a-6901..." - } - ], - "networks": [ - { - "id": "network0", - "link": "tap9ca524d5-6e", - "network_id": "c6adfc18-9753-42eb-b3ea-18b57e6b837f", - "type": "ipv4_dhcp" - } - ], - "services": [ - { - "address": "10.10.160.2", - "type": "dns" - } - ] - }, - "user-data": "I2Nsb3VkLWNvbmZpZ...", - "vendor-data": null - }, - "v1": { - "availability-zone": null, - "cloud-name": "openstack", - "instance-id": "3e39d278-0644-4728-9479-678f9212d8f0", - "local-hostname": "xenial-test", - "region": null - } - } - - -As of cloud-init v. 18.4, any values present in -``/run/cloud-init/instance-data.json`` can be used in cloud-init user data -scripts or cloud config data. This allows consumers to use cloud-init's -vendor-neutral, standardized metadata keys as well as datasource-specific -content for any scripts or cloud-config modules they are using. - -To use instance-data.json values in scripts and **#config-config** files the -user-data will need to contain the following header as the first line **## template: jinja**. Cloud-init will source all variables defined in -``/run/cloud-init/instance-data.json`` and allow scripts or cloud-config files -to reference those paths. Below are two examples:: - - * Cloud config calling home with the ec2 public hostname and avaliability-zone - ``` - ## template: jinja - #cloud-config - runcmd: - - echo 'EC2 public hostname allocated to instance: {{ ds.meta_data.public_hostname }}' > /tmp/instance_metadata - - echo 'EC2 avaiability zone: {{ v1.availability_zone }}' >> /tmp/instance_metadata - - curl -X POST -d '{"hostname": "{{ds.meta_data.public_hostname }}", "availability-zone": "{{ v1.availability_zone }}"}' https://example.com.com - ``` - - * Custom user script performing different operations based on region - ``` - ## template: jinja - #!/bin/bash - {% if v1.region == 'us-east-2' -%} - echo 'Installing custom proxies for {{ v1.region }} - sudo apt-get install my-xtra-fast-stack - {%- endif %} - ... - - ``` - -.. note:: - Trying to reference jinja variables that don't exist in - instance-data.json will result in warnings in ``/var/log/cloud-init.log`` - and the following string in your rendered user-data: - ``CI_MISSING_JINJA_VAR/``. - -.. note:: - To save time designing your user-data for a specific cloud's - instance-data.json, use the 'render' cloud-init command on an - instance booted on your favorite cloud. See :ref:`cli_devel` for more - information. +Any metadata processed by cloud-init's datasources is persisted as +``/run/cloud0-init/instance-data.json``. Cloud-init provides tooling +to quickly introspect some of that data. See :ref:`instance_metadata` for +more information. Datasource API @@ -196,14 +60,14 @@ The current interface that a datasource object must provide is the following: # or does not exist) def device_name_to_device(self, name) - # gets the locale string this instance should be applying + # gets the locale string this instance should be applying # which typically used to adjust the instances locale settings files def get_locale(self) @property def availability_zone(self) - # gets the instance id that was assigned to this instance by the + # gets the instance id that was assigned to this instance by the # cloud provider or when said instance id does not exist in the backing # metadata this will return 'iid-datasource' def get_instance_id(self) diff --git a/doc/rtd/topics/instancedata.rst b/doc/rtd/topics/instancedata.rst new file mode 100644 index 00000000..634e1807 --- /dev/null +++ b/doc/rtd/topics/instancedata.rst @@ -0,0 +1,297 @@ +.. _instance_metadata: + +***************** +Instance Metadata +***************** + +What is a instance data? +======================== + +Instance data is the collection of all configuration data that cloud-init +processes to configure the instance. This configuration typically +comes from any number of sources: + +* cloud-provided metadata services (aka metadata) +* custom config-drive attached to the instance +* cloud-config seed files in the booted cloud image or distribution +* vendordata provided from files or cloud metadata services +* userdata provided at instance creation + +Each cloud provider presents unique configuration metadata in different +formats to the instance. Cloud-init provides a cache of any crawled metadata +as well as a versioned set of standardized instance data keys which it makes +available on all platforms. + +Cloud-init produces a simple json object in +``/run/cloud-init/instance-data.json`` which represents standardized and +versioned representation of the metadata it consumes during initial boot. The +intent is to provide the following benefits to users or scripts on any system +deployed with cloud-init: + +* simple static object to query to obtain a instance's metadata +* speed: avoid costly network transactions for metadata that is already cached + on the filesytem +* reduce need to recrawl metadata services for static metadata that is already + cached +* leverage cloud-init's best practices for crawling cloud-metadata services +* avoid rolling unique metadata crawlers on each cloud platform to get + metadata configuration values + +Cloud-init stores any instance data processed in the following files: + +* ``/run/cloud-init/instance-data.json``: world-readable json containing + standardized keys, sensitive keys redacted +* ``/run/cloud-init/instance-data-sensitive.json``: root-readable unredacted + json blob +* ``/var/lib/cloud/instance/user-data.txt``: root-readable sensitive raw + userdata +* ``/var/lib/cloud/instance/vendor-data.txt``: root-readable sensitive raw + vendordata + +Cloud-init redacts any security sensitive content from instance-data.json, +stores ``/run/cloud-init/instance-data.json`` as a world-readable json file. +Because user-data and vendor-data can contain passwords both of these files +are readonly for *root* as well. The *root* user can also read +``/run/cloud-init/instance-data-sensitive.json`` which is all instance data +from instance-data.json as well as unredacted sensitive content. + + +Format of instance-data.json +============================ + +The instance-data.json and instance-data-sensitive.json files are well-formed +JSON and record the set of keys and values for any metadata processed by +cloud-init. Cloud-init standardizes the format for this content so that it +can be generalized across different cloud platforms. + +There are three basic top-level keys: + +* **base64_encoded_keys**: A list of forward-slash delimited key paths into + the instance-data.json object whose value is base64encoded for json + compatibility. Values at these paths should be decoded to get the original + value. + +* **sensitive_keys**: A list of forward-slash delimited key paths into + the instance-data.json object whose value is considered by the datasource as + 'security sensitive'. Only the keys listed here will be redacted from + instance-data.json for non-root users. + +* **ds**: Datasource-specific metadata crawled for the specific cloud + platform. It should closely represent the structure of the cloud metadata + crawled. The structure of content and details provided are entirely + cloud-dependent. Mileage will vary depending on what the cloud exposes. + The content exposed under the 'ds' key is currently **experimental** and + expected to change slightly in the upcoming cloud-init release. + +* **v1**: Standardized cloud-init metadata keys, these keys are guaranteed to + exist on all cloud platforms. They will also retain their current behavior + and format and will be carried forward even if cloud-init introduces a new + version of standardized keys with **v2**. + +The standardized keys present: + ++----------------------+-----------------------------------------------+---------------------------+ +| Key path | Description | Examples | ++======================+===============================================+===========================+ +| v1.cloud_name | The name of the cloud provided by metadata | aws, openstack, azure, | +| | key 'cloud-name' or the cloud-init datasource | configdrive, nocloud, | +| | name which was discovered. | ovf, etc. | ++----------------------+-----------------------------------------------+---------------------------+ +| v1.instance_id | Unique instance_id allocated by the cloud | i- | ++----------------------+-----------------------------------------------+---------------------------+ +| v1.local_hostname | The internal or local hostname of the system | ip-10-41-41-70, | +| | | | ++----------------------+-----------------------------------------------+---------------------------+ +| v1.region | The physical region/datacenter in which the | us-east-2 | +| | instance is deployed | | ++----------------------+-----------------------------------------------+---------------------------+ +| v1.availability_zone | The physical availability zone in which the | us-east-2b, nova, null | +| | instance is deployed | | ++----------------------+-----------------------------------------------+---------------------------+ + + +Below is an example of ``/run/cloud-init/instance_data.json`` on an EC2 +instance: + +.. sourcecode:: json + + { + "base64_encoded_keys": [], + "sensitive_keys": [], + "ds": { + "meta_data": { + "ami-id": "ami-014e1416b628b0cbf", + "ami-launch-index": "0", + "ami-manifest-path": "(unknown)", + "block-device-mapping": { + "ami": "/dev/sda1", + "ephemeral0": "sdb", + "ephemeral1": "sdc", + "root": "/dev/sda1" + }, + "hostname": "ip-10-41-41-70.us-east-2.compute.internal", + "instance-action": "none", + "instance-id": "i-04fa31cfc55aa7976", + "instance-type": "t2.micro", + "local-hostname": "ip-10-41-41-70.us-east-2.compute.internal", + "local-ipv4": "10.41.41.70", + "mac": "06:b6:92:dd:9d:24", + "metrics": { + "vhostmd": "" + }, + "network": { + "interfaces": { + "macs": { + "06:b6:92:dd:9d:24": { + "device-number": "0", + "interface-id": "eni-08c0c9fdb99b6e6f4", + "ipv4-associations": { + "18.224.22.43": "10.41.41.70" + }, + "local-hostname": "ip-10-41-41-70.us-east-2.compute.internal", + "local-ipv4s": "10.41.41.70", + "mac": "06:b6:92:dd:9d:24", + "owner-id": "437526006925", + "public-hostname": "ec2-18-224-22-43.us-east-2.compute.amazonaws.com", + "public-ipv4s": "18.224.22.43", + "security-group-ids": "sg-828247e9", + "security-groups": "Cloud-init integration test secgroup", + "subnet-id": "subnet-282f3053", + "subnet-ipv4-cidr-block": "10.41.41.0/24", + "subnet-ipv6-cidr-blocks": "2600:1f16:b80:ad00::/64", + "vpc-id": "vpc-252ef24d", + "vpc-ipv4-cidr-block": "10.41.0.0/16", + "vpc-ipv4-cidr-blocks": "10.41.0.0/16", + "vpc-ipv6-cidr-blocks": "2600:1f16:b80:ad00::/56" + } + } + } + }, + "placement": { + "availability-zone": "us-east-2b" + }, + "profile": "default-hvm", + "public-hostname": "ec2-18-224-22-43.us-east-2.compute.amazonaws.com", + "public-ipv4": "18.224.22.43", + "public-keys": { + "cloud-init-integration": [ + "ssh-rsa + AAAAB3NzaC1yc2EAAAADAQABAAABAQDSL7uWGj8cgWyIOaspgKdVy0cKJ+UTjfv7jBOjG2H/GN8bJVXy72XAvnhM0dUM+CCs8FOf0YlPX+Frvz2hKInrmRhZVwRSL129PasD12MlI3l44u6IwS1o/W86Q+tkQYEljtqDOo0a+cOsaZkvUNzUyEXUwz/lmYa6G4hMKZH4NBj7nbAAF96wsMCoyNwbWryBnDYUr6wMbjRR1J9Pw7Xh7WRC73wy4Va2YuOgbD3V/5ZrFPLbWZW/7TFXVrql04QVbyei4aiFR5n//GvoqwQDNe58LmbzX/xvxyKJYdny2zXmdAhMxbrpFQsfpkJ9E/H5w0yOdSvnWbUoG5xNGoOB + cloud-init-integration" + ] + }, + "reservation-id": "r-06ab75e9346f54333", + "security-groups": "Cloud-init integration test secgroup", + "services": { + "domain": "amazonaws.com", + "partition": "aws" + } + } + }, + "v1": { + "availability-zone": "us-east-2b", + "availability_zone": "us-east-2b", + "cloud-name": "aws", + "cloud_name": "aws", + "instance-id": "i-04fa31cfc55aa7976", + "instance_id": "i-04fa31cfc55aa7976", + "local-hostname": "ip-10-41-41-70", + "local_hostname": "ip-10-41-41-70", + "region": "us-east-2" + } + } + + +Using instance-data +=================== + +As of cloud-init v. 18.4, any variables present in +``/run/cloud-init/instance-data.json`` can be used in: + +* User-data scripts +* Cloud config data +* Command line interface via **cloud-init query** or + **cloud-init devel render** + +Many clouds allow users to provide user-data to an instance at +the time the instance is launched. Cloud-init supports a number of +:ref:`user_data_formats`. + +Both user-data scripts and **#cloud-config** data support jinja template +rendering. +When the first line of the provided user-data begins with, +**## template: jinja** cloud-init will use jinja to render that file. +Any instance-data-sensitive.json variables are surfaced as dot-delimited +jinja template variables because cloud-config modules are run as 'root' +user. + + +Below are some examples of providing these types of user-data: + +* Cloud config calling home with the ec2 public hostname and avaliability-zone + +.. code-block:: shell-session + + ## template: jinja + #cloud-config + runcmd: + - echo 'EC2 public hostname allocated to instance: {{ + ds.meta_data.public_hostname }}' > /tmp/instance_metadata + - echo 'EC2 avaiability zone: {{ v1.availability_zone }}' >> + /tmp/instance_metadata + - curl -X POST -d '{"hostname": "{{ds.meta_data.public_hostname }}", + "availability-zone": "{{ v1.availability_zone }}"}' + https://example.com + +* Custom user-data script performing different operations based on region + +.. code-block:: shell-session + + ## template: jinja + #!/bin/bash + {% if v1.region == 'us-east-2' -%} + echo 'Installing custom proxies for {{ v1.region }} + sudo apt-get install my-xtra-fast-stack + {%- endif %} + ... + +.. note:: + Trying to reference jinja variables that don't exist in + instance-data.json will result in warnings in ``/var/log/cloud-init.log`` + and the following string in your rendered user-data: + ``CI_MISSING_JINJA_VAR/``. + +Cloud-init also surfaces a commandline tool **cloud-init query** which can +assist developers or scripts with obtaining instance metadata easily. See +:ref:`cli_query` for more information. + +To cut down on keystrokes on the command line, cloud-init also provides +top-level key aliases for any standardized ``v#`` keys present. The preceding +``v1`` is not required of ``v1.var_name`` These aliases will represent the +value of the highest versioned standard key. For example, ``cloud_name`` +value will be ``v2.cloud_name`` if both ``v1`` and ``v2`` keys are present in +instance-data.json. +The **query** command also publishes ``userdata`` and ``vendordata`` keys to +the root user which will contain the decoded user and vendor data provided to +this instance. Non-root users referencing userdata or vendordata keys will +see only redacted values. + +.. code-block:: shell-session + + # List all top-level instance-data keys available + % cloud-init query --list-keys + + # Find your EC2 ami-id + % cloud-init query ds.metadata.ami_id + + # Format your cloud_name and region using jinja template syntax + % cloud-init query --format 'cloud: {{ v1.cloud_name }} myregion: {{ + % v1.region }}' + +.. note:: + To save time designing a user-data template for a specific cloud's + instance-data.json, use the 'render' cloud-init command on an + instance booted on your favorite cloud. See :ref:`cli_devel` for more + information. + +.. vi: textwidth=78 diff --git a/integration-requirements.txt b/integration-requirements.txt index f80cb942..880d9886 100644 --- a/integration-requirements.txt +++ b/integration-requirements.txt @@ -5,16 +5,17 @@ # the packages/pkg-deps.json file as well. # +unittest2 # ec2 backend boto3==1.5.9 # ssh communication paramiko==2.4.1 + # lxd backend # 04/03/2018: enables use of lxd 3.0 git+https://github.com/lxc/pylxd.git@4b8ab1802f9aee4eb29cf7b119dae0aa47150779 - # finds latest image information git+https://git.launchpad.net/simplestreams diff --git a/tests/cloud_tests/testcases/base.py b/tests/cloud_tests/testcases/base.py index 27458271..c5457968 100644 --- a/tests/cloud_tests/testcases/base.py +++ b/tests/cloud_tests/testcases/base.py @@ -5,15 +5,15 @@ import crypt import json import re -import unittest +import unittest2 from cloudinit import util as c_util -SkipTest = unittest.SkipTest +SkipTest = unittest2.SkipTest -class CloudTestCase(unittest.TestCase): +class CloudTestCase(unittest2.TestCase): """Base test class for verifiers.""" # data gets populated in get_suite.setUpClass @@ -167,8 +167,9 @@ class CloudTestCase(unittest.TestCase): 'Skipping instance-data.json test.' ' OS: %s not bionic or newer' % self.os_name) instance_data = json.loads(out) - self.assertEqual( - ['ds/user_data'], instance_data['base64_encoded_keys']) + self.assertItemsEqual( + [], + instance_data['base64_encoded_keys']) ds = instance_data.get('ds', {}) v1_data = instance_data.get('v1', {}) metadata = ds.get('meta-data', {}) @@ -187,10 +188,10 @@ class CloudTestCase(unittest.TestCase): metadata.get('placement', {}).get('availability-zone'), 'Could not determine EC2 Availability zone placement') self.assertIsNotNone( - v1_data['availability-zone'], 'expected ec2 availability-zone') - self.assertEqual('aws', v1_data['cloud-name']) - self.assertIn('i-', v1_data['instance-id']) - self.assertIn('ip-', v1_data['local-hostname']) + v1_data['availability_zone'], 'expected ec2 availability_zone') + self.assertEqual('aws', v1_data['cloud_name']) + self.assertIn('i-', v1_data['instance_id']) + self.assertIn('ip-', v1_data['local_hostname']) self.assertIsNotNone(v1_data['region'], 'expected ec2 region') def test_instance_data_json_lxd(self): @@ -213,16 +214,14 @@ class CloudTestCase(unittest.TestCase): ' OS: %s not bionic or newer' % self.os_name) instance_data = json.loads(out) v1_data = instance_data.get('v1', {}) - self.assertEqual( - ['ds/user_data', 'ds/vendor_data'], - sorted(instance_data['base64_encoded_keys'])) - self.assertEqual('nocloud', v1_data['cloud-name']) + self.assertItemsEqual([], sorted(instance_data['base64_encoded_keys'])) + self.assertEqual('nocloud', v1_data['cloud_name']) self.assertIsNone( - v1_data['availability-zone'], - 'found unexpected lxd availability-zone %s' % - v1_data['availability-zone']) - self.assertIn('cloud-test', v1_data['instance-id']) - self.assertIn('cloud-test', v1_data['local-hostname']) + v1_data['availability_zone'], + 'found unexpected lxd availability_zone %s' % + v1_data['availability_zone']) + self.assertIn('cloud-test', v1_data['instance_id']) + self.assertIn('cloud-test', v1_data['local_hostname']) self.assertIsNone( v1_data['region'], 'found unexpected lxd region %s' % v1_data['region']) @@ -248,18 +247,17 @@ class CloudTestCase(unittest.TestCase): ' OS: %s not bionic or newer' % self.os_name) instance_data = json.loads(out) v1_data = instance_data.get('v1', {}) - self.assertEqual( - ['ds/user_data'], instance_data['base64_encoded_keys']) - self.assertEqual('nocloud', v1_data['cloud-name']) + self.assertItemsEqual([], instance_data['base64_encoded_keys']) + self.assertEqual('nocloud', v1_data['cloud_name']) self.assertIsNone( - v1_data['availability-zone'], - 'found unexpected kvm availability-zone %s' % - v1_data['availability-zone']) + v1_data['availability_zone'], + 'found unexpected kvm availability_zone %s' % + v1_data['availability_zone']) self.assertIsNotNone( re.match(r'[\da-f]{8}(-[\da-f]{4}){3}-[\da-f]{12}', - v1_data['instance-id']), - 'kvm instance-id is not a UUID: %s' % v1_data['instance-id']) - self.assertIn('ubuntu', v1_data['local-hostname']) + v1_data['instance_id']), + 'kvm instance_id is not a UUID: %s' % v1_data['instance_id']) + self.assertIn('ubuntu', v1_data['local_hostname']) self.assertIsNone( v1_data['region'], 'found unexpected lxd region %s' % v1_data['region']) -- cgit v1.2.3