diff options
| -rw-r--r-- | bash_completion/cloud-init | 4 | ||||
| -rwxr-xr-x | cloudinit/cmd/devel/render.py | 7 | ||||
| -rw-r--r-- | cloudinit/cmd/main.py | 10 | ||||
| -rw-r--r-- | cloudinit/cmd/query.py | 155 | ||||
| -rw-r--r-- | cloudinit/cmd/tests/test_query.py | 193 | ||||
| -rw-r--r-- | cloudinit/helpers.py | 4 | ||||
| -rw-r--r-- | cloudinit/sources/__init__.py | 76 | ||||
| -rw-r--r-- | cloudinit/sources/tests/test_init.py | 130 | ||||
| -rw-r--r-- | doc/rtd/index.rst | 1 | ||||
| -rw-r--r-- | doc/rtd/topics/capabilities.rst | 105 | ||||
| -rw-r--r-- | doc/rtd/topics/datasources.rst | 148 | ||||
| -rw-r--r-- | doc/rtd/topics/instancedata.rst | 297 | ||||
| -rw-r--r-- | integration-requirements.txt | 3 | ||||
| -rw-r--r-- | tests/cloud_tests/testcases/base.py | 52 | 
14 files changed, 952 insertions, 233 deletions
| diff --git a/bash_completion/cloud-init b/bash_completion/cloud-init index 6d01bf3a..8c25032f 100644 --- a/bash_completion/cloud-init +++ b/bash_completion/cloud-init @@ -10,7 +10,7 @@ _cloudinit_complete()      cur_word="${COMP_WORDS[COMP_CWORD]}"      prev_word="${COMP_WORDS[COMP_CWORD-1]}" -    subcmds="analyze clean collect-logs devel dhclient-hook features init modules single status" +    subcmds="analyze clean collect-logs devel dhclient-hook features init modules query single status"      base_params="--help --file --version --debug --force"      case ${COMP_CWORD} in          1) @@ -40,6 +40,8 @@ _cloudinit_complete()                      COMPREPLY=($(compgen -W "--help --mode" -- $cur_word))                      ;; +                query) +                    COMPREPLY=($(compgen -W "--all --help --instance-data --list-keys --user-data --vendor-data --debug" -- $cur_word));;                  single)                      COMPREPLY=($(compgen -W "--help --name --frequency --report" -- $cur_word))                      ;; diff --git a/cloudinit/cmd/devel/render.py b/cloudinit/cmd/devel/render.py index e85933db..2ba6b681 100755 --- a/cloudinit/cmd/devel/render.py +++ b/cloudinit/cmd/devel/render.py @@ -9,7 +9,6 @@ import sys  from cloudinit.handlers.jinja_template import render_jinja_payload_from_file  from cloudinit import log  from cloudinit.sources import INSTANCE_JSON_FILE -from cloudinit import util  from . 
import addLogHandlerCLI, read_cfg_paths  NAME = 'render' @@ -54,11 +53,7 @@ def handle_args(name, args):              paths.run_dir, INSTANCE_JSON_FILE)      else:          instance_data_fn = args.instance_data -    try: -        with open(instance_data_fn) as stream: -            instance_data = stream.read() -        instance_data = util.load_json(instance_data) -    except IOError: +    if not os.path.exists(instance_data_fn):          LOG.error('Missing instance-data.json file: %s', instance_data_fn)          return 1      try: diff --git a/cloudinit/cmd/main.py b/cloudinit/cmd/main.py index 0eee583c..5a437020 100644 --- a/cloudinit/cmd/main.py +++ b/cloudinit/cmd/main.py @@ -791,6 +791,10 @@ def main(sysv_args=None):                                       ' pass to this module'))      parser_single.set_defaults(action=('single', main_single)) +    parser_query = subparsers.add_parser( +        'query', +        help='Query standardized instance metadata from the command line.') +      parser_dhclient = subparsers.add_parser('dhclient-hook',                                              help=('run the dhclient hook'                                                    'to record network info')) @@ -842,6 +846,12 @@ def main(sysv_args=None):              clean_parser(parser_clean)              parser_clean.set_defaults(                  action=('clean', handle_clean_args)) +        elif sysv_args[0] == 'query': +            from cloudinit.cmd.query import ( +                get_parser as query_parser, handle_args as handle_query_args) +            query_parser(parser_query) +            parser_query.set_defaults( +                action=('render', handle_query_args))          elif sysv_args[0] == 'status':              from cloudinit.cmd.status import (                  get_parser as status_parser, handle_status_args) diff --git a/cloudinit/cmd/query.py b/cloudinit/cmd/query.py new file mode 100644 index 00000000..7d2d4fe4 --- /dev/null +++ b/cloudinit/cmd/query.py 
@@ -0,0 +1,155 @@ +# This file is part of cloud-init. See LICENSE file for license information. + +"""Query standardized instance metadata from the command line.""" + +import argparse +import os +import six +import sys + +from cloudinit.handlers.jinja_template import ( +    convert_jinja_instance_data, render_jinja_payload) +from cloudinit.cmd.devel import addLogHandlerCLI, read_cfg_paths +from cloudinit import log +from cloudinit.sources import ( +    INSTANCE_JSON_FILE, INSTANCE_JSON_SENSITIVE_FILE, REDACT_SENSITIVE_VALUE) +from cloudinit import util + +NAME = 'query' +LOG = log.getLogger(NAME) + + +def get_parser(parser=None): +    """Build or extend an arg parser for query utility. + +    @param parser: Optional existing ArgumentParser instance representing the +        query subcommand which will be extended to support the args of +        this utility. + +    @returns: ArgumentParser with proper argument configuration. +    """ +    if not parser: +        parser = argparse.ArgumentParser( +            prog=NAME, description='Query cloud-init instance data') +    parser.add_argument( +        '-d', '--debug', action='store_true', default=False, +        help='Add verbose messages during template render') +    parser.add_argument( +        '-i', '--instance-data', type=str, +        help=('Path to instance-data.json file. Default is /run/cloud-init/%s' +              % INSTANCE_JSON_FILE)) +    parser.add_argument( +        '-l', '--list-keys', action='store_true', default=False, +        help=('List query keys available at the provided instance-data' +              ' <varname>.')) +    parser.add_argument( +        '-u', '--user-data', type=str, +        help=('Path to user-data file. Default is' +              ' /var/lib/cloud/instance/user-data.txt')) +    parser.add_argument( +        '-v', '--vendor-data', type=str, +        help=('Path to vendor-data file. 
Default is' +              ' /var/lib/cloud/instance/vendor-data.txt')) +    parser.add_argument( +        'varname', type=str, nargs='?', +        help=('A dot-delimited instance data variable to query from' +              ' instance-data query. For example: v2.local_hostname')) +    parser.add_argument( +        '-a', '--all', action='store_true', default=False, dest='dump_all', +        help='Dump all available instance-data') +    parser.add_argument( +        '-f', '--format', type=str, dest='format', +        help=('Optionally specify a custom output format string. Any' +              ' instance-data variable can be specified between double-curly' +              ' braces. For example -f "{{ v2.cloud_name }}"')) +    return parser + + +def handle_args(name, args): +    """Handle calls to 'cloud-init query' as a subcommand.""" +    paths = None +    addLogHandlerCLI(LOG, log.DEBUG if args.debug else log.WARNING) +    if not any([args.list_keys, args.varname, args.format, args.dump_all]): +        LOG.error( +            'Expected one of the options: --all, --format,' +            ' --list-keys or varname') +        get_parser().print_help() +        return 1 + +    uid = os.getuid() +    if not all([args.instance_data, args.user_data, args.vendor_data]): +        paths = read_cfg_paths() +    if not args.instance_data: +        if uid == 0: +            default_json_fn = INSTANCE_JSON_SENSITIVE_FILE +        else: +            default_json_fn = INSTANCE_JSON_FILE  # World readable +        instance_data_fn = os.path.join(paths.run_dir, default_json_fn) +    else: +        instance_data_fn = args.instance_data +    if not args.user_data: +        user_data_fn = os.path.join(paths.instance_link, 'user-data.txt') +    else: +        user_data_fn = args.user_data +    if not args.vendor_data: +        vendor_data_fn = os.path.join(paths.instance_link, 'vendor-data.txt') +    else: +        vendor_data_fn = args.vendor_data + +    try: +        instance_json = 
util.load_file(instance_data_fn) +    except IOError: +        LOG.error('Missing instance-data.json file: %s', instance_data_fn) +        return 1 + +    instance_data = util.load_json(instance_json) +    if uid != 0: +        instance_data['userdata'] = ( +            '<%s> file:%s' % (REDACT_SENSITIVE_VALUE, user_data_fn)) +        instance_data['vendordata'] = ( +            '<%s> file:%s' % (REDACT_SENSITIVE_VALUE, vendor_data_fn)) +    else: +        instance_data['userdata'] = util.load_file(user_data_fn) +        instance_data['vendordata'] = util.load_file(vendor_data_fn) +    if args.format: +        payload = '## template: jinja\n{fmt}'.format(fmt=args.format) +        rendered_payload = render_jinja_payload( +            payload=payload, payload_fn='query commandline', +            instance_data=instance_data, +            debug=True if args.debug else False) +        if rendered_payload: +            print(rendered_payload) +            return 0 +        return 1 + +    response = convert_jinja_instance_data(instance_data) +    if args.varname: +        try: +            for var in args.varname.split('.'): +                response = response[var] +        except KeyError: +            LOG.error('Undefined instance-data key %s', args.varname) +            return 1 +        if args.list_keys: +            if not isinstance(response, dict): +                LOG.error("--list-keys provided but '%s' is not a dict", var) +                return 1 +            response = '\n'.join(sorted(response.keys())) +    elif args.list_keys: +        response = '\n'.join(sorted(response.keys())) +    if not isinstance(response, six.string_types): +        response = util.json_dumps(response) +    print(response) +    return 0 + + +def main(): +    """Tool to query specific instance-data values.""" +    parser = get_parser() +    sys.exit(handle_args(NAME, parser.parse_args())) + + +if __name__ == '__main__': +    main() + +# vi: ts=4 expandtab diff --git 
a/cloudinit/cmd/tests/test_query.py b/cloudinit/cmd/tests/test_query.py new file mode 100644 index 00000000..fb87c6ab --- /dev/null +++ b/cloudinit/cmd/tests/test_query.py @@ -0,0 +1,193 @@ +# This file is part of cloud-init. See LICENSE file for license information. + +from six import StringIO +from textwrap import dedent +import os + +from collections import namedtuple +from cloudinit.cmd import query +from cloudinit.helpers import Paths +from cloudinit.sources import REDACT_SENSITIVE_VALUE, INSTANCE_JSON_FILE +from cloudinit.tests.helpers import CiTestCase, mock +from cloudinit.util import ensure_dir, write_file + + +class TestQuery(CiTestCase): + +    with_logs = True + +    args = namedtuple( +        'queryargs', +        ('debug dump_all format instance_data list_keys user_data vendor_data' +         ' varname')) + +    def setUp(self): +        super(TestQuery, self).setUp() +        self.tmp = self.tmp_dir() +        self.instance_data = self.tmp_path('instance-data', dir=self.tmp) + +    def test_handle_args_error_on_missing_param(self): +        """Error when missing required parameters and print usage.""" +        args = self.args( +            debug=False, dump_all=False, format=None, instance_data=None, +            list_keys=False, user_data=None, vendor_data=None, varname=None) +        with mock.patch('sys.stderr', new_callable=StringIO) as m_stderr: +            with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout: +                self.assertEqual(1, query.handle_args('anyname', args)) +        expected_error = ( +            'ERROR: Expected one of the options: --all, --format, --list-keys' +            ' or varname\n') +        self.assertIn(expected_error, self.logs.getvalue()) +        self.assertIn('usage: query', m_stdout.getvalue()) +        self.assertIn(expected_error, m_stderr.getvalue()) + +    def test_handle_args_error_on_missing_instance_data(self): +        """When instance_data file path does not exist, log an 
error.""" +        absent_fn = self.tmp_path('absent', dir=self.tmp) +        args = self.args( +            debug=False, dump_all=True, format=None, instance_data=absent_fn, +            list_keys=False, user_data='ud', vendor_data='vd', varname=None) +        with mock.patch('sys.stderr', new_callable=StringIO) as m_stderr: +            self.assertEqual(1, query.handle_args('anyname', args)) +        self.assertIn( +            'ERROR: Missing instance-data.json file: %s' % absent_fn, +            self.logs.getvalue()) +        self.assertIn( +            'ERROR: Missing instance-data.json file: %s' % absent_fn, +            m_stderr.getvalue()) + +    def test_handle_args_defaults_instance_data(self): +        """When no instance_data argument, default to configured run_dir.""" +        args = self.args( +            debug=False, dump_all=True, format=None, instance_data=None, +            list_keys=False, user_data=None, vendor_data=None, varname=None) +        run_dir = self.tmp_path('run_dir', dir=self.tmp) +        ensure_dir(run_dir) +        paths = Paths({'run_dir': run_dir}) +        self.add_patch('cloudinit.cmd.query.read_cfg_paths', 'm_paths') +        self.m_paths.return_value = paths +        with mock.patch('sys.stderr', new_callable=StringIO) as m_stderr: +            self.assertEqual(1, query.handle_args('anyname', args)) +        json_file = os.path.join(run_dir, INSTANCE_JSON_FILE) +        self.assertIn( +            'ERROR: Missing instance-data.json file: %s' % json_file, +            self.logs.getvalue()) +        self.assertIn( +            'ERROR: Missing instance-data.json file: %s' % json_file, +            m_stderr.getvalue()) + +    def test_handle_args_dumps_all_instance_data(self): +        """When --all is specified query will dump all instance data vars.""" +        write_file(self.instance_data, '{"my-var": "it worked"}') +        args = self.args( +            debug=False, dump_all=True, format=None, +            
instance_data=self.instance_data, list_keys=False, +            user_data='ud', vendor_data='vd', varname=None) +        with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout: +            self.assertEqual(0, query.handle_args('anyname', args)) +        self.assertEqual( +            '{\n "my_var": "it worked",\n "userdata": "<%s> file:ud",\n' +            ' "vendordata": "<%s> file:vd"\n}\n' % ( +                REDACT_SENSITIVE_VALUE, REDACT_SENSITIVE_VALUE), +            m_stdout.getvalue()) + +    def test_handle_args_returns_top_level_varname(self): +        """When the argument varname is passed, report its value.""" +        write_file(self.instance_data, '{"my-var": "it worked"}') +        args = self.args( +            debug=False, dump_all=True, format=None, +            instance_data=self.instance_data, list_keys=False, +            user_data='ud', vendor_data='vd', varname='my_var') +        with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout: +            self.assertEqual(0, query.handle_args('anyname', args)) +        self.assertEqual('it worked\n', m_stdout.getvalue()) + +    def test_handle_args_returns_nested_varname(self): +        """If user_data file is a jinja template render instance-data vars.""" +        write_file(self.instance_data, +                   '{"v1": {"key-2": "value-2"}, "my-var": "it worked"}') +        args = self.args( +            debug=False, dump_all=False, format=None, +            instance_data=self.instance_data, user_data='ud', vendor_data='vd', +            list_keys=False, varname='v1.key_2') +        with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout: +            self.assertEqual(0, query.handle_args('anyname', args)) +        self.assertEqual('value-2\n', m_stdout.getvalue()) + +    def test_handle_args_returns_standardized_vars_to_top_level_aliases(self): +        """Any standardized vars under v# are promoted as top-level aliases.""" +        write_file( +            
self.instance_data, +            '{"v1": {"v1_1": "val1.1"}, "v2": {"v2_2": "val2.2"},' +            ' "top": "gun"}') +        expected = dedent("""\ +            { +             "top": "gun", +             "userdata": "<redacted for non-root user> file:ud", +             "v1": { +              "v1_1": "val1.1" +             }, +             "v1_1": "val1.1", +             "v2": { +              "v2_2": "val2.2" +             }, +             "v2_2": "val2.2", +             "vendordata": "<redacted for non-root user> file:vd" +            } +        """) +        args = self.args( +            debug=False, dump_all=True, format=None, +            instance_data=self.instance_data, user_data='ud', vendor_data='vd', +            list_keys=False, varname=None) +        with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout: +            self.assertEqual(0, query.handle_args('anyname', args)) +        self.assertEqual(expected, m_stdout.getvalue()) + +    def test_handle_args_list_keys_sorts_top_level_keys_when_no_varname(self): +        """Sort all top-level keys when only --list-keys provided.""" +        write_file( +            self.instance_data, +            '{"v1": {"v1_1": "val1.1"}, "v2": {"v2_2": "val2.2"},' +            ' "top": "gun"}') +        expected = 'top\nuserdata\nv1\nv1_1\nv2\nv2_2\nvendordata\n' +        args = self.args( +            debug=False, dump_all=False, format=None, +            instance_data=self.instance_data, list_keys=True, user_data='ud', +            vendor_data='vd', varname=None) +        with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout: +            self.assertEqual(0, query.handle_args('anyname', args)) +        self.assertEqual(expected, m_stdout.getvalue()) + +    def test_handle_args_list_keys_sorts_nested_keys_when_varname(self): +        """Sort all nested keys of varname object when --list-keys provided.""" +        write_file( +            self.instance_data, +            '{"v1": {"v1_1": 
"val1.1", "v1_2": "val1.2"}, "v2":' + +            ' {"v2_2": "val2.2"}, "top": "gun"}') +        expected = 'v1_1\nv1_2\n' +        args = self.args( +            debug=False, dump_all=False, format=None, +            instance_data=self.instance_data, list_keys=True, +            user_data='ud', vendor_data='vd', varname='v1') +        with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout: +            self.assertEqual(0, query.handle_args('anyname', args)) +        self.assertEqual(expected, m_stdout.getvalue()) + +    def test_handle_args_list_keys_errors_when_varname_is_not_a_dict(self): +        """Raise an error when --list-keys and varname specify a non-list.""" +        write_file( +            self.instance_data, +            '{"v1": {"v1_1": "val1.1", "v1_2": "val1.2"}, "v2": ' + +            '{"v2_2": "val2.2"}, "top": "gun"}') +        expected_error = "ERROR: --list-keys provided but 'top' is not a dict" +        args = self.args( +            debug=False, dump_all=False, format=None, +            instance_data=self.instance_data, list_keys=True, user_data='ud', +            vendor_data='vd',  varname='top') +        with mock.patch('sys.stderr', new_callable=StringIO) as m_stderr: +            with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout: +                self.assertEqual(1, query.handle_args('anyname', args)) +        self.assertEqual('', m_stdout.getvalue()) +        self.assertIn(expected_error, m_stderr.getvalue()) + +# vi: ts=4 expandtab diff --git a/cloudinit/helpers.py b/cloudinit/helpers.py index 3cc1fb19..dcd2645e 100644 --- a/cloudinit/helpers.py +++ b/cloudinit/helpers.py @@ -239,6 +239,10 @@ class ConfigMerger(object):              if cc_fn and os.path.isfile(cc_fn):                  try:                      i_cfgs.append(util.read_conf(cc_fn)) +                except PermissionError: +                    LOG.debug( +                        'Skipped loading cloud-config from %s due to' +                      
  ' non-root.', cc_fn)                  except Exception:                      util.logexc(LOG, 'Failed loading of cloud-config from %s',                                  cc_fn) diff --git a/cloudinit/sources/__init__.py b/cloudinit/sources/__init__.py index a775f1a8..730e8174 100644 --- a/cloudinit/sources/__init__.py +++ b/cloudinit/sources/__init__.py @@ -38,8 +38,12 @@ DEP_FILESYSTEM = "FILESYSTEM"  DEP_NETWORK = "NETWORK"  DS_PREFIX = 'DataSource' -# File in which instance meta-data, user-data and vendor-data is written +# File in which public available instance meta-data is written +# security-sensitive key values are redacted from this world-readable file  INSTANCE_JSON_FILE = 'instance-data.json' +# security-sensitive key values are present in this root-readable file +INSTANCE_JSON_SENSITIVE_FILE = 'instance-data-sensitive.json' +REDACT_SENSITIVE_VALUE = 'redacted for non-root user'  # Key which can be provide a cloud's official product name to cloud-init  METADATA_CLOUD_NAME_KEY = 'cloud-name' @@ -58,7 +62,7 @@ class InvalidMetaDataException(Exception):      pass -def process_instance_metadata(metadata, key_path=''): +def process_instance_metadata(metadata, key_path='', sensitive_keys=()):      """Process all instance metadata cleaning it up for persisting as json.      
Strip ci-b64 prefix and catalog any 'base64_encoded_keys' as a list @@ -67,22 +71,46 @@ def process_instance_metadata(metadata, key_path=''):      """      md_copy = copy.deepcopy(metadata)      md_copy['base64_encoded_keys'] = [] +    md_copy['sensitive_keys'] = []      for key, val in metadata.items():          if key_path:              sub_key_path = key_path + '/' + key          else:              sub_key_path = key +        if key in sensitive_keys or sub_key_path in sensitive_keys: +            md_copy['sensitive_keys'].append(sub_key_path)          if isinstance(val, str) and val.startswith('ci-b64:'):              md_copy['base64_encoded_keys'].append(sub_key_path)              md_copy[key] = val.replace('ci-b64:', '')          if isinstance(val, dict): -            return_val = process_instance_metadata(val, sub_key_path) +            return_val = process_instance_metadata( +                val, sub_key_path, sensitive_keys)              md_copy['base64_encoded_keys'].extend(                  return_val.pop('base64_encoded_keys')) +            md_copy['sensitive_keys'].extend( +                return_val.pop('sensitive_keys'))              md_copy[key] = return_val      return md_copy +def redact_sensitive_keys(metadata, redact_value=REDACT_SENSITIVE_VALUE): +    """Redact any sensitive keys from the provided metadata dictionary. + +    Replace any key values listed in 'sensitive_keys' with redact_value. 
+    """ +    if not metadata.get('sensitive_keys', []): +        return metadata +    md_copy = copy.deepcopy(metadata) +    for key_path in metadata.get('sensitive_keys'): +        path_parts = key_path.split('/') +        obj = md_copy +        for path in path_parts: +            if isinstance(obj[path], dict) and path != path_parts[-1]: +                obj = obj[path] +        obj[path] = redact_value +    return md_copy + +  URLParams = namedtuple(      'URLParms', ['max_wait_seconds', 'timeout_seconds', 'num_retries']) @@ -127,6 +155,10 @@ class DataSource(object):      _dirty_cache = False +    # N-tuple of keypaths or keynames to redact from instance-data.json for +    # non-root users +    sensitive_metadata_keys = ('security-credentials',) +      def __init__(self, sys_cfg, distro, paths, ud_proc=None):          self.sys_cfg = sys_cfg          self.distro = distro @@ -152,12 +184,24 @@ class DataSource(object):      def _get_standardized_metadata(self):          """Return a dictionary of standardized metadata keys.""" -        return {'v1': { -            'local-hostname': self.get_hostname(), -            'instance-id': self.get_instance_id(), -            'cloud-name': self.cloud_name, -            'region': self.region, -            'availability-zone': self.availability_zone}} +        local_hostname = self.get_hostname() +        instance_id = self.get_instance_id() +        availability_zone = self.availability_zone +        cloud_name = self.cloud_name +        # When adding new standard keys prefer underscore-delimited instead +        # of hyphen-delimited to support simple variable references in jinja +        # templates. 
+        return { +            'v1': { +                'availability-zone': availability_zone, +                'availability_zone': availability_zone, +                'cloud-name': cloud_name, +                'cloud_name': cloud_name, +                'instance-id': instance_id, +                'instance_id': instance_id, +                'local-hostname': local_hostname, +                'local_hostname': local_hostname, +                'region': self.region}}      def clear_cached_attrs(self, attr_defaults=()):          """Reset any cached metadata attributes to datasource defaults. @@ -200,9 +244,7 @@ class DataSource(object):          """          instance_data = {              'ds': { -                'meta_data': self.metadata, -                'user_data': self.get_userdata_raw(), -                'vendor_data': self.get_vendordata_raw()}} +                'meta_data': self.metadata}}          if hasattr(self, 'network_json'):              network_json = getattr(self, 'network_json')              if network_json != UNSET: @@ -217,7 +259,9 @@ class DataSource(object):              # Process content base64encoding unserializable values              content = util.json_dumps(instance_data)              # Strip base64: prefix and set base64_encoded_keys list. 
-            processed_data = process_instance_metadata(json.loads(content)) +            processed_data = process_instance_metadata( +                json.loads(content), +                sensitive_keys=self.sensitive_metadata_keys)          except TypeError as e:              LOG.warning('Error persisting instance-data.json: %s', str(e))              return False @@ -225,7 +269,11 @@ class DataSource(object):              LOG.warning('Error persisting instance-data.json: %s', str(e))              return False          json_file = os.path.join(self.paths.run_dir, INSTANCE_JSON_FILE) -        write_json(json_file, processed_data, mode=0o600) +        write_json(json_file, processed_data)  # World readable +        json_sensitive_file = os.path.join(self.paths.run_dir, +                                           INSTANCE_JSON_SENSITIVE_FILE) +        write_json(json_sensitive_file, +                   redact_sensitive_keys(processed_data), mode=0o600)          return True      def _get_data(self): diff --git a/cloudinit/sources/tests/test_init.py b/cloudinit/sources/tests/test_init.py index 8299af23..6b965750 100644 --- a/cloudinit/sources/tests/test_init.py +++ b/cloudinit/sources/tests/test_init.py @@ -1,5 +1,6 @@  # This file is part of cloud-init. See LICENSE file for license information. 
+import copy  import inspect  import os  import six @@ -9,7 +10,8 @@ from cloudinit.event import EventType  from cloudinit.helpers import Paths  from cloudinit import importer  from cloudinit.sources import ( -    INSTANCE_JSON_FILE, DataSource, UNSET) +    INSTANCE_JSON_FILE, INSTANCE_JSON_SENSITIVE_FILE, REDACT_SENSITIVE_VALUE, +    UNSET, DataSource, redact_sensitive_keys)  from cloudinit.tests.helpers import CiTestCase, skipIf, mock  from cloudinit.user_data import UserDataProcessor  from cloudinit import util @@ -20,20 +22,24 @@ class DataSourceTestSubclassNet(DataSource):      dsname = 'MyTestSubclass'      url_max_wait = 55 -    def __init__(self, sys_cfg, distro, paths, custom_userdata=None, -                 get_data_retval=True): +    def __init__(self, sys_cfg, distro, paths, custom_metadata=None, +                 custom_userdata=None, get_data_retval=True):          super(DataSourceTestSubclassNet, self).__init__(              sys_cfg, distro, paths)          self._custom_userdata = custom_userdata +        self._custom_metadata = custom_metadata          self._get_data_retval = get_data_retval      def _get_cloud_name(self):          return 'SubclassCloudName'      def _get_data(self): -        self.metadata = {'availability_zone': 'myaz', -                         'local-hostname': 'test-subclass-hostname', -                         'region': 'myregion'} +        if self._custom_metadata: +            self.metadata = self._custom_metadata +        else: +            self.metadata = {'availability_zone': 'myaz', +                             'local-hostname': 'test-subclass-hostname', +                             'region': 'myregion'}          if self._custom_userdata:              self.userdata_raw = self._custom_userdata          else: @@ -278,7 +284,7 @@ class TestDataSource(CiTestCase):              os.path.exists(json_file), 'Found unexpected file %s' % json_file)      def test_get_data_writes_json_instance_data_on_success(self): -        
"""get_data writes INSTANCE_JSON_FILE to run_dir as readonly root.""" +        """get_data writes INSTANCE_JSON_FILE to run_dir as world readable."""          tmp = self.tmp_dir()          datasource = DataSourceTestSubclassNet(              self.sys_cfg, self.distro, Paths({'run_dir': tmp})) @@ -287,40 +293,90 @@ class TestDataSource(CiTestCase):          content = util.load_file(json_file)          expected = {              'base64_encoded_keys': [], +            'sensitive_keys': [],              'v1': {                  'availability-zone': 'myaz', +                'availability_zone': 'myaz',                  'cloud-name': 'subclasscloudname', +                'cloud_name': 'subclasscloudname',                  'instance-id': 'iid-datasource', +                'instance_id': 'iid-datasource',                  'local-hostname': 'test-subclass-hostname', +                'local_hostname': 'test-subclass-hostname',                  'region': 'myregion'},              'ds': {                  'meta_data': {'availability_zone': 'myaz',                                'local-hostname': 'test-subclass-hostname', -                              'region': 'myregion'}, -                'user_data': 'userdata_raw', -                'vendor_data': 'vendordata_raw'}} -        self.maxDiff = None +                              'region': 'myregion'}}}          self.assertEqual(expected, util.load_json(content))          file_stat = os.stat(json_file) +        self.assertEqual(0o644, stat.S_IMODE(file_stat.st_mode)) +        self.assertEqual(expected, util.load_json(content)) + +    def test_get_data_writes_json_instance_data_sensitive(self): +        """get_data writes INSTANCE_JSON_SENSITIVE_FILE as readonly root.""" +        tmp = self.tmp_dir() +        datasource = DataSourceTestSubclassNet( +            self.sys_cfg, self.distro, Paths({'run_dir': tmp}), +            custom_metadata={ +                'availability_zone': 'myaz', +                'local-hostname': 
'test-subclass-hostname', +                'region': 'myregion', +                'some': {'security-credentials': { +                    'cred1': 'sekret', 'cred2': 'othersekret'}}}) +        self.assertEqual( +            ('security-credentials',), datasource.sensitive_metadata_keys) +        datasource.get_data() +        json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp) +        sensitive_json_file = self.tmp_path(INSTANCE_JSON_SENSITIVE_FILE, tmp) +        redacted = util.load_json(util.load_file(json_file)) +        self.assertEqual( +            {'cred1': 'sekret', 'cred2': 'othersekret'}, +            redacted['ds']['meta_data']['some']['security-credentials']) +        content = util.load_file(sensitive_json_file) +        expected = { +            'base64_encoded_keys': [], +            'sensitive_keys': ['ds/meta_data/some/security-credentials'], +            'v1': { +                'availability-zone': 'myaz', +                'availability_zone': 'myaz', +                'cloud-name': 'subclasscloudname', +                'cloud_name': 'subclasscloudname', +                'instance-id': 'iid-datasource', +                'instance_id': 'iid-datasource', +                'local-hostname': 'test-subclass-hostname', +                'local_hostname': 'test-subclass-hostname', +                'region': 'myregion'}, +            'ds': { +                'meta_data': { +                    'availability_zone': 'myaz', +                    'local-hostname': 'test-subclass-hostname', +                    'region': 'myregion', +                    'some': {'security-credentials': REDACT_SENSITIVE_VALUE}}} +        } +        self.maxDiff = None +        self.assertEqual(expected, util.load_json(content)) +        file_stat = os.stat(sensitive_json_file)          self.assertEqual(0o600, stat.S_IMODE(file_stat.st_mode)) +        self.assertEqual(expected, util.load_json(content))      def test_get_data_handles_redacted_unserializable_content(self):          
"""get_data warns unserializable content in INSTANCE_JSON_FILE."""          tmp = self.tmp_dir()          datasource = DataSourceTestSubclassNet(              self.sys_cfg, self.distro, Paths({'run_dir': tmp}), -            custom_userdata={'key1': 'val1', 'key2': {'key2.1': self.paths}}) +            custom_metadata={'key1': 'val1', 'key2': {'key2.1': self.paths}})          datasource.get_data()          json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp)          content = util.load_file(json_file) -        expected_userdata = { +        expected_metadata = {              'key1': 'val1',              'key2': {                  'key2.1': "Warning: redacted unserializable type <class"                            " 'cloudinit.helpers.Paths'>"}}          instance_json = util.load_json(content)          self.assertEqual( -            expected_userdata, instance_json['ds']['user_data']) +            expected_metadata, instance_json['ds']['meta_data'])      def test_persist_instance_data_writes_ec2_metadata_when_set(self):          """When ec2_metadata class attribute is set, persist to json.""" @@ -361,17 +417,17 @@ class TestDataSource(CiTestCase):          tmp = self.tmp_dir()          datasource = DataSourceTestSubclassNet(              self.sys_cfg, self.distro, Paths({'run_dir': tmp}), -            custom_userdata={'key1': 'val1', 'key2': {'key2.1': b'\x123'}}) +            custom_metadata={'key1': 'val1', 'key2': {'key2.1': b'\x123'}})          self.assertTrue(datasource.get_data())          json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp)          content = util.load_file(json_file)          instance_json = util.load_json(content) -        self.assertEqual( -            ['ds/user_data/key2/key2.1'], +        self.assertItemsEqual( +            ['ds/meta_data/key2/key2.1'],              instance_json['base64_encoded_keys'])          self.assertEqual(              {'key1': 'val1', 'key2': {'key2.1': 'EjM='}}, -            instance_json['ds']['user_data']) +        
    instance_json['ds']['meta_data'])      @skipIf(not six.PY2, "json serialization on <= py2.7 handles bytes")      def test_get_data_handles_bytes_values(self): @@ -379,7 +435,7 @@ class TestDataSource(CiTestCase):          tmp = self.tmp_dir()          datasource = DataSourceTestSubclassNet(              self.sys_cfg, self.distro, Paths({'run_dir': tmp}), -            custom_userdata={'key1': 'val1', 'key2': {'key2.1': b'\x123'}}) +            custom_metadata={'key1': 'val1', 'key2': {'key2.1': b'\x123'}})          self.assertTrue(datasource.get_data())          json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp)          content = util.load_file(json_file) @@ -387,7 +443,7 @@ class TestDataSource(CiTestCase):          self.assertEqual([], instance_json['base64_encoded_keys'])          self.assertEqual(              {'key1': 'val1', 'key2': {'key2.1': '\x123'}}, -            instance_json['ds']['user_data']) +            instance_json['ds']['meta_data'])      @skipIf(not six.PY2, "Only python2 hits UnicodeDecodeErrors on non-utf8")      def test_non_utf8_encoding_logs_warning(self): @@ -395,7 +451,7 @@ class TestDataSource(CiTestCase):          tmp = self.tmp_dir()          datasource = DataSourceTestSubclassNet(              self.sys_cfg, self.distro, Paths({'run_dir': tmp}), -            custom_userdata={'key1': 'val1', 'key2': {'key2.1': b'ab\xaadef'}}) +            custom_metadata={'key1': 'val1', 'key2': {'key2.1': b'ab\xaadef'}})          self.assertTrue(datasource.get_data())          json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp)          self.assertFalse(os.path.exists(json_file)) @@ -509,4 +565,36 @@ class TestDataSource(CiTestCase):              self.logs.getvalue()) +class TestRedactSensitiveData(CiTestCase): + +    def test_redact_sensitive_data_noop_when_no_sensitive_keys_present(self): +        """When sensitive_keys is absent or empty from metadata do nothing.""" +        md = {'my': 'data'} +        self.assertEqual( +            md, 
redact_sensitive_keys(md, redact_value='redacted')) +        md['sensitive_keys'] = [] +        self.assertEqual( +            md, redact_sensitive_keys(md, redact_value='redacted')) + +    def test_redact_sensitive_data_redacts_exact_match_name(self): +        """Only exact matched sensitive_keys are redacted from metadata.""" +        md = {'sensitive_keys': ['md/secure'], +              'md': {'secure': 's3kr1t', 'insecure': 'publik'}} +        secure_md = copy.deepcopy(md) +        secure_md['md']['secure'] = 'redacted' +        self.assertEqual( +            secure_md, +            redact_sensitive_keys(md, redact_value='redacted')) + +    def test_redact_sensitive_data_does_redacts_with_default_string(self): +        """When redact_value is absent, REDACT_SENSITIVE_VALUE is used.""" +        md = {'sensitive_keys': ['md/secure'], +              'md': {'secure': 's3kr1t', 'insecure': 'publik'}} +        secure_md = copy.deepcopy(md) +        secure_md['md']['secure'] = 'redacted for non-root user' +        self.assertEqual( +            secure_md, +            redact_sensitive_keys(md)) + +  # vi: ts=4 expandtab diff --git a/doc/rtd/index.rst b/doc/rtd/index.rst index de67f361..20a99a30 100644 --- a/doc/rtd/index.rst +++ b/doc/rtd/index.rst @@ -31,6 +31,7 @@ initialization of a cloud instance.     topics/capabilities.rst     topics/availability.rst     topics/format.rst +   topics/instancedata.rst     topics/dir_layout.rst     topics/examples.rst     topics/boot.rst diff --git a/doc/rtd/topics/capabilities.rst b/doc/rtd/topics/capabilities.rst index 2d8e2538..0d8b8947 100644 --- a/doc/rtd/topics/capabilities.rst +++ b/doc/rtd/topics/capabilities.rst @@ -18,7 +18,7 @@ User configurability      User-data can be given by the user at instance launch time. See      :ref:`user_data_formats` for acceptable user-data content. -     +  This is done via the ``--user-data`` or ``--user-data-file`` argument to  ec2-run-instances for example. 
@@ -53,10 +53,9 @@ system:    % cloud-init --help    usage: cloud-init [-h] [--version] [--file FILES] -                      [--debug] [--force] -                    {init,modules,single,dhclient-hook,features,analyze,devel,collect-logs,clean,status} -                    ... +                    {init,modules,single,query,dhclient-hook,features,analyze,devel,collect-logs,clean,status} +                                                         ...    optional arguments:      -h, --help            show this help message and exit @@ -68,17 +67,19 @@ system:                            your own risk)    Subcommands: -    {init,modules,single,dhclient-hook,features,analyze,devel,collect-logs,clean,status} +    {init,modules,single,query,dhclient-hook,features,analyze,devel,collect-logs,clean,status}        init                initializes cloud-init and performs initial modules        modules             activates modules using a given configuration key        single              run a single module +      query               Query instance metadata from the command line        dhclient-hook       run the dhclient hookto record network info        features            list defined features        analyze             Devel tool: Analyze cloud-init logs and data        devel               Run development tools        collect-logs        Collect and tar all cloud-init debug info -      clean               Remove logs and artifacts so cloud-init can re-run. -      status              Report cloud-init status or wait on completion. +      clean               Remove logs and artifacts so cloud-init can re-run +      status              Report cloud-init status or wait on completion +  CLI Subcommand details  ====================== @@ -104,8 +105,8 @@ cloud-init status  Report whether cloud-init is running, done, disabled or errored. Exits  non-zero if an error is detected in cloud-init. - * **--long**: Detailed status information. 
- * **--wait**: Block until cloud-init completes. +* **--long**: Detailed status information. +* **--wait**: Block until cloud-init completes.  .. code-block:: shell-session @@ -143,6 +144,68 @@ Logs collected are:   * journalctl output   * /var/lib/cloud/instance/user-data.txt +.. _cli_query: + +cloud-init query +------------------ +Query standardized cloud instance metadata crawled by cloud-init and stored +in ``/run/cloud-init/instance-data.json``. This is a convenience command-line +interface to reference any cached configuration metadata that cloud-init +crawls when booting the instance. See :ref:`instance_metadata` for more info. + +* **--all**: Dump all available instance data as json which can be queried. +* **--instance-data**: Optional path to a different instance-data.json file to +  source for queries. +* **--list-keys**: List available query keys from cached instance data. + +.. code-block:: shell-session + +  # List all top-level query keys available (includes standardized aliases) +  % cloud-init query --list-keys +  availability_zone +  base64_encoded_keys +  cloud_name +  ds +  instance_id +  local_hostname +  region +  v1 + +* **<varname>**: A dot-delimited variable path into the instance-data.json +   object. + +.. code-block:: shell-session + +  # Query cloud-init standardized metadata on any cloud +  % cloud-init query v1.cloud_name +  aws  # or openstack, azure, gce etc. + +  # Any standardized instance-data under a <v#> key is aliased as a top-level +  # key for convenience. +  % cloud-init query cloud_name +  aws  # or openstack, azure, gce etc. + +  # Query datasource-specific metadata on EC2 +  % cloud-init query ds.meta_data.public_ipv4 + +* **--format** A string that will use jinja-template syntax to render a string +   replacing + +.. 
code-block:: shell-session + +  # Generate a custom hostname fqdn based on instance-id, cloud and region +  % cloud-init query --format 'custom-{{instance_id}}.{{region}}.{{v1.cloud_name}}.com' +  custom-i-0e91f69987f37ec74.us-east-2.aws.com + + +.. note:: +  The standardized instance data keys under **v#** are guaranteed not to change +  behavior or format. If using top-level convenience aliases for any +  standardized instance data keys, the most recent value (highest **v#**) of that key +  name is what is reported as the top-level value. So these aliases act as a +  'latest'. + +  .. _cli_analyze:  cloud-init analyze @@ -150,10 +213,10 @@ cloud-init analyze  Get detailed reports of where cloud-init spends most of its time. See  :ref:`boot_time_analysis` for more info. - * **blame** Report ordered by most costly operations. - * **dump** Machine-readable JSON dump of all cloud-init tracked events. - * **show** show time-ordered report of the cost of operations during each -   boot stage. +* **blame** Report ordered by most costly operations. +* **dump** Machine-readable JSON dump of all cloud-init tracked events. +* **show** show time-ordered report of the cost of operations during each +  boot stage.  .. _cli_devel: @@ -182,8 +245,8 @@ cloud-init clean  Remove cloud-init artifacts from /var/lib/cloud and optionally reboot the  machine to so cloud-init re-runs all stages as it did on first boot. - * **--logs**: Optionally remove /var/log/cloud-init*log files. - * **--reboot**: Reboot the system after removing artifacts. +* **--logs**: Optionally remove /var/log/cloud-init*log files. +* **--reboot**: Reboot the system after removing artifacts.  .. _cli_init: @@ -195,7 +258,7 @@ Can be run on the commandline, but is generally gated to run only once  due to semaphores in **/var/lib/cloud/instance/sem/** and  **/var/lib/cloud/sem**. - * **--local**: Run *init-local* stage instead of *init*. +* **--local**: Run *init-local* stage instead of *init*.  .. 
_cli_modules: @@ -210,8 +273,8 @@ declared to run in various boot stages in the file  commandline, but each module is gated to run only once due to semaphores  in ``/var/lib/cloud/``. - * **--mode (init|config|final)**: Run *modules:init*, *modules:config* or -   *modules:final* cloud-init stages. See :ref:`boot_stages` for more info. +* **--mode (init|config|final)**: Run *modules:init*, *modules:config* or +  *modules:final* cloud-init stages. See :ref:`boot_stages` for more info.  .. _cli_single: @@ -221,9 +284,9 @@ Attempt to run a single named cloud config module.  The following example  re-runs the cc_set_hostname module ignoring the module default frequency  of once-per-instance: - * **--name**: The cloud-config module name to run - * **--frequency**: Optionally override the declared module frequency -   with one of (always|once-per-instance|once) +* **--name**: The cloud-config module name to run +* **--frequency**: Optionally override the declared module frequency +  with one of (always|once-per-instance|once)  .. code-block:: shell-session diff --git a/doc/rtd/topics/datasources.rst b/doc/rtd/topics/datasources.rst index 14432e65..e34f145c 100644 --- a/doc/rtd/topics/datasources.rst +++ b/doc/rtd/topics/datasources.rst @@ -17,146 +17,10 @@ own way) internally a datasource abstract class was created to allow for a  single way to access the different cloud systems methods to provide this data  through the typical usage of subclasses. - -.. _instance_metadata: - -instance-data -------------- -For reference, cloud-init stores all the metadata, vendordata and userdata -provided by a cloud in a json blob at ``/run/cloud-init/instance-data.json``. -While the json contains datasource-specific keys and names, cloud-init will -maintain a minimal set of standardized keys that will remain stable on any -cloud. Standardized instance-data keys will be present under a "v1" key. -Any datasource metadata cloud-init consumes will all be present under the -"ds" key. 
- -Below is an instance-data.json example from an OpenStack instance: - -.. sourcecode:: json - -  { -   "base64-encoded-keys": [ -    "ds/meta-data/random_seed", -    "ds/user-data" -   ], -   "ds": { -    "ec2_metadata": { -     "ami-id": "ami-0000032f", -     "ami-launch-index": "0", -     "ami-manifest-path": "FIXME", -     "block-device-mapping": { -      "ami": "vda", -      "ephemeral0": "/dev/vdb", -      "root": "/dev/vda" -     }, -     "hostname": "xenial-test.novalocal", -     "instance-action": "none", -     "instance-id": "i-0006e030", -     "instance-type": "m1.small", -     "local-hostname": "xenial-test.novalocal", -     "local-ipv4": "10.5.0.6", -     "placement": { -      "availability-zone": "None" -     }, -     "public-hostname": "xenial-test.novalocal", -     "public-ipv4": "10.245.162.145", -     "reservation-id": "r-fxm623oa", -     "security-groups": "default" -    }, -    "meta-data": { -     "availability_zone": null, -     "devices": [], -     "hostname": "xenial-test.novalocal", -     "instance-id": "3e39d278-0644-4728-9479-678f9212d8f0", -     "launch_index": 0, -     "local-hostname": "xenial-test.novalocal", -     "name": "xenial-test", -     "project_id": "e0eb2d2538814...", -     "random_seed": "A6yPN...", -     "uuid": "3e39d278-0644-4728-9479-678f92..." -    }, -    "network_json": { -     "links": [ -      { -       "ethernet_mac_address": "fa:16:3e:7d:74:9b", -       "id": "tap9ca524d5-6e", -       "mtu": 8958, -       "type": "ovs", -       "vif_id": "9ca524d5-6e5a-4809-936a-6901..." 
-      } -     ], -     "networks": [ -      { -       "id": "network0", -       "link": "tap9ca524d5-6e", -       "network_id": "c6adfc18-9753-42eb-b3ea-18b57e6b837f", -       "type": "ipv4_dhcp" -      } -     ], -     "services": [ -      { -       "address": "10.10.160.2", -       "type": "dns" -      } -     ] -    }, -    "user-data": "I2Nsb3VkLWNvbmZpZ...", -    "vendor-data": null -   }, -   "v1": { -    "availability-zone": null, -    "cloud-name": "openstack", -    "instance-id": "3e39d278-0644-4728-9479-678f9212d8f0", -    "local-hostname": "xenial-test", -    "region": null -   } -  } - -  -As of cloud-init v. 18.4, any values present in -``/run/cloud-init/instance-data.json`` can be used in cloud-init user data -scripts or cloud config data. This allows consumers to use cloud-init's -vendor-neutral, standardized metadata keys as well as datasource-specific -content for any scripts or cloud-config modules they are using. - -To use instance-data.json values in scripts and **#config-config** files the -user-data will need to contain the following header as the first line **## template: jinja**. Cloud-init will source all variables defined in -``/run/cloud-init/instance-data.json`` and allow scripts or cloud-config files  -to reference those paths. 
Below are two examples:: - - * Cloud config calling home with the ec2 public hostname and avaliability-zone -    ``` -    ## template: jinja -    #cloud-config -    runcmd: -        - echo 'EC2 public hostname allocated to instance: {{ ds.meta_data.public_hostname }}' > /tmp/instance_metadata -        - echo 'EC2 avaiability zone: {{ v1.availability_zone }}' >> /tmp/instance_metadata  -        - curl -X POST -d '{"hostname": "{{ds.meta_data.public_hostname }}", "availability-zone": "{{ v1.availability_zone }}"}'  https://example.com.com -    ``` - - * Custom user script performing different operations based on region -    ``` -    ## template: jinja -    #!/bin/bash -    {% if v1.region == 'us-east-2' -%} -    echo 'Installing custom proxies for {{ v1.region }} -    sudo apt-get install my-xtra-fast-stack -    {%- endif %} -    ... - -    ``` - -.. note:: -  Trying to reference jinja variables that don't exist in -  instance-data.json will result in warnings in ``/var/log/cloud-init.log`` -  and the following string in your rendered user-data: -  ``CI_MISSING_JINJA_VAR/<your_varname>``. -   -.. note:: -  To save time designing your user-data for a specific cloud's -  instance-data.json, use the 'render' cloud-init command on an -  instance booted on your favorite cloud. See :ref:`cli_devel` for more -  information. +Any metadata processed by cloud-init's datasources is persisted as +``/run/cloud-init/instance-data.json``. Cloud-init provides tooling +to quickly introspect some of that data. See :ref:`instance_metadata` for +more information.  
Datasource API @@ -196,14 +60,14 @@ The current interface that a datasource object must provide is the following:      # or does not exist)      def device_name_to_device(self, name) -    # gets the locale string this instance should be applying  +    # gets the locale string this instance should be applying      # which typically used to adjust the instances locale settings files      def get_locale(self)      @property      def availability_zone(self) -    # gets the instance id that was assigned to this instance by the  +    # gets the instance id that was assigned to this instance by the      # cloud provider or when said instance id does not exist in the backing      # metadata this will return 'iid-datasource'      def get_instance_id(self) diff --git a/doc/rtd/topics/instancedata.rst b/doc/rtd/topics/instancedata.rst new file mode 100644 index 00000000..634e1807 --- /dev/null +++ b/doc/rtd/topics/instancedata.rst @@ -0,0 +1,297 @@ +.. _instance_metadata: + +***************** +Instance Metadata +***************** + +What is a instance data? +======================== + +Instance data is the collection of all configuration data that cloud-init +processes to configure the instance. This configuration typically +comes from any number of sources: + +* cloud-provided metadata services (aka metadata) +* custom config-drive attached to the instance +* cloud-config seed files in the booted cloud image or distribution +* vendordata provided from files or cloud metadata services +* userdata provided at instance creation + +Each cloud provider presents unique configuration metadata in different +formats to the instance. Cloud-init provides a cache of any crawled metadata +as well as a versioned set of standardized instance data keys which it makes +available on all platforms. + +Cloud-init produces a simple json object in +``/run/cloud-init/instance-data.json`` which represents standardized and +versioned representation of the metadata it consumes during initial boot. 
The +intent is to provide the following benefits to users or scripts on any system +deployed with cloud-init: + +* simple static object to query to obtain an instance's metadata +* speed: avoid costly network transactions for metadata that is already cached +  on the filesystem +* reduce need to recrawl metadata services for static metadata that is already +  cached +* leverage cloud-init's best practices for crawling cloud-metadata services +* avoid rolling unique metadata crawlers on each cloud platform to get +  metadata configuration values + +Cloud-init stores any instance data processed in the following files: + +* ``/run/cloud-init/instance-data.json``: world-readable json containing +  standardized keys, sensitive keys redacted +* ``/run/cloud-init/instance-data-sensitive.json``: root-readable unredacted +  json blob +* ``/var/lib/cloud/instance/user-data.txt``: root-readable sensitive raw +  userdata +* ``/var/lib/cloud/instance/vendor-data.txt``: root-readable sensitive raw +  vendordata + +Cloud-init redacts any security sensitive content from instance-data.json and +stores ``/run/cloud-init/instance-data.json`` as a world-readable json file. +Because user-data and vendor-data can contain passwords both of these files +are readonly for *root* as well. The *root* user can also read +``/run/cloud-init/instance-data-sensitive.json`` which is all instance data +from instance-data.json as well as unredacted sensitive content. + + +Format of instance-data.json +============================ + +The instance-data.json and instance-data-sensitive.json files are well-formed +JSON and record the set of keys and values for any metadata processed by +cloud-init. Cloud-init standardizes the format for this content so that it +can be generalized across different cloud platforms. 
+ +There are four basic top-level keys: + +* **base64_encoded_keys**: A list of forward-slash delimited key paths into +  the instance-data.json object whose value is base64-encoded for json +  compatibility. Values at these paths should be decoded to get the original +  value. + +* **sensitive_keys**: A list of forward-slash delimited key paths into +  the instance-data.json object whose value is considered by the datasource as +  'security sensitive'. Only the keys listed here will be redacted from +  instance-data.json for non-root users. + +* **ds**: Datasource-specific metadata crawled for the specific cloud +  platform. It should closely represent the structure of the cloud metadata +  crawled. The structure of content and details provided are entirely +  cloud-dependent. Mileage will vary depending on what the cloud exposes. +  The content exposed under the 'ds' key is currently **experimental** and +  expected to change slightly in the upcoming cloud-init release. + +* **v1**: Standardized cloud-init metadata keys, these keys are guaranteed to +  exist on all cloud platforms. They will also retain their current behavior +  and format and will be carried forward even if cloud-init introduces a new +  version of standardized keys with **v2**. + +The standardized keys present: + ++----------------------+-----------------------------------------------+---------------------------+ +|  Key path            | Description                                   | Examples                  | ++======================+===============================================+===========================+ +| v1.cloud_name        | The name of the cloud provided by metadata    | aws, openstack, azure,    | +|                      | key 'cloud-name' or the cloud-init datasource | configdrive, nocloud,     | +|                      | name which was discovered.                    | ovf, etc.                 
| ++----------------------+-----------------------------------------------+---------------------------+ +| v1.instance_id       | Unique instance_id allocated by the cloud     | i-<somehash>              | ++----------------------+-----------------------------------------------+---------------------------+ +| v1.local_hostname    | The internal or local hostname of the system  | ip-10-41-41-70,           | +|                      |                                               | <user-provided-hostname>  | ++----------------------+-----------------------------------------------+---------------------------+ +| v1.region            | The physical region/datacenter in which the   | us-east-2                 | +|                      | instance is deployed                          |                           | ++----------------------+-----------------------------------------------+---------------------------+ +| v1.availability_zone | The physical availability zone in which the   | us-east-2b, nova, null    | +|                      | instance is deployed                          |                           | ++----------------------+-----------------------------------------------+---------------------------+ + + +Below is an example of ``/run/cloud-init/instance-data.json`` on an EC2 +instance: + +.. 
sourcecode:: json + +  { +   "base64_encoded_keys": [], +   "sensitive_keys": [], +   "ds": { +    "meta_data": { +     "ami-id": "ami-014e1416b628b0cbf", +     "ami-launch-index": "0", +     "ami-manifest-path": "(unknown)", +     "block-device-mapping": { +      "ami": "/dev/sda1", +      "ephemeral0": "sdb", +      "ephemeral1": "sdc", +      "root": "/dev/sda1" +     }, +     "hostname": "ip-10-41-41-70.us-east-2.compute.internal", +     "instance-action": "none", +     "instance-id": "i-04fa31cfc55aa7976", +     "instance-type": "t2.micro", +     "local-hostname": "ip-10-41-41-70.us-east-2.compute.internal", +     "local-ipv4": "10.41.41.70", +     "mac": "06:b6:92:dd:9d:24", +     "metrics": { +      "vhostmd": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" +     }, +     "network": { +      "interfaces": { +       "macs": { +	"06:b6:92:dd:9d:24": { +	 "device-number": "0", +	 "interface-id": "eni-08c0c9fdb99b6e6f4", +	 "ipv4-associations": { +	  "18.224.22.43": "10.41.41.70" +	 }, +	 "local-hostname": "ip-10-41-41-70.us-east-2.compute.internal", +	 "local-ipv4s": "10.41.41.70", +	 "mac": "06:b6:92:dd:9d:24", +	 "owner-id": "437526006925", +	 "public-hostname": "ec2-18-224-22-43.us-east-2.compute.amazonaws.com", +	 "public-ipv4s": "18.224.22.43", +	 "security-group-ids": "sg-828247e9", +	 "security-groups": "Cloud-init integration test secgroup", +	 "subnet-id": "subnet-282f3053", +	 "subnet-ipv4-cidr-block": "10.41.41.0/24", +	 "subnet-ipv6-cidr-blocks": "2600:1f16:b80:ad00::/64", +	 "vpc-id": "vpc-252ef24d", +	 "vpc-ipv4-cidr-block": "10.41.0.0/16", +	 "vpc-ipv4-cidr-blocks": "10.41.0.0/16", +	 "vpc-ipv6-cidr-blocks": "2600:1f16:b80:ad00::/56" +	} +       } +      } +     }, +     "placement": { +      "availability-zone": "us-east-2b" +     }, +     "profile": "default-hvm", +     "public-hostname": "ec2-18-224-22-43.us-east-2.compute.amazonaws.com", +     "public-ipv4": "18.224.22.43", +     "public-keys": { +      "cloud-init-integration": [ +       
"ssh-rsa +  AAAAB3NzaC1yc2EAAAADAQABAAABAQDSL7uWGj8cgWyIOaspgKdVy0cKJ+UTjfv7jBOjG2H/GN8bJVXy72XAvnhM0dUM+CCs8FOf0YlPX+Frvz2hKInrmRhZVwRSL129PasD12MlI3l44u6IwS1o/W86Q+tkQYEljtqDOo0a+cOsaZkvUNzUyEXUwz/lmYa6G4hMKZH4NBj7nbAAF96wsMCoyNwbWryBnDYUr6wMbjRR1J9Pw7Xh7WRC73wy4Va2YuOgbD3V/5ZrFPLbWZW/7TFXVrql04QVbyei4aiFR5n//GvoqwQDNe58LmbzX/xvxyKJYdny2zXmdAhMxbrpFQsfpkJ9E/H5w0yOdSvnWbUoG5xNGoOB +  cloud-init-integration" +      ] +     }, +     "reservation-id": "r-06ab75e9346f54333", +     "security-groups": "Cloud-init integration test secgroup", +     "services": { +      "domain": "amazonaws.com", +      "partition": "aws" +     } +    } +   }, +   "v1": { +    "availability-zone": "us-east-2b", +    "availability_zone": "us-east-2b", +    "cloud-name": "aws", +    "cloud_name": "aws", +    "instance-id": "i-04fa31cfc55aa7976", +    "instance_id": "i-04fa31cfc55aa7976", +    "local-hostname": "ip-10-41-41-70", +    "local_hostname": "ip-10-41-41-70", +    "region": "us-east-2" +   } +  } + + +Using instance-data +=================== + +As of cloud-init v. 18.4, any variables present in +``/run/cloud-init/instance-data.json`` can be used in: + +* User-data scripts +* Cloud config data +* Command line interface via **cloud-init query** or +  **cloud-init devel render** + +Many clouds allow users to provide user-data to an instance at +the time the instance is launched. Cloud-init supports a number of +:ref:`user_data_formats`. + +Both user-data scripts and **#cloud-config** data support jinja template +rendering. +When the first line of the provided user-data begins with, +**## template: jinja** cloud-init will use jinja to render that file. +Any instance-data-sensitive.json variables are surfaced as dot-delimited +jinja template variables because cloud-config modules are run as 'root' +user. + + +Below are some examples of providing these types of user-data: + +* Cloud config calling home with the ec2 public hostname and avaliability-zone + +.. 
code-block:: shell-session + +  ## template: jinja +  #cloud-config +  runcmd: +      - echo 'EC2 public hostname allocated to instance: {{ +        ds.meta_data.public_hostname }}' > /tmp/instance_metadata +      - echo 'EC2 availability zone: {{ v1.availability_zone }}' >> +        /tmp/instance_metadata +      - curl -X POST -d '{"hostname": "{{ds.meta_data.public_hostname }}", +        "availability-zone": "{{ v1.availability_zone }}"}' +        https://example.com + +* Custom user-data script performing different operations based on region + +.. code-block:: shell-session + +   ## template: jinja +   #!/bin/bash +   {% if v1.region == 'us-east-2' -%} +   echo 'Installing custom proxies for {{ v1.region }}' +   sudo apt-get install my-xtra-fast-stack +   {%- endif %} +   ... + +.. note:: +  Trying to reference jinja variables that don't exist in +  instance-data.json will result in warnings in ``/var/log/cloud-init.log`` +  and the following string in your rendered user-data: +  ``CI_MISSING_JINJA_VAR/<your_varname>``. + +Cloud-init also surfaces a commandline tool **cloud-init query** which can +assist developers or scripts with obtaining instance metadata easily. See +:ref:`cli_query` for more information. + +To cut down on keystrokes on the command line, cloud-init also provides +top-level key aliases for any standardized ``v#`` keys present. The preceding +``v1`` is not required of ``v1.var_name``. These aliases will represent the +value of the highest versioned standard key. For example, ``cloud_name`` +value will be ``v2.cloud_name`` if both ``v1`` and ``v2`` keys are present in +instance-data.json. +The **query** command also publishes ``userdata`` and ``vendordata`` keys to +the root user which will contain the decoded user and vendor data provided to +this instance. Non-root users referencing userdata or vendordata keys will +see only redacted values. + +.. 
code-block:: shell-session + + # List all top-level instance-data keys available + % cloud-init query --list-keys + + # Find your EC2 ami-id + % cloud-init query ds.metadata.ami_id + + # Format your cloud_name and region using jinja template syntax + % cloud-init query --format 'cloud: {{ v1.cloud_name }} myregion: {{ + % v1.region }}' + +.. note:: +  To save time designing a user-data template for a specific cloud's +  instance-data.json, use the 'render' cloud-init command on an +  instance booted on your favorite cloud. See :ref:`cli_devel` for more +  information. + +.. vi: textwidth=78 diff --git a/integration-requirements.txt b/integration-requirements.txt index f80cb942..880d9886 100644 --- a/integration-requirements.txt +++ b/integration-requirements.txt @@ -5,16 +5,17 @@  # the packages/pkg-deps.json file as well.  # +unittest2  # ec2 backend  boto3==1.5.9  # ssh communication  paramiko==2.4.1 +  # lxd backend  # 04/03/2018: enables use of lxd 3.0  git+https://github.com/lxc/pylxd.git@4b8ab1802f9aee4eb29cf7b119dae0aa47150779 -  # finds latest image information  git+https://git.launchpad.net/simplestreams diff --git a/tests/cloud_tests/testcases/base.py b/tests/cloud_tests/testcases/base.py index 27458271..c5457968 100644 --- a/tests/cloud_tests/testcases/base.py +++ b/tests/cloud_tests/testcases/base.py @@ -5,15 +5,15 @@  import crypt  import json  import re -import unittest +import unittest2  from cloudinit import util as c_util -SkipTest = unittest.SkipTest +SkipTest = unittest2.SkipTest -class CloudTestCase(unittest.TestCase): +class CloudTestCase(unittest2.TestCase):      """Base test class for verifiers."""      # data gets populated in get_suite.setUpClass @@ -167,8 +167,9 @@ class CloudTestCase(unittest.TestCase):                  'Skipping instance-data.json test.'                  
' OS: %s not bionic or newer' % self.os_name)          instance_data = json.loads(out) -        self.assertEqual( -            ['ds/user_data'], instance_data['base64_encoded_keys']) +        self.assertItemsEqual( +            [], +            instance_data['base64_encoded_keys'])          ds = instance_data.get('ds', {})          v1_data = instance_data.get('v1', {})          metadata = ds.get('meta-data', {}) @@ -187,10 +188,10 @@ class CloudTestCase(unittest.TestCase):              metadata.get('placement', {}).get('availability-zone'),              'Could not determine EC2 Availability zone placement')          self.assertIsNotNone( -            v1_data['availability-zone'], 'expected ec2 availability-zone') -        self.assertEqual('aws', v1_data['cloud-name']) -        self.assertIn('i-', v1_data['instance-id']) -        self.assertIn('ip-', v1_data['local-hostname']) +            v1_data['availability_zone'], 'expected ec2 availability_zone') +        self.assertEqual('aws', v1_data['cloud_name']) +        self.assertIn('i-', v1_data['instance_id']) +        self.assertIn('ip-', v1_data['local_hostname'])          self.assertIsNotNone(v1_data['region'], 'expected ec2 region')      def test_instance_data_json_lxd(self): @@ -213,16 +214,14 @@ class CloudTestCase(unittest.TestCase):                  ' OS: %s not bionic or newer' % self.os_name)          instance_data = json.loads(out)          v1_data = instance_data.get('v1', {}) -        self.assertEqual( -            ['ds/user_data', 'ds/vendor_data'], -            sorted(instance_data['base64_encoded_keys'])) -        self.assertEqual('nocloud', v1_data['cloud-name']) +        self.assertItemsEqual([], sorted(instance_data['base64_encoded_keys'])) +        self.assertEqual('nocloud', v1_data['cloud_name'])          self.assertIsNone( -            v1_data['availability-zone'], -            'found unexpected lxd availability-zone %s' % -            v1_data['availability-zone']) -        
self.assertIn('cloud-test', v1_data['instance-id']) -        self.assertIn('cloud-test', v1_data['local-hostname']) +            v1_data['availability_zone'], +            'found unexpected lxd availability_zone %s' % +            v1_data['availability_zone']) +        self.assertIn('cloud-test', v1_data['instance_id']) +        self.assertIn('cloud-test', v1_data['local_hostname'])          self.assertIsNone(              v1_data['region'],              'found unexpected lxd region %s' % v1_data['region']) @@ -248,18 +247,17 @@ class CloudTestCase(unittest.TestCase):                  ' OS: %s not bionic or newer' % self.os_name)          instance_data = json.loads(out)          v1_data = instance_data.get('v1', {}) -        self.assertEqual( -            ['ds/user_data'], instance_data['base64_encoded_keys']) -        self.assertEqual('nocloud', v1_data['cloud-name']) +        self.assertItemsEqual([], instance_data['base64_encoded_keys']) +        self.assertEqual('nocloud', v1_data['cloud_name'])          self.assertIsNone( -            v1_data['availability-zone'], -            'found unexpected kvm availability-zone %s' % -            v1_data['availability-zone']) +            v1_data['availability_zone'], +            'found unexpected kvm availability_zone %s' % +            v1_data['availability_zone'])          self.assertIsNotNone(              re.match(r'[\da-f]{8}(-[\da-f]{4}){3}-[\da-f]{12}', -                     v1_data['instance-id']), -            'kvm instance-id is not a UUID: %s' % v1_data['instance-id']) -        self.assertIn('ubuntu', v1_data['local-hostname']) +                     v1_data['instance_id']), +            'kvm instance_id is not a UUID: %s' % v1_data['instance_id']) +        self.assertIn('ubuntu', v1_data['local_hostname'])          self.assertIsNone(              v1_data['region'],              'found unexpected lxd region %s' % v1_data['region']) | 
