summaryrefslogtreecommitdiff
path: root/cloudinit
diff options
context:
space:
mode:
authorChad Smith <chad.smith@canonical.com>2018-09-25 21:59:16 +0000
committerServer Team CI Bot <josh.powers+server-team-bot@canonical.com>2018-09-25 21:59:16 +0000
commitfc4b966ba928b30b1c586407e752e0b51b1031e8 (patch)
tree4a23ee46076d56c14396f40c2a1abb828e630aa5 /cloudinit
parent0b0378dd07f16d45c16e5750b6815b22a771860d (diff)
downloadvyos-cloud-init-fc4b966ba928b30b1c586407e752e0b51b1031e8.tar.gz
vyos-cloud-init-fc4b966ba928b30b1c586407e752e0b51b1031e8.zip
cli: add cloud-init query subcommand to query instance metadata
Cloud-init caches any cloud metadata crawled during boot in the file /run/cloud-init/instance-data.json. Cloud-init also standardizes some of that metadata across all clouds. The command 'cloud-init query' surfaces a simple CLI to query or format any cached instance metadata so that scripts or end-users do not have to write tools to crawl metadata themselves. Since 'cloud-init query' is runnable by non-root users, redact any sensitive data from instance-data.json and provide a root-readable unredacted instance-data-sensitive.json. Datasources can now define a sensitive_metadata_keys tuple which will redact any matching keys which could contain passwords or credentials from instance-data.json. Also add the following standardized 'v1' instance-data.json keys:   - user_data: The base64encoded user-data provided at instance launch   - vendor_data: Any vendor_data provided to the instance at launch   - underscore_delimited versions of existing hyphenated keys:     instance_id, local_hostname, availability_zone, cloud_name
Diffstat (limited to 'cloudinit')
-rwxr-xr-xcloudinit/cmd/devel/render.py7
-rw-r--r--cloudinit/cmd/main.py10
-rw-r--r--cloudinit/cmd/query.py155
-rw-r--r--cloudinit/cmd/tests/test_query.py193
-rw-r--r--cloudinit/helpers.py4
-rw-r--r--cloudinit/sources/__init__.py76
-rw-r--r--cloudinit/sources/tests/test_init.py130
7 files changed, 534 insertions, 41 deletions
diff --git a/cloudinit/cmd/devel/render.py b/cloudinit/cmd/devel/render.py
index e85933db..2ba6b681 100755
--- a/cloudinit/cmd/devel/render.py
+++ b/cloudinit/cmd/devel/render.py
@@ -9,7 +9,6 @@ import sys
from cloudinit.handlers.jinja_template import render_jinja_payload_from_file
from cloudinit import log
from cloudinit.sources import INSTANCE_JSON_FILE
-from cloudinit import util
from . import addLogHandlerCLI, read_cfg_paths
NAME = 'render'
@@ -54,11 +53,7 @@ def handle_args(name, args):
paths.run_dir, INSTANCE_JSON_FILE)
else:
instance_data_fn = args.instance_data
- try:
- with open(instance_data_fn) as stream:
- instance_data = stream.read()
- instance_data = util.load_json(instance_data)
- except IOError:
+ if not os.path.exists(instance_data_fn):
LOG.error('Missing instance-data.json file: %s', instance_data_fn)
return 1
try:
diff --git a/cloudinit/cmd/main.py b/cloudinit/cmd/main.py
index 0eee583c..5a437020 100644
--- a/cloudinit/cmd/main.py
+++ b/cloudinit/cmd/main.py
@@ -791,6 +791,10 @@ def main(sysv_args=None):
' pass to this module'))
parser_single.set_defaults(action=('single', main_single))
+ parser_query = subparsers.add_parser(
+ 'query',
+ help='Query standardized instance metadata from the command line.')
+
parser_dhclient = subparsers.add_parser('dhclient-hook',
help=('run the dhclient hook'
'to record network info'))
@@ -842,6 +846,12 @@ def main(sysv_args=None):
clean_parser(parser_clean)
parser_clean.set_defaults(
action=('clean', handle_clean_args))
+ elif sysv_args[0] == 'query':
+ from cloudinit.cmd.query import (
+ get_parser as query_parser, handle_args as handle_query_args)
+ query_parser(parser_query)
+ parser_query.set_defaults(
+ action=('render', handle_query_args))
elif sysv_args[0] == 'status':
from cloudinit.cmd.status import (
get_parser as status_parser, handle_status_args)
diff --git a/cloudinit/cmd/query.py b/cloudinit/cmd/query.py
new file mode 100644
index 00000000..7d2d4fe4
--- /dev/null
+++ b/cloudinit/cmd/query.py
@@ -0,0 +1,155 @@
+# This file is part of cloud-init. See LICENSE file for license information.
+
+"""Query standardized instance metadata from the command line."""
+
+import argparse
+import os
+import six
+import sys
+
+from cloudinit.handlers.jinja_template import (
+ convert_jinja_instance_data, render_jinja_payload)
+from cloudinit.cmd.devel import addLogHandlerCLI, read_cfg_paths
+from cloudinit import log
+from cloudinit.sources import (
+ INSTANCE_JSON_FILE, INSTANCE_JSON_SENSITIVE_FILE, REDACT_SENSITIVE_VALUE)
+from cloudinit import util
+
+NAME = 'query'
+LOG = log.getLogger(NAME)
+
+
+def get_parser(parser=None):
+    """Return an argument parser set up with the query utility's options.
+
+    @param parser: Optional ArgumentParser for the query subcommand. When
+        it is falsy, a standalone parser with prog NAME is created instead.
+
+    @returns: The ArgumentParser with every query argument registered.
+    """
+    if not parser:
+        parser = argparse.ArgumentParser(
+            prog=NAME, description='Query cloud-init instance data')
+    add_arg = parser.add_argument
+    # Keyword arguments shared by every boolean switch.
+    flag = {'action': 'store_true', 'default': False}
+    add_arg('-d', '--debug',
+            help='Add verbose messages during template render', **flag)
+    add_arg('-i', '--instance-data', type=str, help=(
+        'Path to instance-data.json file. Default is /run/cloud-init/%s'
+        % INSTANCE_JSON_FILE))
+    add_arg('-l', '--list-keys', help=(
+        'List query keys available at the provided instance-data'
+        ' <varname>.'), **flag)
+    add_arg('-u', '--user-data', type=str, help=(
+        'Path to user-data file. Default is'
+        ' /var/lib/cloud/instance/user-data.txt'))
+    add_arg('-v', '--vendor-data', type=str, help=(
+        'Path to vendor-data file. Default is'
+        ' /var/lib/cloud/instance/vendor-data.txt'))
+    add_arg('varname', type=str, nargs='?', help=(
+        'A dot-delimited instance data variable to query from'
+        ' instance-data query. For example: v2.local_hostname'))
+    add_arg('-a', '--all', dest='dump_all', help=(
+        'Dump all available instance-data'), **flag)
+    add_arg('-f', '--format', type=str, dest='format', help=(
+        'Optionally specify a custom output format string. Any'
+        ' instance-data variable can be specified between double-curly'
+        ' braces. For example -f "{{ v2.cloud_name }}"'))
+    return parser
+
+
+def handle_args(name, args):
+    """Handle calls to 'cloud-init query' as a subcommand.
+
+    @param name: Name of the invoked action; unused by this handler.
+    @param args: Parsed argument namespace with the attributes debug,
+        dump_all, format, instance_data, list_keys, user_data, vendor_data
+        and varname.
+
+    @returns: 0 when output was printed, otherwise 1.
+    """
+    paths = None
+    addLogHandlerCLI(LOG, log.DEBUG if args.debug else log.WARNING)
+    if not any([args.list_keys, args.varname, args.format, args.dump_all]):
+        LOG.error(
+            'Expected one of the options: --all, --format,'
+            ' --list-keys or varname')
+        get_parser().print_help()
+        return 1
+
+    uid = os.getuid()
+    # Only read the configured paths when at least one default is needed.
+    if not all([args.instance_data, args.user_data, args.vendor_data]):
+        paths = read_cfg_paths()
+    if not args.instance_data:
+        if uid == 0:
+            default_json_fn = INSTANCE_JSON_SENSITIVE_FILE
+        else:
+            default_json_fn = INSTANCE_JSON_FILE  # World readable
+        instance_data_fn = os.path.join(paths.run_dir, default_json_fn)
+    else:
+        instance_data_fn = args.instance_data
+    if not args.user_data:
+        user_data_fn = os.path.join(paths.instance_link, 'user-data.txt')
+    else:
+        user_data_fn = args.user_data
+    if not args.vendor_data:
+        vendor_data_fn = os.path.join(paths.instance_link, 'vendor-data.txt')
+    else:
+        vendor_data_fn = args.vendor_data
+
+    try:
+        instance_json = util.load_file(instance_data_fn)
+    except IOError:
+        LOG.error('Missing instance-data.json file: %s', instance_data_fn)
+        return 1
+
+    instance_data = util.load_json(instance_json)
+    if uid != 0:
+        # Non-root callers get a placeholder naming the file instead of the
+        # user-data / vendor-data content.
+        instance_data['userdata'] = (
+            '<%s> file:%s' % (REDACT_SENSITIVE_VALUE, user_data_fn))
+        instance_data['vendordata'] = (
+            '<%s> file:%s' % (REDACT_SENSITIVE_VALUE, vendor_data_fn))
+    else:
+        instance_data['userdata'] = util.load_file(user_data_fn)
+        instance_data['vendordata'] = util.load_file(vendor_data_fn)
+    if args.format:
+        payload = '## template: jinja\n{fmt}'.format(fmt=args.format)
+        rendered_payload = render_jinja_payload(
+            payload=payload, payload_fn='query commandline',
+            instance_data=instance_data,
+            debug=bool(args.debug))
+        if rendered_payload:
+            print(rendered_payload)
+            return 0
+        return 1
+
+    response = convert_jinja_instance_data(instance_data)
+    if args.varname:
+        try:
+            for var in args.varname.split('.'):
+                response = response[var]
+        except (KeyError, TypeError):
+            # TypeError is raised when the path descends into a non-dict
+            # value, e.g. indexing a string or a list with a key name.
+            LOG.error('Undefined instance-data key %s', args.varname)
+            return 1
+        if args.list_keys:
+            if not isinstance(response, dict):
+                LOG.error("--list-keys provided but '%s' is not a dict", var)
+                return 1
+            response = '\n'.join(sorted(response.keys()))
+    elif args.list_keys:
+        response = '\n'.join(sorted(response.keys()))
+    if not isinstance(response, six.string_types):
+        response = util.json_dumps(response)
+    print(response)
+    return 0
+
+
+def main():
+    """Parse the command line, run the query and exit with its status."""
+    cli_args = get_parser().parse_args()
+    exit_code = handle_args(NAME, cli_args)
+    sys.exit(exit_code)
+
+
+if __name__ == '__main__':
+ main()
+
+# vi: ts=4 expandtab
diff --git a/cloudinit/cmd/tests/test_query.py b/cloudinit/cmd/tests/test_query.py
new file mode 100644
index 00000000..fb87c6ab
--- /dev/null
+++ b/cloudinit/cmd/tests/test_query.py
@@ -0,0 +1,193 @@
+# This file is part of cloud-init. See LICENSE file for license information.
+
+from six import StringIO
+from textwrap import dedent
+import os
+
+from collections import namedtuple
+from cloudinit.cmd import query
+from cloudinit.helpers import Paths
+from cloudinit.sources import REDACT_SENSITIVE_VALUE, INSTANCE_JSON_FILE
+from cloudinit.tests.helpers import CiTestCase, mock
+from cloudinit.util import ensure_dir, write_file
+
+
+class TestQuery(CiTestCase):
+    """Tests for query.handle_args driven by hand-built argument tuples.
+
+    NOTE(review): os.getuid is not patched in these tests, so the expected
+    '<redacted ...> file:...' userdata/vendordata values presumably rely on
+    the suite running as a non-root user -- confirm.
+    """
+
+    with_logs = True
+
+    # Stand-in for the argparse namespace that handle_args receives.
+    args = namedtuple(
+        'queryargs',
+        ('debug dump_all format instance_data list_keys user_data vendor_data'
+         ' varname'))
+
+    def setUp(self):
+        super(TestQuery, self).setUp()
+        self.tmp = self.tmp_dir()
+        self.instance_data = self.tmp_path('instance-data', dir=self.tmp)
+
+    def test_handle_args_error_on_missing_param(self):
+        """Error when missing required parameters and print usage."""
+        args = self.args(
+            debug=False, dump_all=False, format=None, instance_data=None,
+            list_keys=False, user_data=None, vendor_data=None, varname=None)
+        with mock.patch('sys.stderr', new_callable=StringIO) as m_stderr:
+            with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout:
+                self.assertEqual(1, query.handle_args('anyname', args))
+        expected_error = (
+            'ERROR: Expected one of the options: --all, --format, --list-keys'
+            ' or varname\n')
+        self.assertIn(expected_error, self.logs.getvalue())
+        self.assertIn('usage: query', m_stdout.getvalue())
+        self.assertIn(expected_error, m_stderr.getvalue())
+
+    def test_handle_args_error_on_missing_instance_data(self):
+        """When instance_data file path does not exist, log an error."""
+        absent_fn = self.tmp_path('absent', dir=self.tmp)
+        args = self.args(
+            debug=False, dump_all=True, format=None, instance_data=absent_fn,
+            list_keys=False, user_data='ud', vendor_data='vd', varname=None)
+        with mock.patch('sys.stderr', new_callable=StringIO) as m_stderr:
+            self.assertEqual(1, query.handle_args('anyname', args))
+        self.assertIn(
+            'ERROR: Missing instance-data.json file: %s' % absent_fn,
+            self.logs.getvalue())
+        self.assertIn(
+            'ERROR: Missing instance-data.json file: %s' % absent_fn,
+            m_stderr.getvalue())
+
+    def test_handle_args_defaults_instance_data(self):
+        """When no instance_data argument, default to configured run_dir."""
+        args = self.args(
+            debug=False, dump_all=True, format=None, instance_data=None,
+            list_keys=False, user_data=None, vendor_data=None, varname=None)
+        run_dir = self.tmp_path('run_dir', dir=self.tmp)
+        ensure_dir(run_dir)
+        paths = Paths({'run_dir': run_dir})
+        self.add_patch('cloudinit.cmd.query.read_cfg_paths', 'm_paths')
+        self.m_paths.return_value = paths
+        with mock.patch('sys.stderr', new_callable=StringIO) as m_stderr:
+            self.assertEqual(1, query.handle_args('anyname', args))
+        json_file = os.path.join(run_dir, INSTANCE_JSON_FILE)
+        self.assertIn(
+            'ERROR: Missing instance-data.json file: %s' % json_file,
+            self.logs.getvalue())
+        self.assertIn(
+            'ERROR: Missing instance-data.json file: %s' % json_file,
+            m_stderr.getvalue())
+
+    def test_handle_args_dumps_all_instance_data(self):
+        """When --all is specified query will dump all instance data vars."""
+        write_file(self.instance_data, '{"my-var": "it worked"}')
+        args = self.args(
+            debug=False, dump_all=True, format=None,
+            instance_data=self.instance_data, list_keys=False,
+            user_data='ud', vendor_data='vd', varname=None)
+        with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout:
+            self.assertEqual(0, query.handle_args('anyname', args))
+        self.assertEqual(
+            '{\n "my_var": "it worked",\n "userdata": "<%s> file:ud",\n'
+            ' "vendordata": "<%s> file:vd"\n}\n' % (
+                REDACT_SENSITIVE_VALUE, REDACT_SENSITIVE_VALUE),
+            m_stdout.getvalue())
+
+    def test_handle_args_returns_top_level_varname(self):
+        """When the argument varname is passed, report its value."""
+        write_file(self.instance_data, '{"my-var": "it worked"}')
+        args = self.args(
+            debug=False, dump_all=True, format=None,
+            instance_data=self.instance_data, list_keys=False,
+            user_data='ud', vendor_data='vd', varname='my_var')
+        with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout:
+            self.assertEqual(0, query.handle_args('anyname', args))
+        self.assertEqual('it worked\n', m_stdout.getvalue())
+
+    def test_handle_args_returns_nested_varname(self):
+        """When varname is a dot-delimited path, report the nested value."""
+        write_file(self.instance_data,
+                   '{"v1": {"key-2": "value-2"}, "my-var": "it worked"}')
+        args = self.args(
+            debug=False, dump_all=False, format=None,
+            instance_data=self.instance_data, user_data='ud', vendor_data='vd',
+            list_keys=False, varname='v1.key_2')
+        with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout:
+            self.assertEqual(0, query.handle_args('anyname', args))
+        self.assertEqual('value-2\n', m_stdout.getvalue())
+
+    def test_handle_args_returns_standardized_vars_to_top_level_aliases(self):
+        """Any standardized vars under v# are promoted as top-level aliases."""
+        write_file(
+            self.instance_data,
+            '{"v1": {"v1_1": "val1.1"}, "v2": {"v2_2": "val2.2"},'
+            ' "top": "gun"}')
+        expected = dedent("""\
+            {
+             "top": "gun",
+             "userdata": "<redacted for non-root user> file:ud",
+             "v1": {
+              "v1_1": "val1.1"
+             },
+             "v1_1": "val1.1",
+             "v2": {
+              "v2_2": "val2.2"
+             },
+             "v2_2": "val2.2",
+             "vendordata": "<redacted for non-root user> file:vd"
+            }
+        """)
+        args = self.args(
+            debug=False, dump_all=True, format=None,
+            instance_data=self.instance_data, user_data='ud', vendor_data='vd',
+            list_keys=False, varname=None)
+        with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout:
+            self.assertEqual(0, query.handle_args('anyname', args))
+        self.assertEqual(expected, m_stdout.getvalue())
+
+    def test_handle_args_list_keys_sorts_top_level_keys_when_no_varname(self):
+        """Sort all top-level keys when only --list-keys provided."""
+        write_file(
+            self.instance_data,
+            '{"v1": {"v1_1": "val1.1"}, "v2": {"v2_2": "val2.2"},'
+            ' "top": "gun"}')
+        expected = 'top\nuserdata\nv1\nv1_1\nv2\nv2_2\nvendordata\n'
+        args = self.args(
+            debug=False, dump_all=False, format=None,
+            instance_data=self.instance_data, list_keys=True, user_data='ud',
+            vendor_data='vd', varname=None)
+        with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout:
+            self.assertEqual(0, query.handle_args('anyname', args))
+        self.assertEqual(expected, m_stdout.getvalue())
+
+    def test_handle_args_list_keys_sorts_nested_keys_when_varname(self):
+        """Sort all nested keys of varname object when --list-keys provided."""
+        write_file(
+            self.instance_data,
+            '{"v1": {"v1_1": "val1.1", "v1_2": "val1.2"}, "v2":' +
+            ' {"v2_2": "val2.2"}, "top": "gun"}')
+        expected = 'v1_1\nv1_2\n'
+        args = self.args(
+            debug=False, dump_all=False, format=None,
+            instance_data=self.instance_data, list_keys=True,
+            user_data='ud', vendor_data='vd', varname='v1')
+        with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout:
+            self.assertEqual(0, query.handle_args('anyname', args))
+        self.assertEqual(expected, m_stdout.getvalue())
+
+    def test_handle_args_list_keys_errors_when_varname_is_not_a_dict(self):
+        """Return 1 and log an error when --list-keys targets a non-dict."""
+        write_file(
+            self.instance_data,
+            '{"v1": {"v1_1": "val1.1", "v1_2": "val1.2"}, "v2": ' +
+            '{"v2_2": "val2.2"}, "top": "gun"}')
+        expected_error = "ERROR: --list-keys provided but 'top' is not a dict"
+        args = self.args(
+            debug=False, dump_all=False, format=None,
+            instance_data=self.instance_data, list_keys=True, user_data='ud',
+            vendor_data='vd', varname='top')
+        with mock.patch('sys.stderr', new_callable=StringIO) as m_stderr:
+            with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout:
+                self.assertEqual(1, query.handle_args('anyname', args))
+        self.assertEqual('', m_stdout.getvalue())
+        self.assertIn(expected_error, m_stderr.getvalue())
+
+# vi: ts=4 expandtab
diff --git a/cloudinit/helpers.py b/cloudinit/helpers.py
index 3cc1fb19..dcd2645e 100644
--- a/cloudinit/helpers.py
+++ b/cloudinit/helpers.py
@@ -239,6 +239,10 @@ class ConfigMerger(object):
if cc_fn and os.path.isfile(cc_fn):
try:
i_cfgs.append(util.read_conf(cc_fn))
+ except PermissionError:
+ LOG.debug(
+ 'Skipped loading cloud-config from %s due to'
+ ' non-root.', cc_fn)
except Exception:
util.logexc(LOG, 'Failed loading of cloud-config from %s',
cc_fn)
diff --git a/cloudinit/sources/__init__.py b/cloudinit/sources/__init__.py
index a775f1a8..730e8174 100644
--- a/cloudinit/sources/__init__.py
+++ b/cloudinit/sources/__init__.py
@@ -38,8 +38,12 @@ DEP_FILESYSTEM = "FILESYSTEM"
DEP_NETWORK = "NETWORK"
DS_PREFIX = 'DataSource'
-# File in which instance meta-data, user-data and vendor-data is written
+# File in which publicly available instance meta-data is written
+# security-sensitive key values are redacted from this world-readable file
INSTANCE_JSON_FILE = 'instance-data.json'
+# security-sensitive key values are present in this root-readable file
+INSTANCE_JSON_SENSITIVE_FILE = 'instance-data-sensitive.json'
+REDACT_SENSITIVE_VALUE = 'redacted for non-root user'
# Key which can be provide a cloud's official product name to cloud-init
METADATA_CLOUD_NAME_KEY = 'cloud-name'
@@ -58,7 +62,7 @@ class InvalidMetaDataException(Exception):
pass
-def process_instance_metadata(metadata, key_path=''):
+def process_instance_metadata(metadata, key_path='', sensitive_keys=()):
"""Process all instance metadata cleaning it up for persisting as json.
Strip ci-b64 prefix and catalog any 'base64_encoded_keys' as a list
@@ -67,22 +71,46 @@ def process_instance_metadata(metadata, key_path=''):
"""
md_copy = copy.deepcopy(metadata)
md_copy['base64_encoded_keys'] = []
+ md_copy['sensitive_keys'] = []
for key, val in metadata.items():
if key_path:
sub_key_path = key_path + '/' + key
else:
sub_key_path = key
+ if key in sensitive_keys or sub_key_path in sensitive_keys:
+ md_copy['sensitive_keys'].append(sub_key_path)
if isinstance(val, str) and val.startswith('ci-b64:'):
md_copy['base64_encoded_keys'].append(sub_key_path)
md_copy[key] = val.replace('ci-b64:', '')
if isinstance(val, dict):
- return_val = process_instance_metadata(val, sub_key_path)
+ return_val = process_instance_metadata(
+ val, sub_key_path, sensitive_keys)
md_copy['base64_encoded_keys'].extend(
return_val.pop('base64_encoded_keys'))
+ md_copy['sensitive_keys'].extend(
+ return_val.pop('sensitive_keys'))
md_copy[key] = return_val
return md_copy
+def redact_sensitive_keys(metadata, redact_value=REDACT_SENSITIVE_VALUE):
+    """Redact any sensitive keys from the provided metadata dictionary.
+
+    Replace the value at every '/'-delimited key path listed in
+    metadata['sensitive_keys'] with redact_value.
+
+    @param metadata: Dictionary optionally holding a 'sensitive_keys' list.
+    @param redact_value: Replacement stored in place of sensitive values.
+
+    @returns: The metadata object itself when 'sensitive_keys' is absent or
+        empty, otherwise a redacted deep copy.
+    @raises: KeyError when an intermediate segment of a key path is absent.
+    """
+    if not metadata.get('sensitive_keys', []):
+        return metadata
+    md_copy = copy.deepcopy(metadata)
+    for key_path in metadata.get('sensitive_keys'):
+        path_parts = key_path.split('/')
+        obj = md_copy
+        # Walk by position, not by comparing names with the leaf, so that
+        # an intermediate key sharing the leaf's name is still descended.
+        for path in path_parts[:-1]:
+            obj = obj[path]
+            if not isinstance(obj, dict):
+                # An ancestor is not a dict (for instance it was already
+                # replaced by redact_value): nothing is left beneath it.
+                break
+        else:
+            obj[path_parts[-1]] = redact_value
+    return md_copy
+
+
URLParams = namedtuple(
'URLParms', ['max_wait_seconds', 'timeout_seconds', 'num_retries'])
@@ -127,6 +155,10 @@ class DataSource(object):
_dirty_cache = False
+ # N-tuple of keypaths or keynames to redact from instance-data.json for
+ # non-root users
+ sensitive_metadata_keys = ('security-credentials',)
+
def __init__(self, sys_cfg, distro, paths, ud_proc=None):
self.sys_cfg = sys_cfg
self.distro = distro
@@ -152,12 +184,24 @@ class DataSource(object):
def _get_standardized_metadata(self):
"""Return a dictionary of standardized metadata keys."""
- return {'v1': {
- 'local-hostname': self.get_hostname(),
- 'instance-id': self.get_instance_id(),
- 'cloud-name': self.cloud_name,
- 'region': self.region,
- 'availability-zone': self.availability_zone}}
+ local_hostname = self.get_hostname()
+ instance_id = self.get_instance_id()
+ availability_zone = self.availability_zone
+ cloud_name = self.cloud_name
+ # When adding new standard keys prefer underscore-delimited instead
+ # of hyphen-delimited to support simple variable references in jinja
+ # templates.
+ return {
+ 'v1': {
+ 'availability-zone': availability_zone,
+ 'availability_zone': availability_zone,
+ 'cloud-name': cloud_name,
+ 'cloud_name': cloud_name,
+ 'instance-id': instance_id,
+ 'instance_id': instance_id,
+ 'local-hostname': local_hostname,
+ 'local_hostname': local_hostname,
+ 'region': self.region}}
def clear_cached_attrs(self, attr_defaults=()):
"""Reset any cached metadata attributes to datasource defaults.
@@ -200,9 +244,7 @@ class DataSource(object):
"""
instance_data = {
'ds': {
- 'meta_data': self.metadata,
- 'user_data': self.get_userdata_raw(),
- 'vendor_data': self.get_vendordata_raw()}}
+ 'meta_data': self.metadata}}
if hasattr(self, 'network_json'):
network_json = getattr(self, 'network_json')
if network_json != UNSET:
@@ -217,7 +259,9 @@ class DataSource(object):
# Process content base64encoding unserializable values
content = util.json_dumps(instance_data)
# Strip base64: prefix and set base64_encoded_keys list.
- processed_data = process_instance_metadata(json.loads(content))
+ processed_data = process_instance_metadata(
+ json.loads(content),
+ sensitive_keys=self.sensitive_metadata_keys)
except TypeError as e:
LOG.warning('Error persisting instance-data.json: %s', str(e))
return False
@@ -225,7 +269,11 @@ class DataSource(object):
LOG.warning('Error persisting instance-data.json: %s', str(e))
return False
json_file = os.path.join(self.paths.run_dir, INSTANCE_JSON_FILE)
- write_json(json_file, processed_data, mode=0o600)
+        # World readable: sensitive key values must be redacted here.
+        write_json(json_file, redact_sensitive_keys(processed_data))
+        json_sensitive_file = os.path.join(self.paths.run_dir,
+                                           INSTANCE_JSON_SENSITIVE_FILE)
+        # Root-readable only: keeps the unredacted values.
+        write_json(json_sensitive_file, processed_data, mode=0o600)
return True
def _get_data(self):
diff --git a/cloudinit/sources/tests/test_init.py b/cloudinit/sources/tests/test_init.py
index 8299af23..6b965750 100644
--- a/cloudinit/sources/tests/test_init.py
+++ b/cloudinit/sources/tests/test_init.py
@@ -1,5 +1,6 @@
# This file is part of cloud-init. See LICENSE file for license information.
+import copy
import inspect
import os
import six
@@ -9,7 +10,8 @@ from cloudinit.event import EventType
from cloudinit.helpers import Paths
from cloudinit import importer
from cloudinit.sources import (
- INSTANCE_JSON_FILE, DataSource, UNSET)
+ INSTANCE_JSON_FILE, INSTANCE_JSON_SENSITIVE_FILE, REDACT_SENSITIVE_VALUE,
+ UNSET, DataSource, redact_sensitive_keys)
from cloudinit.tests.helpers import CiTestCase, skipIf, mock
from cloudinit.user_data import UserDataProcessor
from cloudinit import util
@@ -20,20 +22,24 @@ class DataSourceTestSubclassNet(DataSource):
dsname = 'MyTestSubclass'
url_max_wait = 55
- def __init__(self, sys_cfg, distro, paths, custom_userdata=None,
- get_data_retval=True):
+ def __init__(self, sys_cfg, distro, paths, custom_metadata=None,
+ custom_userdata=None, get_data_retval=True):
super(DataSourceTestSubclassNet, self).__init__(
sys_cfg, distro, paths)
self._custom_userdata = custom_userdata
+ self._custom_metadata = custom_metadata
self._get_data_retval = get_data_retval
def _get_cloud_name(self):
return 'SubclassCloudName'
def _get_data(self):
- self.metadata = {'availability_zone': 'myaz',
- 'local-hostname': 'test-subclass-hostname',
- 'region': 'myregion'}
+ if self._custom_metadata:
+ self.metadata = self._custom_metadata
+ else:
+ self.metadata = {'availability_zone': 'myaz',
+ 'local-hostname': 'test-subclass-hostname',
+ 'region': 'myregion'}
if self._custom_userdata:
self.userdata_raw = self._custom_userdata
else:
@@ -278,7 +284,7 @@ class TestDataSource(CiTestCase):
os.path.exists(json_file), 'Found unexpected file %s' % json_file)
def test_get_data_writes_json_instance_data_on_success(self):
- """get_data writes INSTANCE_JSON_FILE to run_dir as readonly root."""
+ """get_data writes INSTANCE_JSON_FILE to run_dir as world readable."""
tmp = self.tmp_dir()
datasource = DataSourceTestSubclassNet(
self.sys_cfg, self.distro, Paths({'run_dir': tmp}))
@@ -287,40 +293,90 @@ class TestDataSource(CiTestCase):
content = util.load_file(json_file)
expected = {
'base64_encoded_keys': [],
+ 'sensitive_keys': [],
'v1': {
'availability-zone': 'myaz',
+ 'availability_zone': 'myaz',
'cloud-name': 'subclasscloudname',
+ 'cloud_name': 'subclasscloudname',
'instance-id': 'iid-datasource',
+ 'instance_id': 'iid-datasource',
'local-hostname': 'test-subclass-hostname',
+ 'local_hostname': 'test-subclass-hostname',
'region': 'myregion'},
'ds': {
'meta_data': {'availability_zone': 'myaz',
'local-hostname': 'test-subclass-hostname',
- 'region': 'myregion'},
- 'user_data': 'userdata_raw',
- 'vendor_data': 'vendordata_raw'}}
- self.maxDiff = None
+ 'region': 'myregion'}}}
self.assertEqual(expected, util.load_json(content))
file_stat = os.stat(json_file)
+ self.assertEqual(0o644, stat.S_IMODE(file_stat.st_mode))
+ self.assertEqual(expected, util.load_json(content))
+
+    def test_get_data_writes_json_instance_data_sensitive(self):
+        """get_data writes INSTANCE_JSON_SENSITIVE_FILE as readonly root.
+
+        The world-readable INSTANCE_JSON_FILE must carry the redaction
+        marker, while the root-only sensitive file keeps the credentials.
+        """
+        tmp = self.tmp_dir()
+        datasource = DataSourceTestSubclassNet(
+            self.sys_cfg, self.distro, Paths({'run_dir': tmp}),
+            custom_metadata={
+                'availability_zone': 'myaz',
+                'local-hostname': 'test-subclass-hostname',
+                'region': 'myregion',
+                'some': {'security-credentials': {
+                    'cred1': 'sekret', 'cred2': 'othersekret'}}})
+        self.assertEqual(
+            ('security-credentials',), datasource.sensitive_metadata_keys)
+        datasource.get_data()
+        json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp)
+        sensitive_json_file = self.tmp_path(INSTANCE_JSON_SENSITIVE_FILE, tmp)
+        redacted = util.load_json(util.load_file(json_file))
+        self.assertEqual(
+            REDACT_SENSITIVE_VALUE,
+            redacted['ds']['meta_data']['some']['security-credentials'])
+        content = util.load_file(sensitive_json_file)
+        expected = {
+            'base64_encoded_keys': [],
+            'sensitive_keys': ['ds/meta_data/some/security-credentials'],
+            'v1': {
+                'availability-zone': 'myaz',
+                'availability_zone': 'myaz',
+                'cloud-name': 'subclasscloudname',
+                'cloud_name': 'subclasscloudname',
+                'instance-id': 'iid-datasource',
+                'instance_id': 'iid-datasource',
+                'local-hostname': 'test-subclass-hostname',
+                'local_hostname': 'test-subclass-hostname',
+                'region': 'myregion'},
+            'ds': {
+                'meta_data': {
+                    'availability_zone': 'myaz',
+                    'local-hostname': 'test-subclass-hostname',
+                    'region': 'myregion',
+                    'some': {'security-credentials': {
+                        'cred1': 'sekret', 'cred2': 'othersekret'}}}}
+        }
+        self.maxDiff = None
+        self.assertEqual(expected, util.load_json(content))
+        file_stat = os.stat(sensitive_json_file)
         self.assertEqual(0o600, stat.S_IMODE(file_stat.st_mode))
def test_get_data_handles_redacted_unserializable_content(self):
"""get_data warns unserializable content in INSTANCE_JSON_FILE."""
tmp = self.tmp_dir()
datasource = DataSourceTestSubclassNet(
self.sys_cfg, self.distro, Paths({'run_dir': tmp}),
- custom_userdata={'key1': 'val1', 'key2': {'key2.1': self.paths}})
+ custom_metadata={'key1': 'val1', 'key2': {'key2.1': self.paths}})
datasource.get_data()
json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp)
content = util.load_file(json_file)
- expected_userdata = {
+ expected_metadata = {
'key1': 'val1',
'key2': {
'key2.1': "Warning: redacted unserializable type <class"
" 'cloudinit.helpers.Paths'>"}}
instance_json = util.load_json(content)
self.assertEqual(
- expected_userdata, instance_json['ds']['user_data'])
+ expected_metadata, instance_json['ds']['meta_data'])
def test_persist_instance_data_writes_ec2_metadata_when_set(self):
"""When ec2_metadata class attribute is set, persist to json."""
@@ -361,17 +417,17 @@ class TestDataSource(CiTestCase):
tmp = self.tmp_dir()
datasource = DataSourceTestSubclassNet(
self.sys_cfg, self.distro, Paths({'run_dir': tmp}),
- custom_userdata={'key1': 'val1', 'key2': {'key2.1': b'\x123'}})
+ custom_metadata={'key1': 'val1', 'key2': {'key2.1': b'\x123'}})
self.assertTrue(datasource.get_data())
json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp)
content = util.load_file(json_file)
instance_json = util.load_json(content)
- self.assertEqual(
- ['ds/user_data/key2/key2.1'],
+ self.assertItemsEqual(
+ ['ds/meta_data/key2/key2.1'],
instance_json['base64_encoded_keys'])
self.assertEqual(
{'key1': 'val1', 'key2': {'key2.1': 'EjM='}},
- instance_json['ds']['user_data'])
+ instance_json['ds']['meta_data'])
@skipIf(not six.PY2, "json serialization on <= py2.7 handles bytes")
def test_get_data_handles_bytes_values(self):
@@ -379,7 +435,7 @@ class TestDataSource(CiTestCase):
tmp = self.tmp_dir()
datasource = DataSourceTestSubclassNet(
self.sys_cfg, self.distro, Paths({'run_dir': tmp}),
- custom_userdata={'key1': 'val1', 'key2': {'key2.1': b'\x123'}})
+ custom_metadata={'key1': 'val1', 'key2': {'key2.1': b'\x123'}})
self.assertTrue(datasource.get_data())
json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp)
content = util.load_file(json_file)
@@ -387,7 +443,7 @@ class TestDataSource(CiTestCase):
self.assertEqual([], instance_json['base64_encoded_keys'])
self.assertEqual(
{'key1': 'val1', 'key2': {'key2.1': '\x123'}},
- instance_json['ds']['user_data'])
+ instance_json['ds']['meta_data'])
@skipIf(not six.PY2, "Only python2 hits UnicodeDecodeErrors on non-utf8")
def test_non_utf8_encoding_logs_warning(self):
@@ -395,7 +451,7 @@ class TestDataSource(CiTestCase):
tmp = self.tmp_dir()
datasource = DataSourceTestSubclassNet(
self.sys_cfg, self.distro, Paths({'run_dir': tmp}),
- custom_userdata={'key1': 'val1', 'key2': {'key2.1': b'ab\xaadef'}})
+ custom_metadata={'key1': 'val1', 'key2': {'key2.1': b'ab\xaadef'}})
self.assertTrue(datasource.get_data())
json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp)
self.assertFalse(os.path.exists(json_file))
@@ -509,4 +565,36 @@ class TestDataSource(CiTestCase):
self.logs.getvalue())
+class TestRedactSensitiveData(CiTestCase):
+    """Unit tests for redact_sensitive_keys."""
+
+    def test_redact_sensitive_data_noop_when_no_sensitive_keys_present(self):
+        """Metadata is returned as-is if sensitive_keys is missing or empty."""
+        md = {'my': 'data'}
+        result = redact_sensitive_keys(md, redact_value='redacted')
+        self.assertEqual(md, result)
+        md['sensitive_keys'] = []
+        result = redact_sensitive_keys(md, redact_value='redacted')
+        self.assertEqual(md, result)
+
+    def test_redact_sensitive_data_redacts_exact_match_name(self):
+        """Only the value at an exactly matching key path is replaced."""
+        md = {'md': {'insecure': 'publik', 'secure': 's3kr1t'},
+              'sensitive_keys': ['md/secure']}
+        secure_md = copy.deepcopy(md)
+        secure_md['md'].update(secure='redacted')
+        result = redact_sensitive_keys(md, redact_value='redacted')
+        self.assertEqual(secure_md, result)
+
+    def test_redact_sensitive_data_does_redacts_with_default_string(self):
+        """Without redact_value, 'redacted for non-root user' is used."""
+        md = {'md': {'insecure': 'publik', 'secure': 's3kr1t'},
+              'sensitive_keys': ['md/secure']}
+        secure_md = copy.deepcopy(md)
+        secure_md['md'].update(secure='redacted for non-root user')
+        result = redact_sensitive_keys(md)
+        self.assertEqual(secure_md, result)
+
+
# vi: ts=4 expandtab