diff options
Diffstat (limited to 'cloudinit/sources')
-rw-r--r-- | cloudinit/sources/__init__.py | 76 | ||||
-rw-r--r-- | cloudinit/sources/tests/test_init.py | 130 |
2 files changed, 171 insertions, 35 deletions
diff --git a/cloudinit/sources/__init__.py b/cloudinit/sources/__init__.py index a775f1a8..730e8174 100644 --- a/cloudinit/sources/__init__.py +++ b/cloudinit/sources/__init__.py @@ -38,8 +38,12 @@ DEP_FILESYSTEM = "FILESYSTEM" DEP_NETWORK = "NETWORK" DS_PREFIX = 'DataSource' -# File in which instance meta-data, user-data and vendor-data is written +# File in which public available instance meta-data is written +# security-sensitive key values are redacted from this world-readable file INSTANCE_JSON_FILE = 'instance-data.json' +# security-sensitive key values are present in this root-readable file +INSTANCE_JSON_SENSITIVE_FILE = 'instance-data-sensitive.json' +REDACT_SENSITIVE_VALUE = 'redacted for non-root user' # Key which can be provide a cloud's official product name to cloud-init METADATA_CLOUD_NAME_KEY = 'cloud-name' @@ -58,7 +62,7 @@ class InvalidMetaDataException(Exception): pass -def process_instance_metadata(metadata, key_path=''): +def process_instance_metadata(metadata, key_path='', sensitive_keys=()): """Process all instance metadata cleaning it up for persisting as json. Strip ci-b64 prefix and catalog any 'base64_encoded_keys' as a list @@ -67,22 +71,46 @@ def process_instance_metadata(metadata, key_path=''): """ md_copy = copy.deepcopy(metadata) md_copy['base64_encoded_keys'] = [] + md_copy['sensitive_keys'] = [] for key, val in metadata.items(): if key_path: sub_key_path = key_path + '/' + key else: sub_key_path = key + if key in sensitive_keys or sub_key_path in sensitive_keys: + md_copy['sensitive_keys'].append(sub_key_path) if isinstance(val, str) and val.startswith('ci-b64:'): md_copy['base64_encoded_keys'].append(sub_key_path) md_copy[key] = val.replace('ci-b64:', '') if isinstance(val, dict): - return_val = process_instance_metadata(val, sub_key_path) + return_val = process_instance_metadata( + val, sub_key_path, sensitive_keys) md_copy['base64_encoded_keys'].extend( return_val.pop('base64_encoded_keys')) + md_copy['sensitive_keys'].extend( + return_val.pop('sensitive_keys')) md_copy[key] = return_val return md_copy +def redact_sensitive_keys(metadata, redact_value=REDACT_SENSITIVE_VALUE): + """Redact any sensitive keys from to provided metadata dictionary. + + Replace any keys values listed in 'sensitive_keys' with redact_value. + """ + if not metadata.get('sensitive_keys', []): + return metadata + md_copy = copy.deepcopy(metadata) + for key_path in metadata.get('sensitive_keys'): + path_parts = key_path.split('/') + obj = md_copy + for path in path_parts: + if isinstance(obj[path], dict) and path != path_parts[-1]: + obj = obj[path] + obj[path] = redact_value + return md_copy + + URLParams = namedtuple( 'URLParms', ['max_wait_seconds', 'timeout_seconds', 'num_retries']) @@ -127,6 +155,10 @@ class DataSource(object): _dirty_cache = False + # N-tuple of keypaths or keynames redact from instance-data.json for + # non-root users + sensitive_metadata_keys = ('security-credentials',) + def __init__(self, sys_cfg, distro, paths, ud_proc=None): self.sys_cfg = sys_cfg self.distro = distro @@ -152,12 +184,24 @@ class DataSource(object): def _get_standardized_metadata(self): """Return a dictionary of standardized metadata keys.""" - return {'v1': { - 'local-hostname': self.get_hostname(), - 'instance-id': self.get_instance_id(), - 'cloud-name': self.cloud_name, - 'region': self.region, - 'availability-zone': self.availability_zone}} + local_hostname = self.get_hostname() + instance_id = self.get_instance_id() + availability_zone = self.availability_zone + cloud_name = self.cloud_name + # When adding new standard keys prefer underscore-delimited instead + # of hyphen-delimted to support simple variable references in jinja + # templates. + return { + 'v1': { + 'availability-zone': availability_zone, + 'availability_zone': availability_zone, + 'cloud-name': cloud_name, + 'cloud_name': cloud_name, + 'instance-id': instance_id, + 'instance_id': instance_id, + 'local-hostname': local_hostname, + 'local_hostname': local_hostname, + 'region': self.region}} def clear_cached_attrs(self, attr_defaults=()): """Reset any cached metadata attributes to datasource defaults. @@ -200,9 +244,7 @@ class DataSource(object): """ instance_data = { 'ds': { - 'meta_data': self.metadata, - 'user_data': self.get_userdata_raw(), - 'vendor_data': self.get_vendordata_raw()}} + 'meta_data': self.metadata}} if hasattr(self, 'network_json'): network_json = getattr(self, 'network_json') if network_json != UNSET: @@ -217,7 +259,9 @@ class DataSource(object): # Process content base64encoding unserializable values content = util.json_dumps(instance_data) # Strip base64: prefix and set base64_encoded_keys list. - processed_data = process_instance_metadata(json.loads(content)) + processed_data = process_instance_metadata( + json.loads(content), + sensitive_keys=self.sensitive_metadata_keys) except TypeError as e: LOG.warning('Error persisting instance-data.json: %s', str(e)) return False @@ -225,7 +269,11 @@ class DataSource(object): LOG.warning('Error persisting instance-data.json: %s', str(e)) return False json_file = os.path.join(self.paths.run_dir, INSTANCE_JSON_FILE) - write_json(json_file, processed_data, mode=0o600) + write_json(json_file, processed_data) # World readable + json_sensitive_file = os.path.join(self.paths.run_dir, + INSTANCE_JSON_SENSITIVE_FILE) + write_json(json_sensitive_file, + redact_sensitive_keys(processed_data), mode=0o600) return True def _get_data(self): diff --git a/cloudinit/sources/tests/test_init.py b/cloudinit/sources/tests/test_init.py index 8299af23..6b965750 100644 --- a/cloudinit/sources/tests/test_init.py +++ b/cloudinit/sources/tests/test_init.py @@ -1,5 +1,6 @@ # This file is part of cloud-init. See LICENSE file for license information. +import copy import inspect import os import six @@ -9,7 +10,8 @@ from cloudinit.event import EventType from cloudinit.helpers import Paths from cloudinit import importer from cloudinit.sources import ( - INSTANCE_JSON_FILE, DataSource, UNSET) + INSTANCE_JSON_FILE, INSTANCE_JSON_SENSITIVE_FILE, REDACT_SENSITIVE_VALUE, + UNSET, DataSource, redact_sensitive_keys) from cloudinit.tests.helpers import CiTestCase, skipIf, mock from cloudinit.user_data import UserDataProcessor from cloudinit import util @@ -20,20 +22,24 @@ class DataSourceTestSubclassNet(DataSource): dsname = 'MyTestSubclass' url_max_wait = 55 - def __init__(self, sys_cfg, distro, paths, custom_userdata=None, - get_data_retval=True): + def __init__(self, sys_cfg, distro, paths, custom_metadata=None, + custom_userdata=None, get_data_retval=True): super(DataSourceTestSubclassNet, self).__init__( sys_cfg, distro, paths) self._custom_userdata = custom_userdata + self._custom_metadata = custom_metadata self._get_data_retval = get_data_retval def _get_cloud_name(self): return 'SubclassCloudName' def _get_data(self): - self.metadata = {'availability_zone': 'myaz', - 'local-hostname': 'test-subclass-hostname', - 'region': 'myregion'} + if self._custom_metadata: + self.metadata = self._custom_metadata + else: + self.metadata = {'availability_zone': 'myaz', + 'local-hostname': 'test-subclass-hostname', + 'region': 'myregion'} if self._custom_userdata: self.userdata_raw = self._custom_userdata else: @@ -278,7 +284,7 @@ class TestDataSource(CiTestCase): os.path.exists(json_file), 'Found unexpected file %s' % json_file) def test_get_data_writes_json_instance_data_on_success(self): - """get_data writes INSTANCE_JSON_FILE to run_dir as readonly root.""" + """get_data writes INSTANCE_JSON_FILE to run_dir as world readable.""" tmp = self.tmp_dir() datasource = DataSourceTestSubclassNet( self.sys_cfg, self.distro, Paths({'run_dir': tmp})) @@ -287,40 +293,90 @@ class TestDataSource(CiTestCase): content = util.load_file(json_file) expected = { 'base64_encoded_keys': [], + 'sensitive_keys': [], 'v1': { 'availability-zone': 'myaz', + 'availability_zone': 'myaz', 'cloud-name': 'subclasscloudname', + 'cloud_name': 'subclasscloudname', 'instance-id': 'iid-datasource', + 'instance_id': 'iid-datasource', 'local-hostname': 'test-subclass-hostname', + 'local_hostname': 'test-subclass-hostname', 'region': 'myregion'}, 'ds': { 'meta_data': {'availability_zone': 'myaz', 'local-hostname': 'test-subclass-hostname', - 'region': 'myregion'}, - 'user_data': 'userdata_raw', - 'vendor_data': 'vendordata_raw'}} - self.maxDiff = None + 'region': 'myregion'}}} self.assertEqual(expected, util.load_json(content)) file_stat = os.stat(json_file) + self.assertEqual(0o644, stat.S_IMODE(file_stat.st_mode)) + self.assertEqual(expected, util.load_json(content)) + + def test_get_data_writes_json_instance_data_sensitive(self): + """get_data writes INSTANCE_JSON_SENSITIVE_FILE as readonly root.""" + tmp = self.tmp_dir() + datasource = DataSourceTestSubclassNet( + self.sys_cfg, self.distro, Paths({'run_dir': tmp}), + custom_metadata={ + 'availability_zone': 'myaz', + 'local-hostname': 'test-subclass-hostname', + 'region': 'myregion', + 'some': {'security-credentials': { + 'cred1': 'sekret', 'cred2': 'othersekret'}}}) + self.assertEqual( + ('security-credentials',), datasource.sensitive_metadata_keys) + datasource.get_data() + json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp) + sensitive_json_file = self.tmp_path(INSTANCE_JSON_SENSITIVE_FILE, tmp) + redacted = util.load_json(util.load_file(json_file)) + self.assertEqual( + {'cred1': 'sekret', 'cred2': 'othersekret'}, + redacted['ds']['meta_data']['some']['security-credentials']) + content = util.load_file(sensitive_json_file) + expected = { + 'base64_encoded_keys': [], + 'sensitive_keys': ['ds/meta_data/some/security-credentials'], + 'v1': { + 'availability-zone': 'myaz', + 'availability_zone': 'myaz', + 'cloud-name': 'subclasscloudname', + 'cloud_name': 'subclasscloudname', + 'instance-id': 'iid-datasource', + 'instance_id': 'iid-datasource', + 'local-hostname': 'test-subclass-hostname', + 'local_hostname': 'test-subclass-hostname', + 'region': 'myregion'}, + 'ds': { + 'meta_data': { + 'availability_zone': 'myaz', + 'local-hostname': 'test-subclass-hostname', + 'region': 'myregion', + 'some': {'security-credentials': REDACT_SENSITIVE_VALUE}}} + } + self.maxDiff = None + self.assertEqual(expected, util.load_json(content)) + file_stat = os.stat(sensitive_json_file) self.assertEqual(0o600, stat.S_IMODE(file_stat.st_mode)) + self.assertEqual(expected, util.load_json(content)) def test_get_data_handles_redacted_unserializable_content(self): """get_data warns unserializable content in INSTANCE_JSON_FILE.""" tmp = self.tmp_dir() datasource = DataSourceTestSubclassNet( self.sys_cfg, self.distro, Paths({'run_dir': tmp}), - custom_userdata={'key1': 'val1', 'key2': {'key2.1': self.paths}}) + custom_metadata={'key1': 'val1', 'key2': {'key2.1': self.paths}}) datasource.get_data() json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp) content = util.load_file(json_file) - expected_userdata = { + expected_metadata = { 'key1': 'val1', 'key2': { 'key2.1': "Warning: redacted unserializable type <class" " 'cloudinit.helpers.Paths'>"}} instance_json = util.load_json(content) self.assertEqual( - expected_userdata, instance_json['ds']['user_data']) + expected_metadata, instance_json['ds']['meta_data']) def test_persist_instance_data_writes_ec2_metadata_when_set(self): """When ec2_metadata class attribute is set, persist to json.""" @@ -361,17 +417,17 @@ class TestDataSource(CiTestCase): tmp = self.tmp_dir() datasource = DataSourceTestSubclassNet( self.sys_cfg, self.distro, Paths({'run_dir': tmp}), - custom_userdata={'key1': 'val1', 'key2': {'key2.1': b'\x123'}}) + custom_metadata={'key1': 'val1', 'key2': {'key2.1': b'\x123'}}) self.assertTrue(datasource.get_data()) json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp) content = util.load_file(json_file) instance_json = util.load_json(content) - self.assertEqual( - ['ds/user_data/key2/key2.1'], + self.assertItemsEqual( + ['ds/meta_data/key2/key2.1'], instance_json['base64_encoded_keys']) self.assertEqual( {'key1': 'val1', 'key2': {'key2.1': 'EjM='}}, - instance_json['ds']['user_data']) + instance_json['ds']['meta_data']) @skipIf(not six.PY2, "json serialization on <= py2.7 handles bytes") def test_get_data_handles_bytes_values(self): @@ -379,7 +435,7 @@ class TestDataSource(CiTestCase): tmp = self.tmp_dir() datasource = DataSourceTestSubclassNet( self.sys_cfg, self.distro, Paths({'run_dir': tmp}), - custom_userdata={'key1': 'val1', 'key2': {'key2.1': b'\x123'}}) + custom_metadata={'key1': 'val1', 'key2': {'key2.1': b'\x123'}}) self.assertTrue(datasource.get_data()) json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp) content = util.load_file(json_file) @@ -387,7 +443,7 @@ class TestDataSource(CiTestCase): self.assertEqual([], instance_json['base64_encoded_keys']) self.assertEqual( {'key1': 'val1', 'key2': {'key2.1': '\x123'}}, - instance_json['ds']['user_data']) + instance_json['ds']['meta_data']) @skipIf(not six.PY2, "Only python2 hits UnicodeDecodeErrors on non-utf8") def test_non_utf8_encoding_logs_warning(self): @@ -395,7 +451,7 @@ class TestDataSource(CiTestCase): tmp = self.tmp_dir() datasource = DataSourceTestSubclassNet( self.sys_cfg, self.distro, Paths({'run_dir': tmp}), - custom_userdata={'key1': 'val1', 'key2': {'key2.1': b'ab\xaadef'}}) + custom_metadata={'key1': 'val1', 'key2': {'key2.1': b'ab\xaadef'}}) self.assertTrue(datasource.get_data()) json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp) self.assertFalse(os.path.exists(json_file)) @@ -509,4 +565,36 @@ class TestDataSource(CiTestCase): self.logs.getvalue()) +class TestRedactSensitiveData(CiTestCase): + + def test_redact_sensitive_data_noop_when_no_sensitive_keys_present(self): + """When sensitive_keys is absent or empty from metadata do nothing.""" + md = {'my': 'data'} + self.assertEqual( + md, redact_sensitive_keys(md, redact_value='redacted')) + md['sensitive_keys'] = [] + self.assertEqual( + md, redact_sensitive_keys(md, redact_value='redacted')) + + def test_redact_sensitive_data_redacts_exact_match_name(self): + """Only exact matched sensitive_keys are redacted from metadata.""" + md = {'sensitive_keys': ['md/secure'], + 'md': {'secure': 's3kr1t', 'insecure': 'publik'}} + secure_md = copy.deepcopy(md) + secure_md['md']['secure'] = 'redacted' + self.assertEqual( + secure_md, + redact_sensitive_keys(md, redact_value='redacted')) + + def test_redact_sensitive_data_does_redacts_with_default_string(self): + """When redact_value is absent, REDACT_SENSITIVE_VALUE is used.""" + md = {'sensitive_keys': ['md/secure'], + 'md': {'secure': 's3kr1t', 'insecure': 'publik'}} + secure_md = copy.deepcopy(md) + secure_md['md']['secure'] = 'redacted for non-root user' + self.assertEqual( + secure_md, + redact_sensitive_keys(md)) + + # vi: ts=4 expandtab |