summaryrefslogtreecommitdiff
path: root/cloudinit/sources
diff options
context:
space:
mode:
Diffstat (limited to 'cloudinit/sources')
-rw-r--r--cloudinit/sources/__init__.py76
-rw-r--r--cloudinit/sources/tests/test_init.py130
2 files changed, 171 insertions, 35 deletions
diff --git a/cloudinit/sources/__init__.py b/cloudinit/sources/__init__.py
index a775f1a8..730e8174 100644
--- a/cloudinit/sources/__init__.py
+++ b/cloudinit/sources/__init__.py
@@ -38,8 +38,12 @@ DEP_FILESYSTEM = "FILESYSTEM"
DEP_NETWORK = "NETWORK"
DS_PREFIX = 'DataSource'
-# File in which instance meta-data, user-data and vendor-data is written
+# File in which public available instance meta-data is written
+# security-sensitive key values are redacted from this world-readable file
INSTANCE_JSON_FILE = 'instance-data.json'
+# security-sensitive key values are present in this root-readable file
+INSTANCE_JSON_SENSITIVE_FILE = 'instance-data-sensitive.json'
+REDACT_SENSITIVE_VALUE = 'redacted for non-root user'
# Key which can be provide a cloud's official product name to cloud-init
METADATA_CLOUD_NAME_KEY = 'cloud-name'
@@ -58,7 +62,7 @@ class InvalidMetaDataException(Exception):
pass
-def process_instance_metadata(metadata, key_path=''):
+def process_instance_metadata(metadata, key_path='', sensitive_keys=()):
"""Process all instance metadata cleaning it up for persisting as json.
Strip ci-b64 prefix and catalog any 'base64_encoded_keys' as a list
@@ -67,22 +71,46 @@ def process_instance_metadata(metadata, key_path=''):
"""
md_copy = copy.deepcopy(metadata)
md_copy['base64_encoded_keys'] = []
+ md_copy['sensitive_keys'] = []
for key, val in metadata.items():
if key_path:
sub_key_path = key_path + '/' + key
else:
sub_key_path = key
+ if key in sensitive_keys or sub_key_path in sensitive_keys:
+ md_copy['sensitive_keys'].append(sub_key_path)
if isinstance(val, str) and val.startswith('ci-b64:'):
md_copy['base64_encoded_keys'].append(sub_key_path)
md_copy[key] = val.replace('ci-b64:', '')
if isinstance(val, dict):
- return_val = process_instance_metadata(val, sub_key_path)
+ return_val = process_instance_metadata(
+ val, sub_key_path, sensitive_keys)
md_copy['base64_encoded_keys'].extend(
return_val.pop('base64_encoded_keys'))
+ md_copy['sensitive_keys'].extend(
+ return_val.pop('sensitive_keys'))
md_copy[key] = return_val
return md_copy
+def redact_sensitive_keys(metadata, redact_value=REDACT_SENSITIVE_VALUE):
+ """Redact any sensitive keys from to provided metadata dictionary.
+
+ Replace any keys values listed in 'sensitive_keys' with redact_value.
+ """
+ if not metadata.get('sensitive_keys', []):
+ return metadata
+ md_copy = copy.deepcopy(metadata)
+ for key_path in metadata.get('sensitive_keys'):
+ path_parts = key_path.split('/')
+ obj = md_copy
+ for path in path_parts:
+ if isinstance(obj[path], dict) and path != path_parts[-1]:
+ obj = obj[path]
+ obj[path] = redact_value
+ return md_copy
+
+
URLParams = namedtuple(
'URLParms', ['max_wait_seconds', 'timeout_seconds', 'num_retries'])
@@ -127,6 +155,10 @@ class DataSource(object):
_dirty_cache = False
+ # N-tuple of keypaths or keynames redact from instance-data.json for
+ # non-root users
+ sensitive_metadata_keys = ('security-credentials',)
+
def __init__(self, sys_cfg, distro, paths, ud_proc=None):
self.sys_cfg = sys_cfg
self.distro = distro
@@ -152,12 +184,24 @@ class DataSource(object):
def _get_standardized_metadata(self):
"""Return a dictionary of standardized metadata keys."""
- return {'v1': {
- 'local-hostname': self.get_hostname(),
- 'instance-id': self.get_instance_id(),
- 'cloud-name': self.cloud_name,
- 'region': self.region,
- 'availability-zone': self.availability_zone}}
+ local_hostname = self.get_hostname()
+ instance_id = self.get_instance_id()
+ availability_zone = self.availability_zone
+ cloud_name = self.cloud_name
+ # When adding new standard keys prefer underscore-delimited instead
+ # of hyphen-delimted to support simple variable references in jinja
+ # templates.
+ return {
+ 'v1': {
+ 'availability-zone': availability_zone,
+ 'availability_zone': availability_zone,
+ 'cloud-name': cloud_name,
+ 'cloud_name': cloud_name,
+ 'instance-id': instance_id,
+ 'instance_id': instance_id,
+ 'local-hostname': local_hostname,
+ 'local_hostname': local_hostname,
+ 'region': self.region}}
def clear_cached_attrs(self, attr_defaults=()):
"""Reset any cached metadata attributes to datasource defaults.
@@ -200,9 +244,7 @@ class DataSource(object):
"""
instance_data = {
'ds': {
- 'meta_data': self.metadata,
- 'user_data': self.get_userdata_raw(),
- 'vendor_data': self.get_vendordata_raw()}}
+ 'meta_data': self.metadata}}
if hasattr(self, 'network_json'):
network_json = getattr(self, 'network_json')
if network_json != UNSET:
@@ -217,7 +259,9 @@ class DataSource(object):
# Process content base64encoding unserializable values
content = util.json_dumps(instance_data)
# Strip base64: prefix and set base64_encoded_keys list.
- processed_data = process_instance_metadata(json.loads(content))
+ processed_data = process_instance_metadata(
+ json.loads(content),
+ sensitive_keys=self.sensitive_metadata_keys)
except TypeError as e:
LOG.warning('Error persisting instance-data.json: %s', str(e))
return False
@@ -225,7 +269,11 @@ class DataSource(object):
LOG.warning('Error persisting instance-data.json: %s', str(e))
return False
json_file = os.path.join(self.paths.run_dir, INSTANCE_JSON_FILE)
- write_json(json_file, processed_data, mode=0o600)
+ write_json(json_file, processed_data) # World readable
+ json_sensitive_file = os.path.join(self.paths.run_dir,
+ INSTANCE_JSON_SENSITIVE_FILE)
+ write_json(json_sensitive_file,
+ redact_sensitive_keys(processed_data), mode=0o600)
return True
def _get_data(self):
diff --git a/cloudinit/sources/tests/test_init.py b/cloudinit/sources/tests/test_init.py
index 8299af23..6b965750 100644
--- a/cloudinit/sources/tests/test_init.py
+++ b/cloudinit/sources/tests/test_init.py
@@ -1,5 +1,6 @@
# This file is part of cloud-init. See LICENSE file for license information.
+import copy
import inspect
import os
import six
@@ -9,7 +10,8 @@ from cloudinit.event import EventType
from cloudinit.helpers import Paths
from cloudinit import importer
from cloudinit.sources import (
- INSTANCE_JSON_FILE, DataSource, UNSET)
+ INSTANCE_JSON_FILE, INSTANCE_JSON_SENSITIVE_FILE, REDACT_SENSITIVE_VALUE,
+ UNSET, DataSource, redact_sensitive_keys)
from cloudinit.tests.helpers import CiTestCase, skipIf, mock
from cloudinit.user_data import UserDataProcessor
from cloudinit import util
@@ -20,20 +22,24 @@ class DataSourceTestSubclassNet(DataSource):
dsname = 'MyTestSubclass'
url_max_wait = 55
- def __init__(self, sys_cfg, distro, paths, custom_userdata=None,
- get_data_retval=True):
+ def __init__(self, sys_cfg, distro, paths, custom_metadata=None,
+ custom_userdata=None, get_data_retval=True):
super(DataSourceTestSubclassNet, self).__init__(
sys_cfg, distro, paths)
self._custom_userdata = custom_userdata
+ self._custom_metadata = custom_metadata
self._get_data_retval = get_data_retval
def _get_cloud_name(self):
return 'SubclassCloudName'
def _get_data(self):
- self.metadata = {'availability_zone': 'myaz',
- 'local-hostname': 'test-subclass-hostname',
- 'region': 'myregion'}
+ if self._custom_metadata:
+ self.metadata = self._custom_metadata
+ else:
+ self.metadata = {'availability_zone': 'myaz',
+ 'local-hostname': 'test-subclass-hostname',
+ 'region': 'myregion'}
if self._custom_userdata:
self.userdata_raw = self._custom_userdata
else:
@@ -278,7 +284,7 @@ class TestDataSource(CiTestCase):
os.path.exists(json_file), 'Found unexpected file %s' % json_file)
def test_get_data_writes_json_instance_data_on_success(self):
- """get_data writes INSTANCE_JSON_FILE to run_dir as readonly root."""
+ """get_data writes INSTANCE_JSON_FILE to run_dir as world readable."""
tmp = self.tmp_dir()
datasource = DataSourceTestSubclassNet(
self.sys_cfg, self.distro, Paths({'run_dir': tmp}))
@@ -287,40 +293,90 @@ class TestDataSource(CiTestCase):
content = util.load_file(json_file)
expected = {
'base64_encoded_keys': [],
+ 'sensitive_keys': [],
'v1': {
'availability-zone': 'myaz',
+ 'availability_zone': 'myaz',
'cloud-name': 'subclasscloudname',
+ 'cloud_name': 'subclasscloudname',
'instance-id': 'iid-datasource',
+ 'instance_id': 'iid-datasource',
'local-hostname': 'test-subclass-hostname',
+ 'local_hostname': 'test-subclass-hostname',
'region': 'myregion'},
'ds': {
'meta_data': {'availability_zone': 'myaz',
'local-hostname': 'test-subclass-hostname',
- 'region': 'myregion'},
- 'user_data': 'userdata_raw',
- 'vendor_data': 'vendordata_raw'}}
- self.maxDiff = None
+ 'region': 'myregion'}}}
self.assertEqual(expected, util.load_json(content))
file_stat = os.stat(json_file)
+ self.assertEqual(0o644, stat.S_IMODE(file_stat.st_mode))
+ self.assertEqual(expected, util.load_json(content))
+
+ def test_get_data_writes_json_instance_data_sensitive(self):
+ """get_data writes INSTANCE_JSON_SENSITIVE_FILE as readonly root."""
+ tmp = self.tmp_dir()
+ datasource = DataSourceTestSubclassNet(
+ self.sys_cfg, self.distro, Paths({'run_dir': tmp}),
+ custom_metadata={
+ 'availability_zone': 'myaz',
+ 'local-hostname': 'test-subclass-hostname',
+ 'region': 'myregion',
+ 'some': {'security-credentials': {
+ 'cred1': 'sekret', 'cred2': 'othersekret'}}})
+ self.assertEqual(
+ ('security-credentials',), datasource.sensitive_metadata_keys)
+ datasource.get_data()
+ json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp)
+ sensitive_json_file = self.tmp_path(INSTANCE_JSON_SENSITIVE_FILE, tmp)
+ redacted = util.load_json(util.load_file(json_file))
+ self.assertEqual(
+ {'cred1': 'sekret', 'cred2': 'othersekret'},
+ redacted['ds']['meta_data']['some']['security-credentials'])
+ content = util.load_file(sensitive_json_file)
+ expected = {
+ 'base64_encoded_keys': [],
+ 'sensitive_keys': ['ds/meta_data/some/security-credentials'],
+ 'v1': {
+ 'availability-zone': 'myaz',
+ 'availability_zone': 'myaz',
+ 'cloud-name': 'subclasscloudname',
+ 'cloud_name': 'subclasscloudname',
+ 'instance-id': 'iid-datasource',
+ 'instance_id': 'iid-datasource',
+ 'local-hostname': 'test-subclass-hostname',
+ 'local_hostname': 'test-subclass-hostname',
+ 'region': 'myregion'},
+ 'ds': {
+ 'meta_data': {
+ 'availability_zone': 'myaz',
+ 'local-hostname': 'test-subclass-hostname',
+ 'region': 'myregion',
+ 'some': {'security-credentials': REDACT_SENSITIVE_VALUE}}}
+ }
+ self.maxDiff = None
+ self.assertEqual(expected, util.load_json(content))
+ file_stat = os.stat(sensitive_json_file)
self.assertEqual(0o600, stat.S_IMODE(file_stat.st_mode))
+ self.assertEqual(expected, util.load_json(content))
def test_get_data_handles_redacted_unserializable_content(self):
"""get_data warns unserializable content in INSTANCE_JSON_FILE."""
tmp = self.tmp_dir()
datasource = DataSourceTestSubclassNet(
self.sys_cfg, self.distro, Paths({'run_dir': tmp}),
- custom_userdata={'key1': 'val1', 'key2': {'key2.1': self.paths}})
+ custom_metadata={'key1': 'val1', 'key2': {'key2.1': self.paths}})
datasource.get_data()
json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp)
content = util.load_file(json_file)
- expected_userdata = {
+ expected_metadata = {
'key1': 'val1',
'key2': {
'key2.1': "Warning: redacted unserializable type <class"
" 'cloudinit.helpers.Paths'>"}}
instance_json = util.load_json(content)
self.assertEqual(
- expected_userdata, instance_json['ds']['user_data'])
+ expected_metadata, instance_json['ds']['meta_data'])
def test_persist_instance_data_writes_ec2_metadata_when_set(self):
"""When ec2_metadata class attribute is set, persist to json."""
@@ -361,17 +417,17 @@ class TestDataSource(CiTestCase):
tmp = self.tmp_dir()
datasource = DataSourceTestSubclassNet(
self.sys_cfg, self.distro, Paths({'run_dir': tmp}),
- custom_userdata={'key1': 'val1', 'key2': {'key2.1': b'\x123'}})
+ custom_metadata={'key1': 'val1', 'key2': {'key2.1': b'\x123'}})
self.assertTrue(datasource.get_data())
json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp)
content = util.load_file(json_file)
instance_json = util.load_json(content)
- self.assertEqual(
- ['ds/user_data/key2/key2.1'],
+ self.assertItemsEqual(
+ ['ds/meta_data/key2/key2.1'],
instance_json['base64_encoded_keys'])
self.assertEqual(
{'key1': 'val1', 'key2': {'key2.1': 'EjM='}},
- instance_json['ds']['user_data'])
+ instance_json['ds']['meta_data'])
@skipIf(not six.PY2, "json serialization on <= py2.7 handles bytes")
def test_get_data_handles_bytes_values(self):
@@ -379,7 +435,7 @@ class TestDataSource(CiTestCase):
tmp = self.tmp_dir()
datasource = DataSourceTestSubclassNet(
self.sys_cfg, self.distro, Paths({'run_dir': tmp}),
- custom_userdata={'key1': 'val1', 'key2': {'key2.1': b'\x123'}})
+ custom_metadata={'key1': 'val1', 'key2': {'key2.1': b'\x123'}})
self.assertTrue(datasource.get_data())
json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp)
content = util.load_file(json_file)
@@ -387,7 +443,7 @@ class TestDataSource(CiTestCase):
self.assertEqual([], instance_json['base64_encoded_keys'])
self.assertEqual(
{'key1': 'val1', 'key2': {'key2.1': '\x123'}},
- instance_json['ds']['user_data'])
+ instance_json['ds']['meta_data'])
@skipIf(not six.PY2, "Only python2 hits UnicodeDecodeErrors on non-utf8")
def test_non_utf8_encoding_logs_warning(self):
@@ -395,7 +451,7 @@ class TestDataSource(CiTestCase):
tmp = self.tmp_dir()
datasource = DataSourceTestSubclassNet(
self.sys_cfg, self.distro, Paths({'run_dir': tmp}),
- custom_userdata={'key1': 'val1', 'key2': {'key2.1': b'ab\xaadef'}})
+ custom_metadata={'key1': 'val1', 'key2': {'key2.1': b'ab\xaadef'}})
self.assertTrue(datasource.get_data())
json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp)
self.assertFalse(os.path.exists(json_file))
@@ -509,4 +565,36 @@ class TestDataSource(CiTestCase):
self.logs.getvalue())
+class TestRedactSensitiveData(CiTestCase):
+
+ def test_redact_sensitive_data_noop_when_no_sensitive_keys_present(self):
+ """When sensitive_keys is absent or empty from metadata do nothing."""
+ md = {'my': 'data'}
+ self.assertEqual(
+ md, redact_sensitive_keys(md, redact_value='redacted'))
+ md['sensitive_keys'] = []
+ self.assertEqual(
+ md, redact_sensitive_keys(md, redact_value='redacted'))
+
+ def test_redact_sensitive_data_redacts_exact_match_name(self):
+ """Only exact matched sensitive_keys are redacted from metadata."""
+ md = {'sensitive_keys': ['md/secure'],
+ 'md': {'secure': 's3kr1t', 'insecure': 'publik'}}
+ secure_md = copy.deepcopy(md)
+ secure_md['md']['secure'] = 'redacted'
+ self.assertEqual(
+ secure_md,
+ redact_sensitive_keys(md, redact_value='redacted'))
+
+ def test_redact_sensitive_data_does_redacts_with_default_string(self):
+ """When redact_value is absent, REDACT_SENSITIVE_VALUE is used."""
+ md = {'sensitive_keys': ['md/secure'],
+ 'md': {'secure': 's3kr1t', 'insecure': 'publik'}}
+ secure_md = copy.deepcopy(md)
+ secure_md['md']['secure'] = 'redacted for non-root user'
+ self.assertEqual(
+ secure_md,
+ redact_sensitive_keys(md))
+
+
# vi: ts=4 expandtab