summaryrefslogtreecommitdiff
path: root/cloudinit/sources/__init__.py
diff options
context:
space:
mode:
Diffstat (limited to 'cloudinit/sources/__init__.py')
-rw-r--r--cloudinit/sources/__init__.py134
1 files changed, 104 insertions, 30 deletions
diff --git a/cloudinit/sources/__init__.py b/cloudinit/sources/__init__.py
index f424316a..5ac98826 100644
--- a/cloudinit/sources/__init__.py
+++ b/cloudinit/sources/__init__.py
@@ -38,8 +38,17 @@ DEP_FILESYSTEM = "FILESYSTEM"
DEP_NETWORK = "NETWORK"
DS_PREFIX = 'DataSource'
-# File in which instance meta-data, user-data and vendor-data is written
+EXPERIMENTAL_TEXT = (
+ "EXPERIMENTAL: The structure and format of content scoped under the 'ds'"
+ " key may change in subsequent releases of cloud-init.")
+
+
+# File in which publicly available instance meta-data is written
+# security-sensitive key values are redacted from this world-readable file
INSTANCE_JSON_FILE = 'instance-data.json'
+# security-sensitive key values are present in this root-readable file
+INSTANCE_JSON_SENSITIVE_FILE = 'instance-data-sensitive.json'
+REDACT_SENSITIVE_VALUE = 'redacted for non-root user'
# Key which can be provide a cloud's official product name to cloud-init
METADATA_CLOUD_NAME_KEY = 'cloud-name'
@@ -58,26 +67,55 @@ class InvalidMetaDataException(Exception):
pass
-def process_base64_metadata(metadata, key_path=''):
- """Strip ci-b64 prefix and return metadata with base64-encoded-keys set."""
+def process_instance_metadata(metadata, key_path='', sensitive_keys=()):
+ """Process all instance metadata cleaning it up for persisting as json.
+
+ Strip ci-b64 prefix and catalog any 'base64_encoded_keys' as a list
+
+ @return Dict copy of processed metadata.
+ """
md_copy = copy.deepcopy(metadata)
- md_copy['base64-encoded-keys'] = []
+ md_copy['base64_encoded_keys'] = []
+ md_copy['sensitive_keys'] = []
for key, val in metadata.items():
if key_path:
sub_key_path = key_path + '/' + key
else:
sub_key_path = key
+ if key in sensitive_keys or sub_key_path in sensitive_keys:
+ md_copy['sensitive_keys'].append(sub_key_path)
if isinstance(val, str) and val.startswith('ci-b64:'):
- md_copy['base64-encoded-keys'].append(sub_key_path)
+ md_copy['base64_encoded_keys'].append(sub_key_path)
md_copy[key] = val.replace('ci-b64:', '')
if isinstance(val, dict):
- return_val = process_base64_metadata(val, sub_key_path)
- md_copy['base64-encoded-keys'].extend(
- return_val.pop('base64-encoded-keys'))
+ return_val = process_instance_metadata(
+ val, sub_key_path, sensitive_keys)
+ md_copy['base64_encoded_keys'].extend(
+ return_val.pop('base64_encoded_keys'))
+ md_copy['sensitive_keys'].extend(
+ return_val.pop('sensitive_keys'))
md_copy[key] = return_val
return md_copy
+def redact_sensitive_keys(metadata, redact_value=REDACT_SENSITIVE_VALUE):
+    """Redact any sensitive keys from the provided metadata dictionary.
+
+    Return a deep copy with every key path listed in 'sensitive_keys' set
+    to redact_value, or metadata itself when no such paths are listed.
+    """
+    if not metadata.get('sensitive_keys', []):
+        return metadata
+    md_copy = copy.deepcopy(metadata)
+    for key_path in metadata.get('sensitive_keys'):
+        path_parts = key_path.split('/')
+        obj = md_copy
+        for path in path_parts[:-1]:  # descend by position, not key name
+            obj = obj[path]
+        obj[path_parts[-1]] = redact_value
+    return md_copy
+
+
URLParams = namedtuple(
'URLParms', ['max_wait_seconds', 'timeout_seconds', 'num_retries'])
@@ -103,14 +141,14 @@ class DataSource(object):
url_timeout = 10 # timeout for each metadata url read attempt
url_retries = 5 # number of times to retry url upon 404
- # The datasource defines a list of supported EventTypes during which
+ # The datasource defines a set of supported EventTypes during which
# the datasource can react to changes in metadata and regenerate
# network configuration on metadata changes.
# A datasource which supports writing network config on each system boot
- # would set update_events = {'network': [EventType.BOOT]}
+ # would call update_events['network'].add(EventType.BOOT).
# Default: generate network config on new instance id (first boot).
- update_events = {'network': [EventType.BOOT_NEW_INSTANCE]}
+ update_events = {'network': set([EventType.BOOT_NEW_INSTANCE])}
# N-tuple listing default values for any metadata-related class
# attributes cached on an instance by a process_data runs. These attribute
@@ -122,6 +160,10 @@ class DataSource(object):
_dirty_cache = False
+ # N-tuple of keypaths or keynames to redact from instance-data.json for
+ # non-root users
+ sensitive_metadata_keys = ('security-credentials',)
+
def __init__(self, sys_cfg, distro, paths, ud_proc=None):
self.sys_cfg = sys_cfg
self.distro = distro
@@ -147,12 +189,24 @@ class DataSource(object):
def _get_standardized_metadata(self):
"""Return a dictionary of standardized metadata keys."""
- return {'v1': {
- 'local-hostname': self.get_hostname(),
- 'instance-id': self.get_instance_id(),
- 'cloud-name': self.cloud_name,
- 'region': self.region,
- 'availability-zone': self.availability_zone}}
+ local_hostname = self.get_hostname()
+ instance_id = self.get_instance_id()
+ availability_zone = self.availability_zone
+ cloud_name = self.cloud_name
+ # When adding new standard keys prefer underscore-delimited instead
+ # of hyphen-delimited to support simple variable references in jinja
+ # templates.
+ return {
+ 'v1': {
+ 'availability-zone': availability_zone,
+ 'availability_zone': availability_zone,
+ 'cloud-name': cloud_name,
+ 'cloud_name': cloud_name,
+ 'instance-id': instance_id,
+ 'instance_id': instance_id,
+ 'local-hostname': local_hostname,
+ 'local_hostname': local_hostname,
+ 'region': self.region}}
def clear_cached_attrs(self, attr_defaults=()):
"""Reset any cached metadata attributes to datasource defaults.
@@ -180,15 +234,22 @@ class DataSource(object):
"""
self._dirty_cache = True
return_value = self._get_data()
- json_file = os.path.join(self.paths.run_dir, INSTANCE_JSON_FILE)
if not return_value:
return return_value
+ self.persist_instance_data()
+ return return_value
+
+ def persist_instance_data(self):
+ """Process and write INSTANCE_JSON_FILE with all instance metadata.
+ Replace any hyphens with underscores in key names for use in template
+ processing.
+
+ @return True on successful write, False otherwise.
+ """
instance_data = {
- 'ds': {
- 'meta-data': self.metadata,
- 'user-data': self.get_userdata_raw(),
- 'vendor-data': self.get_vendordata_raw()}}
+ 'ds': {'_doc': EXPERIMENTAL_TEXT,
+ 'meta_data': self.metadata}}
if hasattr(self, 'network_json'):
network_json = getattr(self, 'network_json')
if network_json != UNSET:
@@ -202,16 +263,23 @@ class DataSource(object):
try:
# Process content base64encoding unserializable values
content = util.json_dumps(instance_data)
- # Strip base64: prefix and return base64-encoded-keys
- processed_data = process_base64_metadata(json.loads(content))
+ # Strip ci-b64: prefix and set base64_encoded_keys list.
+ processed_data = process_instance_metadata(
+ json.loads(content),
+ sensitive_keys=self.sensitive_metadata_keys)
except TypeError as e:
LOG.warning('Error persisting instance-data.json: %s', str(e))
- return return_value
+ return False
except UnicodeDecodeError as e:
LOG.warning('Error persisting instance-data.json: %s', str(e))
- return return_value
- write_json(json_file, processed_data, mode=0o600)
- return return_value
+ return False
+        json_sensitive_file = os.path.join(self.paths.run_dir,
+                                           INSTANCE_JSON_SENSITIVE_FILE)
+        write_json(json_sensitive_file, processed_data, mode=0o600)
+        json_file = os.path.join(self.paths.run_dir, INSTANCE_JSON_FILE)
+        # World readable: sensitive values must be redacted before writing
+        write_json(json_file, redact_sensitive_keys(processed_data))
+        return True
def _get_data(self):
"""Walk metadata sources, process crawled data and save attributes."""
@@ -475,8 +543,8 @@ class DataSource(object):
for update_scope, update_events in self.update_events.items():
if event in update_events:
if not supported_events.get(update_scope):
- supported_events[update_scope] = []
- supported_events[update_scope].append(event)
+ supported_events[update_scope] = set()
+ supported_events[update_scope].add(event)
for scope, matched_events in supported_events.items():
LOG.debug(
"Update datasource metadata and %s config due to events: %s",
@@ -490,6 +558,8 @@ class DataSource(object):
result = self.get_data()
if result:
return True
+ LOG.debug("Datasource %s not updated for events: %s", self,
+ ', '.join(source_event_types))
return False
def check_instance_id(self, sys_cfg):
@@ -669,6 +739,10 @@ def convert_vendordata(data, recurse=True):
raise ValueError("Unknown data type for vendordata: %s" % type(data))
+class BrokenMetadata(IOError):
+ pass
+
+
# 'depends' is a list of dependencies (DEP_FILESYSTEM)
# ds_list is a list of 2 item lists
# ds_list = [