diff options
author | Chad Smith <chad.smith@canonical.com> | 2018-10-03 12:10:23 -0600 |
---|---|---|
committer | Chad Smith <chad.smith@canonical.com> | 2018-10-03 12:10:23 -0600 |
commit | d6347e1c439eda7f43d9620dac2b461e980e1ae9 (patch) | |
tree | 08410263488d11a2a29edcc620575ed1b028100e /cloudinit/sources/__init__.py | |
parent | 564793a76b9c9add1ee81bab4919c8dccd45a33d (diff) | |
parent | e28000457591bde9f22d6b7a538b1fc33349d780 (diff) | |
download | vyos-cloud-init-d6347e1c439eda7f43d9620dac2b461e980e1ae9.tar.gz vyos-cloud-init-d6347e1c439eda7f43d9620dac2b461e980e1ae9.zip |
merge from master at 18.4
Diffstat (limited to 'cloudinit/sources/__init__.py')
-rw-r--r-- | cloudinit/sources/__init__.py | 134 |
1 files changed, 104 insertions, 30 deletions
diff --git a/cloudinit/sources/__init__.py b/cloudinit/sources/__init__.py index f424316a..5ac98826 100644 --- a/cloudinit/sources/__init__.py +++ b/cloudinit/sources/__init__.py @@ -38,8 +38,17 @@ DEP_FILESYSTEM = "FILESYSTEM" DEP_NETWORK = "NETWORK" DS_PREFIX = 'DataSource' -# File in which instance meta-data, user-data and vendor-data is written +EXPERIMENTAL_TEXT = ( + "EXPERIMENTAL: The structure and format of content scoped under the 'ds'" + " key may change in subsequent releases of cloud-init.") + + +# File in which public available instance meta-data is written +# security-sensitive key values are redacted from this world-readable file INSTANCE_JSON_FILE = 'instance-data.json' +# security-sensitive key values are present in this root-readable file +INSTANCE_JSON_SENSITIVE_FILE = 'instance-data-sensitive.json' +REDACT_SENSITIVE_VALUE = 'redacted for non-root user' # Key which can be provide a cloud's official product name to cloud-init METADATA_CLOUD_NAME_KEY = 'cloud-name' @@ -58,26 +67,55 @@ class InvalidMetaDataException(Exception): pass -def process_base64_metadata(metadata, key_path=''): - """Strip ci-b64 prefix and return metadata with base64-encoded-keys set.""" +def process_instance_metadata(metadata, key_path='', sensitive_keys=()): + """Process all instance metadata cleaning it up for persisting as json. + + Strip ci-b64 prefix and catalog any 'base64_encoded_keys' as a list + + @return Dict copy of processed metadata. + """ md_copy = copy.deepcopy(metadata) - md_copy['base64-encoded-keys'] = [] + md_copy['base64_encoded_keys'] = [] + md_copy['sensitive_keys'] = [] for key, val in metadata.items(): if key_path: sub_key_path = key_path + '/' + key else: sub_key_path = key + if key in sensitive_keys or sub_key_path in sensitive_keys: + md_copy['sensitive_keys'].append(sub_key_path) if isinstance(val, str) and val.startswith('ci-b64:'): - md_copy['base64-encoded-keys'].append(sub_key_path) + md_copy['base64_encoded_keys'].append(sub_key_path) md_copy[key] = val.replace('ci-b64:', '') if isinstance(val, dict): - return_val = process_base64_metadata(val, sub_key_path) - md_copy['base64-encoded-keys'].extend( - return_val.pop('base64-encoded-keys')) + return_val = process_instance_metadata( + val, sub_key_path, sensitive_keys) + md_copy['base64_encoded_keys'].extend( + return_val.pop('base64_encoded_keys')) + md_copy['sensitive_keys'].extend( + return_val.pop('sensitive_keys')) md_copy[key] = return_val return md_copy +def redact_sensitive_keys(metadata, redact_value=REDACT_SENSITIVE_VALUE): + """Redact any sensitive keys from to provided metadata dictionary. + + Replace any keys values listed in 'sensitive_keys' with redact_value. + """ + if not metadata.get('sensitive_keys', []): + return metadata + md_copy = copy.deepcopy(metadata) + for key_path in metadata.get('sensitive_keys'): + path_parts = key_path.split('/') + obj = md_copy + for path in path_parts: + if isinstance(obj[path], dict) and path != path_parts[-1]: + obj = obj[path] + obj[path] = redact_value + return md_copy + + URLParams = namedtuple( 'URLParms', ['max_wait_seconds', 'timeout_seconds', 'num_retries']) @@ -103,14 +141,14 @@ class DataSource(object): url_timeout = 10 # timeout for each metadata url read attempt url_retries = 5 # number of times to retry url upon 404 - # The datasource defines a list of supported EventTypes during which + # The datasource defines a set of supported EventTypes during which # the datasource can react to changes in metadata and regenerate # network configuration on metadata changes. # A datasource which supports writing network config on each system boot - # would set update_events = {'network': [EventType.BOOT]} + # would call update_events['network'].add(EventType.BOOT). # Default: generate network config on new instance id (first boot). - update_events = {'network': [EventType.BOOT_NEW_INSTANCE]} + update_events = {'network': set([EventType.BOOT_NEW_INSTANCE])} # N-tuple listing default values for any metadata-related class # attributes cached on an instance by a process_data runs. These attribute @@ -122,6 +160,10 @@ class DataSource(object): _dirty_cache = False + # N-tuple of keypaths or keynames redact from instance-data.json for + # non-root users + sensitive_metadata_keys = ('security-credentials',) + def __init__(self, sys_cfg, distro, paths, ud_proc=None): self.sys_cfg = sys_cfg self.distro = distro @@ -147,12 +189,24 @@ class DataSource(object): def _get_standardized_metadata(self): """Return a dictionary of standardized metadata keys.""" - return {'v1': { - 'local-hostname': self.get_hostname(), - 'instance-id': self.get_instance_id(), - 'cloud-name': self.cloud_name, - 'region': self.region, - 'availability-zone': self.availability_zone}} + local_hostname = self.get_hostname() + instance_id = self.get_instance_id() + availability_zone = self.availability_zone + cloud_name = self.cloud_name + # When adding new standard keys prefer underscore-delimited instead + # of hyphen-delimted to support simple variable references in jinja + # templates. + return { + 'v1': { + 'availability-zone': availability_zone, + 'availability_zone': availability_zone, + 'cloud-name': cloud_name, + 'cloud_name': cloud_name, + 'instance-id': instance_id, + 'instance_id': instance_id, + 'local-hostname': local_hostname, + 'local_hostname': local_hostname, + 'region': self.region}} def clear_cached_attrs(self, attr_defaults=()): """Reset any cached metadata attributes to datasource defaults. @@ -180,15 +234,22 @@ class DataSource(object): """ self._dirty_cache = True return_value = self._get_data() - json_file = os.path.join(self.paths.run_dir, INSTANCE_JSON_FILE) if not return_value: return return_value + self.persist_instance_data() + return return_value + + def persist_instance_data(self): + """Process and write INSTANCE_JSON_FILE with all instance metadata. + Replace any hyphens with underscores in key names for use in template + processing. + + @return True on successful write, False otherwise. + """ instance_data = { - 'ds': { - 'meta-data': self.metadata, - 'user-data': self.get_userdata_raw(), - 'vendor-data': self.get_vendordata_raw()}} + 'ds': {'_doc': EXPERIMENTAL_TEXT, + 'meta_data': self.metadata}} if hasattr(self, 'network_json'): network_json = getattr(self, 'network_json') if network_json != UNSET: @@ -202,16 +263,23 @@ class DataSource(object): try: # Process content base64encoding unserializable values content = util.json_dumps(instance_data) - # Strip base64: prefix and return base64-encoded-keys - processed_data = process_base64_metadata(json.loads(content)) + # Strip base64: prefix and set base64_encoded_keys list. + processed_data = process_instance_metadata( + json.loads(content), + sensitive_keys=self.sensitive_metadata_keys) except TypeError as e: LOG.warning('Error persisting instance-data.json: %s', str(e)) - return return_value + return False except UnicodeDecodeError as e: LOG.warning('Error persisting instance-data.json: %s', str(e)) - return return_value - write_json(json_file, processed_data, mode=0o600) - return return_value + return False + json_file = os.path.join(self.paths.run_dir, INSTANCE_JSON_FILE) + write_json(json_file, processed_data) # World readable + json_sensitive_file = os.path.join(self.paths.run_dir, + INSTANCE_JSON_SENSITIVE_FILE) + write_json(json_sensitive_file, + redact_sensitive_keys(processed_data), mode=0o600) + return True def _get_data(self): """Walk metadata sources, process crawled data and save attributes.""" @@ -475,8 +543,8 @@ class DataSource(object): for update_scope, update_events in self.update_events.items(): if event in update_events: if not supported_events.get(update_scope): - supported_events[update_scope] = [] - supported_events[update_scope].append(event) + supported_events[update_scope] = set() + supported_events[update_scope].add(event) for scope, matched_events in supported_events.items(): LOG.debug( "Update datasource metadata and %s config due to events: %s", @@ -490,6 +558,8 @@ class DataSource(object): result = self.get_data() if result: return True + LOG.debug("Datasource %s not updated for events: %s", self, + ', '.join(source_event_types)) return False def check_instance_id(self, sys_cfg): @@ -669,6 +739,10 @@ def convert_vendordata(data, recurse=True): raise ValueError("Unknown data type for vendordata: %s" % type(data)) +class BrokenMetadata(IOError): + pass + + # 'depends' is a list of dependencies (DEP_FILESYSTEM) # ds_list is a list of 2 item lists # ds_list = [ |