diff options
author | Kim Hagen <kim.sidney@gmail.com> | 2018-10-25 22:26:25 +0200 |
---|---|---|
committer | Kim Hagen <kim.sidney@gmail.com> | 2018-10-25 22:26:25 +0200 |
commit | b120f4f7a670674779a93f8c882c81f44a993888 (patch) | |
tree | 906d15f6520751b5e8fbeb49b680e673a5cc6aa3 /cloudinit/sources/__init__.py | |
parent | 838581d57c8765d3e487f58bc37ea103af39d26f (diff) | |
parent | 833adcdf6f85ec2305e62bea5a20f9363bf95507 (diff) | |
download | vyos-cloud-init-b120f4f7a670674779a93f8c882c81f44a993888.tar.gz vyos-cloud-init-b120f4f7a670674779a93f8c882c81f44a993888.zip |
Merge tag 'ubuntu/18.4-0ubuntu1_16.04.2' into current
Conflicts:
cloudinit/sources/DataSourceAzure.py
config/cloud.cfg.tmpl
integration-requirements.txt
tools/read-version
Diffstat (limited to 'cloudinit/sources/__init__.py')
-rw-r--r-- | cloudinit/sources/__init__.py | 278 |
1 files changed, 252 insertions, 26 deletions
diff --git a/cloudinit/sources/__init__.py b/cloudinit/sources/__init__.py index df0b374a..5ac98826 100644 --- a/cloudinit/sources/__init__.py +++ b/cloudinit/sources/__init__.py @@ -9,6 +9,7 @@ # This file is part of cloud-init. See LICENSE file for license information. import abc +from collections import namedtuple import copy import json import os @@ -17,6 +18,8 @@ import six from cloudinit.atomic_helper import write_json from cloudinit import importer from cloudinit import log as logging +from cloudinit import net +from cloudinit.event import EventType from cloudinit import type_utils from cloudinit import user_data as ud from cloudinit import util @@ -35,12 +38,23 @@ DEP_FILESYSTEM = "FILESYSTEM" DEP_NETWORK = "NETWORK" DS_PREFIX = 'DataSource' -# File in which instance meta-data, user-data and vendor-data is written +EXPERIMENTAL_TEXT = ( + "EXPERIMENTAL: The structure and format of content scoped under the 'ds'" + " key may change in subsequent releases of cloud-init.") + + +# File in which public available instance meta-data is written +# security-sensitive key values are redacted from this world-readable file INSTANCE_JSON_FILE = 'instance-data.json' +# security-sensitive key values are present in this root-readable file +INSTANCE_JSON_SENSITIVE_FILE = 'instance-data-sensitive.json' +REDACT_SENSITIVE_VALUE = 'redacted for non-root user' # Key which can be provide a cloud's official product name to cloud-init METADATA_CLOUD_NAME_KEY = 'cloud-name' +UNSET = "_unset" + LOG = logging.getLogger(__name__) @@ -48,26 +62,64 @@ class DataSourceNotFoundException(Exception): pass -def process_base64_metadata(metadata, key_path=''): - """Strip ci-b64 prefix and return metadata with base64-encoded-keys set.""" +class InvalidMetaDataException(Exception): + """Raised when metadata is broken, unavailable or disabled.""" + pass + + +def process_instance_metadata(metadata, key_path='', sensitive_keys=()): + """Process all instance metadata cleaning it up for persisting as json. + + Strip ci-b64 prefix and catalog any 'base64_encoded_keys' as a list + + @return Dict copy of processed metadata. + """ md_copy = copy.deepcopy(metadata) - md_copy['base64-encoded-keys'] = [] + md_copy['base64_encoded_keys'] = [] + md_copy['sensitive_keys'] = [] for key, val in metadata.items(): if key_path: sub_key_path = key_path + '/' + key else: sub_key_path = key + if key in sensitive_keys or sub_key_path in sensitive_keys: + md_copy['sensitive_keys'].append(sub_key_path) if isinstance(val, str) and val.startswith('ci-b64:'): - md_copy['base64-encoded-keys'].append(sub_key_path) + md_copy['base64_encoded_keys'].append(sub_key_path) md_copy[key] = val.replace('ci-b64:', '') if isinstance(val, dict): - return_val = process_base64_metadata(val, sub_key_path) - md_copy['base64-encoded-keys'].extend( - return_val.pop('base64-encoded-keys')) + return_val = process_instance_metadata( + val, sub_key_path, sensitive_keys) + md_copy['base64_encoded_keys'].extend( + return_val.pop('base64_encoded_keys')) + md_copy['sensitive_keys'].extend( + return_val.pop('sensitive_keys')) md_copy[key] = return_val return md_copy +def redact_sensitive_keys(metadata, redact_value=REDACT_SENSITIVE_VALUE): + """Redact any sensitive keys from to provided metadata dictionary. + + Replace any keys values listed in 'sensitive_keys' with redact_value. + """ + if not metadata.get('sensitive_keys', []): + return metadata + md_copy = copy.deepcopy(metadata) + for key_path in metadata.get('sensitive_keys'): + path_parts = key_path.split('/') + obj = md_copy + for path in path_parts: + if isinstance(obj[path], dict) and path != path_parts[-1]: + obj = obj[path] + obj[path] = redact_value + return md_copy + + +URLParams = namedtuple( + 'URLParms', ['max_wait_seconds', 'timeout_seconds', 'num_retries']) + + @six.add_metaclass(abc.ABCMeta) class DataSource(object): @@ -81,6 +133,37 @@ class DataSource(object): # Cached cloud_name as determined by _get_cloud_name _cloud_name = None + # Track the discovered fallback nic for use in configuration generation. + _fallback_interface = None + + # read_url_params + url_max_wait = -1 # max_wait < 0 means do not wait + url_timeout = 10 # timeout for each metadata url read attempt + url_retries = 5 # number of times to retry url upon 404 + + # The datasource defines a set of supported EventTypes during which + # the datasource can react to changes in metadata and regenerate + # network configuration on metadata changes. + # A datasource which supports writing network config on each system boot + # would call update_events['network'].add(EventType.BOOT). + + # Default: generate network config on new instance id (first boot). + update_events = {'network': set([EventType.BOOT_NEW_INSTANCE])} + + # N-tuple listing default values for any metadata-related class + # attributes cached on an instance by a process_data runs. These attribute + # values are reset via clear_cached_attrs during any update_metadata call. + cached_attr_defaults = ( + ('ec2_metadata', UNSET), ('network_json', UNSET), + ('metadata', {}), ('userdata', None), ('userdata_raw', None), + ('vendordata', None), ('vendordata_raw', None)) + + _dirty_cache = False + + # N-tuple of keypaths or keynames redact from instance-data.json for + # non-root users + sensitive_metadata_keys = ('security-credentials',) + def __init__(self, sys_cfg, distro, paths, ud_proc=None): self.sys_cfg = sys_cfg self.distro = distro @@ -106,49 +189,140 @@ class DataSource(object): def _get_standardized_metadata(self): """Return a dictionary of standardized metadata keys.""" - return {'v1': { - 'local-hostname': self.get_hostname(), - 'instance-id': self.get_instance_id(), - 'cloud-name': self.cloud_name, - 'region': self.region, - 'availability-zone': self.availability_zone}} + local_hostname = self.get_hostname() + instance_id = self.get_instance_id() + availability_zone = self.availability_zone + cloud_name = self.cloud_name + # When adding new standard keys prefer underscore-delimited instead + # of hyphen-delimted to support simple variable references in jinja + # templates. + return { + 'v1': { + 'availability-zone': availability_zone, + 'availability_zone': availability_zone, + 'cloud-name': cloud_name, + 'cloud_name': cloud_name, + 'instance-id': instance_id, + 'instance_id': instance_id, + 'local-hostname': local_hostname, + 'local_hostname': local_hostname, + 'region': self.region}} + + def clear_cached_attrs(self, attr_defaults=()): + """Reset any cached metadata attributes to datasource defaults. + + @param attr_defaults: Optional tuple of (attr, value) pairs to + set instead of cached_attr_defaults. + """ + if not self._dirty_cache: + return + if attr_defaults: + attr_values = attr_defaults + else: + attr_values = self.cached_attr_defaults + + for attribute, value in attr_values: + if hasattr(self, attribute): + setattr(self, attribute, value) + if not attr_defaults: + self._dirty_cache = False def get_data(self): """Datasources implement _get_data to setup metadata and userdata_raw. Minimally, the datasource should return a boolean True on success. """ + self._dirty_cache = True return_value = self._get_data() - json_file = os.path.join(self.paths.run_dir, INSTANCE_JSON_FILE) if not return_value: return return_value + self.persist_instance_data() + return return_value + def persist_instance_data(self): + """Process and write INSTANCE_JSON_FILE with all instance metadata. + + Replace any hyphens with underscores in key names for use in template + processing. + + @return True on successful write, False otherwise. + """ instance_data = { - 'ds': { - 'meta-data': self.metadata, - 'user-data': self.get_userdata_raw(), - 'vendor-data': self.get_vendordata_raw()}} + 'ds': {'_doc': EXPERIMENTAL_TEXT, + 'meta_data': self.metadata}} + if hasattr(self, 'network_json'): + network_json = getattr(self, 'network_json') + if network_json != UNSET: + instance_data['ds']['network_json'] = network_json + if hasattr(self, 'ec2_metadata'): + ec2_metadata = getattr(self, 'ec2_metadata') + if ec2_metadata != UNSET: + instance_data['ds']['ec2_metadata'] = ec2_metadata instance_data.update( self._get_standardized_metadata()) try: # Process content base64encoding unserializable values content = util.json_dumps(instance_data) - # Strip base64: prefix and return base64-encoded-keys - processed_data = process_base64_metadata(json.loads(content)) + # Strip base64: prefix and set base64_encoded_keys list. + processed_data = process_instance_metadata( + json.loads(content), + sensitive_keys=self.sensitive_metadata_keys) except TypeError as e: LOG.warning('Error persisting instance-data.json: %s', str(e)) - return return_value + return False except UnicodeDecodeError as e: LOG.warning('Error persisting instance-data.json: %s', str(e)) - return return_value - write_json(json_file, processed_data, mode=0o600) - return return_value + return False + json_file = os.path.join(self.paths.run_dir, INSTANCE_JSON_FILE) + write_json(json_file, processed_data) # World readable + json_sensitive_file = os.path.join(self.paths.run_dir, + INSTANCE_JSON_SENSITIVE_FILE) + write_json(json_sensitive_file, + redact_sensitive_keys(processed_data), mode=0o600) + return True def _get_data(self): + """Walk metadata sources, process crawled data and save attributes.""" raise NotImplementedError( 'Subclasses of DataSource must implement _get_data which' ' sets self.metadata, vendordata_raw and userdata_raw.') + def get_url_params(self): + """Return the Datasource's prefered url_read parameters. + + Subclasses may override url_max_wait, url_timeout, url_retries. + + @return: A URLParams object with max_wait_seconds, timeout_seconds, + num_retries. + """ + max_wait = self.url_max_wait + try: + max_wait = int(self.ds_cfg.get("max_wait", self.url_max_wait)) + except ValueError: + util.logexc( + LOG, "Config max_wait '%s' is not an int, using default '%s'", + self.ds_cfg.get("max_wait"), max_wait) + + timeout = self.url_timeout + try: + timeout = max( + 0, int(self.ds_cfg.get("timeout", self.url_timeout))) + except ValueError: + timeout = self.url_timeout + util.logexc( + LOG, "Config timeout '%s' is not an int, using default '%s'", + self.ds_cfg.get('timeout'), timeout) + + retries = self.url_retries + try: + retries = int(self.ds_cfg.get("retries", self.url_retries)) + except Exception: + util.logexc( + LOG, "Config retries '%s' is not an int, using default '%s'", + self.ds_cfg.get('retries'), retries) + + return URLParams(max_wait, timeout, retries) + def get_userdata(self, apply_filter=False): if self.userdata is None: self.userdata = self.ud_proc.process(self.get_userdata_raw()) @@ -162,6 +336,17 @@ class DataSource(object): return self.vendordata @property + def fallback_interface(self): + """Determine the network interface used during local network config.""" + if self._fallback_interface is None: + self._fallback_interface = net.find_fallback_nic() + if self._fallback_interface is None: + LOG.warning( + "Did not find a fallback interface on %s.", + self.cloud_name) + return self._fallback_interface + + @property def cloud_name(self): """Return lowercase cloud name as determined by the datasource. @@ -340,6 +525,43 @@ class DataSource(object): def get_package_mirror_info(self): return self.distro.get_package_mirror_info(data_source=self) + def update_metadata(self, source_event_types): + """Refresh cached metadata if the datasource supports this event. + + The datasource has a list of update_events which + trigger refreshing all cached metadata as well as refreshing the + network configuration. + + @param source_event_types: List of EventTypes which may trigger a + metadata update. + + @return True if the datasource did successfully update cached metadata + due to source_event_type. + """ + supported_events = {} + for event in source_event_types: + for update_scope, update_events in self.update_events.items(): + if event in update_events: + if not supported_events.get(update_scope): + supported_events[update_scope] = set() + supported_events[update_scope].add(event) + for scope, matched_events in supported_events.items(): + LOG.debug( + "Update datasource metadata and %s config due to events: %s", + scope, ', '.join(matched_events)) + # Each datasource has a cached config property which needs clearing + # Once cleared that config property will be regenerated from + # current metadata. + self.clear_cached_attrs((('_%s_config' % scope, UNSET),)) + if supported_events: + self.clear_cached_attrs() + result = self.get_data() + if result: + return True + LOG.debug("Datasource %s not updated for events: %s", self, + ', '.join(source_event_types)) + return False + def check_instance_id(self, sys_cfg): # quickly (local check only) if self.instance_id is still return False @@ -444,7 +666,7 @@ def find_source(sys_cfg, distro, paths, ds_deps, cfg_list, pkg_list, reporter): with myrep: LOG.debug("Seeing if we can get any data from %s", cls) s = cls(sys_cfg, distro, paths) - if s.get_data(): + if s.update_metadata([EventType.BOOT_NEW_INSTANCE]): myrep.message = "found %s data from %s" % (mode, name) return (s, type_utils.obj_name(cls)) except Exception: @@ -517,6 +739,10 @@ def convert_vendordata(data, recurse=True): raise ValueError("Unknown data type for vendordata: %s" % type(data)) +class BrokenMetadata(IOError): + pass + + # 'depends' is a list of dependencies (DEP_FILESYSTEM) # ds_list is a list of 2 item lists # ds_list = [ |