From f0bc02d7e221c9aa5982b267739481420c761ead Mon Sep 17 00:00:00 2001
From: Chad Smith <chad.smith@canonical.com>
Date: Tue, 9 Oct 2018 21:46:35 +0000
Subject: instance-data: Add standard keys platform and subplatform. Refactor
 ec2.

Add the following instance-data.json standardized keys:
* v1._beta_keys: List any v1 keys in beta development,
  e.g. ['subplatform'].
* v1.public_ssh_keys: List of any cloud-provided ssh keys for the
  instance.
* v1.platform: String representing the cloud platform api supporting the
  datasource. For example: 'ec2' for aws, aliyun and brightbox cloud
  names.
* v1.subplatform: String with more details about the source of the
  metadata consumed. For example, metadata uri, config drive device path
  or seed directory.

To support the new platform and subplatform standardized instance-data,
DataSource and its subclasses grew platform and subplatform attributes.
The platform attribute defaults to the lowercase string datasource name at
self.dsname. This method is overridden in NoCloud, Ec2 and ConfigDrive
datasources.

The subplatform attribute calls a _get_subplatform method which will
return a string containing a simple slug for subplatform type such as
metadata, seed-dir or config-drive followed by a detailed uri, device or
directory path where the datasource consumed its configuration.

As part of this work, DatasourceEC2 methods _get_data and _crawl_metadata
have been refactored for a few reasons:
- crawl_metadata is now a read-only operation: it persists no attributes on
  the datasource instance and returns a dictionary of consumed metadata.
- crawl_metadata now closely represents the raw structure of the ec2
  metadata consumed, so that end-users can leverage public ec2 metadata
  documentation where possible.
- crawl_metadata adds a '_metadata_api_version' key to the crawled
  ds.metadata to advertise what version of EC2's api was consumed by
  cloud-init.
- _get_data now does all the processing of crawl_metadata and saves
  datasource instance attributes userdata_raw, metadata etc.

Additional drive-bys:
* unit test rework for test_altcloud and test_azure to simplify mocks
  and make use of existing util and test_helpers functions.
---
 cloudinit/sources/__init__.py | 98 +++++++++++++++++++++++++++++++++----------
 1 file changed, 77 insertions(+), 21 deletions(-)

(limited to 'cloudinit/sources/__init__.py')
diff --git a/cloudinit/sources/__init__.py b/cloudinit/sources/__init__.py
index 5ac98826..9b90680f 100644
--- a/cloudinit/sources/__init__.py
+++ b/cloudinit/sources/__init__.py
@@ -54,6 +54,7 @@ REDACT_SENSITIVE_VALUE = 'redacted for non-root user'
 METADATA_CLOUD_NAME_KEY = 'cloud-name'
 
 UNSET = "_unset"
+METADATA_UNKNOWN = 'unknown'
 
 LOG = logging.getLogger(__name__)
 
@@ -133,6 +134,14 @@ class DataSource(object):
     # Cached cloud_name as determined by _get_cloud_name
     _cloud_name = None
 
+    # Cached cloud platform api type: e.g. ec2, openstack, kvm, lxd, azure etc.
+    _platform_type = None
+
+    # More details about the cloud platform:
+    #  - metadata (http://169.254.169.254/)
+    #  - seed-dir (<dirname>)
+    _subplatform = None
+
     # Track the discovered fallback nic for use in configuration generation.
     _fallback_interface = None
 
@@ -192,21 +201,24 @@ class DataSource(object):
         local_hostname = self.get_hostname()
         instance_id = self.get_instance_id()
         availability_zone = self.availability_zone
-        cloud_name = self.cloud_name
-        # When adding new standard keys prefer underscore-delimited instead
-        # of hyphen-delimted to support simple variable references in jinja
-        # templates.
+        # In the event of upgrade from existing cloudinit, pickled datasource
+        # will not contain these new class attributes. So we need to recrawl
+        # metadata to discover that content.
         return {
             'v1': {
+                '_beta_keys': ['subplatform'],
                 'availability-zone': availability_zone,
                 'availability_zone': availability_zone,
-                'cloud-name': cloud_name,
-                'cloud_name': cloud_name,
+                'cloud-name': self.cloud_name,
+                'cloud_name': self.cloud_name,
+                'platform': self.platform_type,
+                'public_ssh_keys': self.get_public_ssh_keys(),
                 'instance-id': instance_id,
                 'instance_id': instance_id,
                 'local-hostname': local_hostname,
                 'local_hostname': local_hostname,
-                'region': self.region}}
+                'region': self.region,
+                'subplatform': self.subplatform}}
 
     def clear_cached_attrs(self, attr_defaults=()):
         """Reset any cached metadata attributes to datasource defaults.
@@ -247,19 +259,27 @@ class DataSource(object):
 
         @return True on successful write, False otherwise.
         """
-        instance_data = {
-            'ds': {'_doc': EXPERIMENTAL_TEXT,
-                   'meta_data': self.metadata}}
-        if hasattr(self, 'network_json'):
-            network_json = getattr(self, 'network_json')
-            if network_json != UNSET:
-                instance_data['ds']['network_json'] = network_json
-        if hasattr(self, 'ec2_metadata'):
-            ec2_metadata = getattr(self, 'ec2_metadata')
-            if ec2_metadata != UNSET:
-                instance_data['ds']['ec2_metadata'] = ec2_metadata
+        if hasattr(self, '_crawled_metadata'):
+            # Any datasource with _crawled_metadata will best represent
+            # most recent, 'raw' metadata
+            crawled_metadata = copy.deepcopy(
+                getattr(self, '_crawled_metadata'))
+            crawled_metadata.pop('user-data', None)
+            crawled_metadata.pop('vendor-data', None)
+            instance_data = {'ds': crawled_metadata}
+        else:
+            instance_data = {'ds': {'meta_data': self.metadata}}
+            if hasattr(self, 'network_json'):
+                network_json = getattr(self, 'network_json')
+                if network_json != UNSET:
+                    instance_data['ds']['network_json'] = network_json
+            if hasattr(self, 'ec2_metadata'):
+                ec2_metadata = getattr(self, 'ec2_metadata')
+                if ec2_metadata != UNSET:
+                    instance_data['ds']['ec2_metadata'] = ec2_metadata
         instance_data.update(
             self._get_standardized_metadata())
+        instance_data['ds']['_doc'] = EXPERIMENTAL_TEXT
         try:
             # Process content base64encoding unserializable values
             content = util.json_dumps(instance_data)
@@ -346,6 +366,40 @@ class DataSource(object):
                     self.cloud_name)
         return self._fallback_interface
 
+    @property
+    def platform_type(self):
+        if not hasattr(self, '_platform_type'):
+            # Upgrade path: pickled datasource may lack _platform_type.
+            self._platform_type = self.dsname.lower()
+        if not self._platform_type:  # unset or empty: default to dsname
+            self._platform_type = self.dsname.lower()
+        return self._platform_type
+
+    @property
+    def subplatform(self):
+        """Return a string representing subplatform details for the datasource.
+
+        This should be guidance for where the metadata is sourced.
+        Examples of this on different clouds:
+            ec2:       metadata (http://169.254.169.254)
+            openstack: configdrive (/dev/path)
+            openstack: metadata (http://169.254.169.254)
+            nocloud:   seed-dir (/seed/dir/path)
+            lxd:   nocloud (/seed/dir/path)
+        """
+        if not hasattr(self, '_subplatform'):
+            # Upgrade path: pickled datasource may lack _subplatform.
+            self._subplatform = self._get_subplatform()
+        if not self._subplatform:  # unset or empty: compute and cache it
+            self._subplatform = self._get_subplatform()
+        return self._subplatform
+
+    def _get_subplatform(self):
+        """Subclasses should implement to return a "slug (detail)" string."""
+        if hasattr(self, 'metadata_address'):
+            return 'metadata (%s)' % getattr(self, 'metadata_address')
+        return METADATA_UNKNOWN  # no metadata_address attribute to report
+
     @property
     def cloud_name(self):
         """Return lowercase cloud name as determined by the datasource.
@@ -359,9 +413,11 @@ class DataSource(object):
             cloud_name = self.metadata.get(METADATA_CLOUD_NAME_KEY)
             if isinstance(cloud_name, six.string_types):
                 self._cloud_name = cloud_name.lower()
-            LOG.debug(
-                'Ignoring metadata provided key %s: non-string type %s',
-                METADATA_CLOUD_NAME_KEY, type(cloud_name))
+            else:
+                self._cloud_name = self._get_cloud_name().lower()
+                LOG.debug(
+                    'Ignoring metadata provided key %s: non-string type %s',
+                    METADATA_CLOUD_NAME_KEY, type(cloud_name))
         else:
             self._cloud_name = self._get_cloud_name().lower()
         return self._cloud_name
-- 
cgit v1.2.3