From 94a64529dccebd8fe8c7969370b8696e46023fbd Mon Sep 17 00:00:00 2001 From: Paride Legovini Date: Wed, 30 Jan 2019 15:38:56 +0000 Subject: Resolve flake8 comparison and pycodestyle over-ident issues Fixes: - flake8: use ==/!= to compare str, bytes, and int literals - pycodestyle: E117 over-indented --- cloudinit/sources/DataSourceEc2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'cloudinit/sources/DataSourceEc2.py') diff --git a/cloudinit/sources/DataSourceEc2.py b/cloudinit/sources/DataSourceEc2.py index 9ccf2cdc..eb6f27b2 100644 --- a/cloudinit/sources/DataSourceEc2.py +++ b/cloudinit/sources/DataSourceEc2.py @@ -442,7 +442,7 @@ def identify_aws(data): if (data['uuid'].startswith('ec2') and (data['uuid_source'] == 'hypervisor' or data['uuid'] == data['serial'])): - return CloudNames.AWS + return CloudNames.AWS return None -- cgit v1.2.3 From 0bb4c74e7f2d008b015d5453a1be88ae807b1f9b Mon Sep 17 00:00:00 2001 From: "Guilherme G. Piccoli" Date: Thu, 14 Feb 2019 20:37:32 +0000 Subject: EC2: Rewrite network config on AWS Classic instances every boot AWS EC2 instances' network come in 2 basic flavors: Classic and VPC (Virtual Private Cloud). The former has an interesting behavior of having its MAC address changed whenever the instance is stopped/restarted. This behavior is not observed in VPC instances. In Ubuntu 18.04 (Bionic) the network "management" changed from ENI-style (etc/network/interfaces) to netplan, and when using netplan we observe the following block present in /etc/netplan/50-cloud-init.yaml: match: macaddress: aa:bb:cc:dd:ee:ff Jani Ollikainen noticed in Launchpad bug #1802073 that the EC2 Classic instances were booting without network access in Bionic after stop/restart procedure, due to their MAC address change behavior. It was narrowed down to the netplan MAC match block, that kept the old MAC address after stopping and restarting an instance, since the network configuration writing happens by default only once in EC2 instances, in the first boot. This patch changes the network configuration write to every boot in EC2 Classic instances, by checking against the "vpc-id" metadata information provided only in the VPC instances - if we don't have this metadata value, cloud-init will rewrite the network configuration file in every boot. This was tested in an EC2 Classic instance and proved to fix the issue; unit tests were also added for the new method is_classic_instance(). LP: #1802073 Reported-by: Jani Ollikainen Suggested-by: Ryan Harper Co-developed-by: Chad Smith Signed-off-by: Guilherme G. Piccoli --- cloudinit/sources/DataSourceEc2.py | 21 +++++++++++++++++++++ doc/rtd/topics/datasources/ec2.rst | 11 +++++++++++ tests/unittests/test_datasource/test_ec2.py | 24 ++++++++++++++++++++++++ 3 files changed, 56 insertions(+) (limited to 'cloudinit/sources/DataSourceEc2.py') diff --git a/cloudinit/sources/DataSourceEc2.py b/cloudinit/sources/DataSourceEc2.py index eb6f27b2..4f2f6ccb 100644 --- a/cloudinit/sources/DataSourceEc2.py +++ b/cloudinit/sources/DataSourceEc2.py @@ -19,6 +19,7 @@ from cloudinit import sources from cloudinit import url_helper as uhelp from cloudinit import util from cloudinit import warnings +from cloudinit.event import EventType LOG = logging.getLogger(__name__) @@ -107,6 +108,19 @@ class DataSourceEc2(sources.DataSource): 'dynamic', {}).get('instance-identity', {}).get('document', {}) return True + def is_classic_instance(self): + """Report if this instance type is Ec2 Classic (non-vpc).""" + if not self.metadata: + # Can return False on inconclusive as we are also called in + # network_config where metadata will be present. + # Secondary call site is in packaging postinst script. + return False + ifaces_md = self.metadata.get('network', {}).get('interfaces', {}) + for _mac, mac_data in ifaces_md.get('macs', {}).items(): + if 'vpc-id' in mac_data: + return False + return True + @property def launch_index(self): if not self.metadata: @@ -320,6 +334,13 @@ class DataSourceEc2(sources.DataSource): if isinstance(net_md, dict): result = convert_ec2_metadata_network_config( net_md, macs_to_nics=macs_to_nics, fallback_nic=iface) + # RELEASE_BLOCKER: Xenial debian/postinst needs to add + # EventType.BOOT on upgrade path for classic. + + # Non-VPC (aka Classic) Ec2 instances need to rewrite the + # network config file every boot due to MAC address change. + if self.is_classic_instance(): + self.update_events['network'].add(EventType.BOOT) else: LOG.warning("Metadata 'network' key not valid: %s.", net_md) self._network_config = result diff --git a/doc/rtd/topics/datasources/ec2.rst b/doc/rtd/topics/datasources/ec2.rst index 64c325d8..76beca92 100644 --- a/doc/rtd/topics/datasources/ec2.rst +++ b/doc/rtd/topics/datasources/ec2.rst @@ -90,4 +90,15 @@ An example configuration with the default values is provided below: max_wait: 120 timeout: 50 +Notes +----- + * There are 2 types of EC2 instances network-wise: VPC ones (Virtual Private + Cloud) and Classic ones (also known as non-VPC). One major difference + between them is that Classic instances have their MAC address changed on + stop/restart operations, so cloud-init will recreate the network config + file for EC2 Classic instances every boot. On VPC instances this file is + generated only in the first boot of the instance. + The check for the instance type is performed by is_classic_instance() + method. + .. vi: textwidth=78 diff --git a/tests/unittests/test_datasource/test_ec2.py b/tests/unittests/test_datasource/test_ec2.py index 1a5956d9..20d59bfd 100644 --- a/tests/unittests/test_datasource/test_ec2.py +++ b/tests/unittests/test_datasource/test_ec2.py @@ -401,6 +401,30 @@ class TestEc2(test_helpers.HttprettyTestCase): ds.metadata = DEFAULT_METADATA self.assertEqual('my-identity-id', ds.get_instance_id()) + def test_classic_instance_true(self): + """If no vpc-id in metadata, is_classic_instance must return true.""" + md_copy = copy.deepcopy(DEFAULT_METADATA) + ifaces_md = md_copy.get('network', {}).get('interfaces', {}) + for _mac, mac_data in ifaces_md.get('macs', {}).items(): + if 'vpc-id' in mac_data: + del mac_data['vpc-id'] + + ds = self._setup_ds( + platform_data=self.valid_platform_data, + sys_cfg={'datasource': {'Ec2': {'strict_id': False}}}, + md={'md': md_copy}) + self.assertTrue(ds.get_data()) + self.assertTrue(ds.is_classic_instance()) + + def test_classic_instance_false(self): + """If vpc-id in metadata, is_classic_instance must return false.""" + ds = self._setup_ds( + platform_data=self.valid_platform_data, + sys_cfg={'datasource': {'Ec2': {'strict_id': False}}}, + md={'md': DEFAULT_METADATA}) + self.assertTrue(ds.get_data()) + self.assertFalse(ds.is_classic_instance()) + @mock.patch('cloudinit.net.dhcp.maybe_perform_dhcp_discovery') def test_valid_platform_with_strict_true(self, m_dhcp): """Valid platform data should return true with strict_id true.""" -- cgit v1.2.3 From 1e6a72b679838d87c425edd21013260e9f17b500 Mon Sep 17 00:00:00 2001 From: Daniel Watkins Date: Tue, 12 Mar 2019 14:52:53 +0000 Subject: DataSourceEc2: update RELEASE_BLOCKER to be more accurate Our previous understanding of the upgrade issue was incomplete; it turns out the only change we need is the one now outlined. --- cloudinit/sources/DataSourceEc2.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'cloudinit/sources/DataSourceEc2.py') diff --git a/cloudinit/sources/DataSourceEc2.py b/cloudinit/sources/DataSourceEc2.py index 4f2f6ccb..ac28f1db 100644 --- a/cloudinit/sources/DataSourceEc2.py +++ b/cloudinit/sources/DataSourceEc2.py @@ -334,8 +334,12 @@ class DataSourceEc2(sources.DataSource): if isinstance(net_md, dict): result = convert_ec2_metadata_network_config( net_md, macs_to_nics=macs_to_nics, fallback_nic=iface) - # RELEASE_BLOCKER: Xenial debian/postinst needs to add - # EventType.BOOT on upgrade path for classic. + + # RELEASE_BLOCKER: xenial should drop the below if statement, + # because the issue being addressed doesn't exist pre-netplan. + # (This datasource doesn't implement check_instance_id() so the + # datasource object is recreated every boot; this means we don't + # need to modify update_events on cloud-init upgrade.) # Non-VPC (aka Classic) Ec2 instances need to rewrite the # network config file every boot due to MAC address change. -- cgit v1.2.3 From 9aa97cfc73b31dc548a240e5f4bd1ef41861cc4d Mon Sep 17 00:00:00 2001 From: Daniel Watkins Date: Wed, 8 May 2019 15:14:31 +0000 Subject: replace remaining occurrences of LOG.warn --- cloudinit/sources/DataSourceCloudStack.py | 2 +- cloudinit/sources/DataSourceEc2.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'cloudinit/sources/DataSourceEc2.py') diff --git a/cloudinit/sources/DataSourceCloudStack.py b/cloudinit/sources/DataSourceCloudStack.py index d4b758f2..f185dc71 100644 --- a/cloudinit/sources/DataSourceCloudStack.py +++ b/cloudinit/sources/DataSourceCloudStack.py @@ -95,7 +95,7 @@ class DataSourceCloudStack(sources.DataSource): start_time = time.time() url = uhelp.wait_for_url( urls=urls, max_wait=url_params.max_wait_seconds, - timeout=url_params.timeout_seconds, status_cb=LOG.warn) + timeout=url_params.timeout_seconds, status_cb=LOG.warning) if url: LOG.debug("Using metadata source: '%s'", url) diff --git a/cloudinit/sources/DataSourceEc2.py b/cloudinit/sources/DataSourceEc2.py index ac28f1db..5c017bfb 100644 --- a/cloudinit/sources/DataSourceEc2.py +++ b/cloudinit/sources/DataSourceEc2.py @@ -208,7 +208,7 @@ class DataSourceEc2(sources.DataSource): start_time = time.time() url = uhelp.wait_for_url( urls=urls, max_wait=url_params.max_wait_seconds, - timeout=url_params.timeout_seconds, status_cb=LOG.warn) + timeout=url_params.timeout_seconds, status_cb=LOG.warning) if url: self.metadata_address = url2base[url] -- cgit v1.2.3 From e7f16e215ef4fd6a31ddd2f1b585bbb6508bff06 Mon Sep 17 00:00:00 2001 From: Scott Moser Date: Wed, 11 Sep 2019 21:09:34 +0000 Subject: Brightbox: restrict detection to require full domain match .brightbox.com The detection for brightbox in both ds-identify and in identify_brightbox would incorrectly match the domain 'bobrightbox', which is not a brightbox platform. The fix here is to restrict matching to '*.brightbox.com' rather than '*brightbox.com' Also, while here remove a url to bug 1661693 which added the knowledge of brightbox. --- cloudinit/sources/DataSourceEc2.py | 2 +- tests/unittests/test_ds_identify.py | 16 ++++++++++++++-- tools/ds-identify | 3 +-- 3 files changed, 16 insertions(+), 5 deletions(-) (limited to 'cloudinit/sources/DataSourceEc2.py') diff --git a/cloudinit/sources/DataSourceEc2.py b/cloudinit/sources/DataSourceEc2.py index 5c017bfb..10107456 100644 --- a/cloudinit/sources/DataSourceEc2.py +++ b/cloudinit/sources/DataSourceEc2.py @@ -473,7 +473,7 @@ def identify_aws(data): def identify_brightbox(data): - if data['serial'].endswith('brightbox.com'): + if data['serial'].endswith('.brightbox.com'): return CloudNames.BRIGHTBOX diff --git a/tests/unittests/test_ds_identify.py b/tests/unittests/test_ds_identify.py index 587e6993..de87be29 100644 --- a/tests/unittests/test_ds_identify.py +++ b/tests/unittests/test_ds_identify.py @@ -195,6 +195,10 @@ class DsIdentifyBase(CiTestCase): return self._check_via_dict( data, RC_FOUND, dslist=[data.get('ds'), DS_NONE]) + def _test_ds_not_found(self, name): + data = copy.deepcopy(VALID_CFG[name]) + return self._check_via_dict(data, RC_NOT_FOUND) + def _check_via_dict(self, data, rc, dslist=None, **kwargs): ret = self._call_via_dict(data, **kwargs) good = False @@ -244,9 +248,13 @@ class TestDsIdentify(DsIdentifyBase): self._test_ds_found('Ec2-xen') def test_brightbox_is_ec2(self): - """EC2: product_serial ends with 'brightbox.com'""" + """EC2: product_serial ends with '.brightbox.com'""" self._test_ds_found('Ec2-brightbox') + def test_bobrightbox_is_not_brightbox(self): + """EC2: bobrightbox.com in product_serial is not brightbox'""" + self._test_ds_not_found('Ec2-brightbox-negative') + def test_gce_by_product_name(self): """GCE identifies itself with product_name.""" self._test_ds_found('GCE') @@ -724,7 +732,11 @@ VALID_CFG = { }, 'Ec2-brightbox': { 'ds': 'Ec2', - 'files': {P_PRODUCT_SERIAL: 'facc6e2f.brightbox.com\n'}, + 'files': {P_PRODUCT_SERIAL: 'srv-otuxg.gb1.brightbox.com\n'}, + }, + 'Ec2-brightbox-negative': { + 'ds': 'Ec2', + 'files': {P_PRODUCT_SERIAL: 'tricky-host.bobrightbox.com\n'}, }, 'GCE': { 'ds': 'GCE', diff --git a/tools/ds-identify b/tools/ds-identify index e0d4865c..2447d14f 100755 --- a/tools/ds-identify +++ b/tools/ds-identify @@ -891,9 +891,8 @@ ec2_identify_platform() { local default="$1" local serial="${DI_DMI_PRODUCT_SERIAL}" - # brightbox https://bugs.launchpad.net/cloud-init/+bug/1661693 case "$serial" in - *brightbox.com) _RET="Brightbox"; return 0;; + *.brightbox.com) _RET="Brightbox"; return 0;; esac # AWS http://docs.aws.amazon.com/AWSEC2/ -- cgit v1.2.3 From 571f7c36e89f67f4c2d1cacfd8f9269bf864d560 Mon Sep 17 00:00:00 2001 From: Shixin Ruan Date: Wed, 18 Sep 2019 13:15:25 +0000 Subject: Add datasource for ZStack platform. Zstack platform provides a AWS Ec2 metadata service, and identifies their platform to the guest by setting the 'chassis asset tag' to a string that ends with '.zstack.io'. LP: #1841181 --- cloudinit/apport.py | 1 + cloudinit/sources/DataSourceEc2.py | 16 ++++++++++++- doc/rtd/topics/datasources.rst | 1 + doc/rtd/topics/datasources/zstack.rst | 36 +++++++++++++++++++++++++++++ tests/unittests/test_datasource/test_ec2.py | 28 ++++++++++++++++++++++ tests/unittests/test_ds_identify.py | 9 +++++++- tools/ds-identify | 5 ++++ 7 files changed, 94 insertions(+), 2 deletions(-) create mode 100644 doc/rtd/topics/datasources/zstack.rst (limited to 'cloudinit/sources/DataSourceEc2.py') diff --git a/cloudinit/apport.py b/cloudinit/apport.py index 003ff1ff..fde1f75b 100644 --- a/cloudinit/apport.py +++ b/cloudinit/apport.py @@ -37,6 +37,7 @@ KNOWN_CLOUD_NAMES = [ 'Scaleway', 'SmartOS', 'VMware', + 'ZStack', 'Other'] # Potentially clear text collected logs diff --git a/cloudinit/sources/DataSourceEc2.py b/cloudinit/sources/DataSourceEc2.py index 10107456..6c72ace2 100644 --- a/cloudinit/sources/DataSourceEc2.py +++ b/cloudinit/sources/DataSourceEc2.py @@ -33,6 +33,7 @@ class CloudNames(object): ALIYUN = "aliyun" AWS = "aws" BRIGHTBOX = "brightbox" + ZSTACK = "zstack" # UNKNOWN indicates no positive id. If strict_id is 'warn' or 'false', # then an attempt at the Ec2 Metadata service will be made. UNKNOWN = "unknown" @@ -477,10 +478,16 @@ def identify_brightbox(data): return CloudNames.BRIGHTBOX +def identify_zstack(data): + if data['asset_tag'].endswith('.zstack.io'): + return CloudNames.ZSTACK + + def identify_platform(): # identify the platform and return an entry in CloudNames. data = _collect_platform_data() - checks = (identify_aws, identify_brightbox, lambda x: CloudNames.UNKNOWN) + checks = (identify_aws, identify_brightbox, identify_zstack, + lambda x: CloudNames.UNKNOWN) for checker in checks: try: result = checker(data) @@ -498,6 +505,7 @@ def _collect_platform_data(): uuid: system-uuid from dmi or /sys/hypervisor uuid_source: 'hypervisor' (/sys/hypervisor/uuid) or 'dmi' serial: dmi 'system-serial-number' (/sys/.../product_serial) + asset_tag: 'dmidecode -s chassis-asset-tag' On Ec2 instances experimentation is that product_serial is upper case, and product_uuid is lower case. This returns lower case values for both. @@ -520,6 +528,12 @@ def _collect_platform_data(): data['serial'] = serial.lower() + asset_tag = util.read_dmi_data('chassis-asset-tag') + if asset_tag is None: + asset_tag = '' + + data['asset_tag'] = asset_tag.lower() + return data diff --git a/doc/rtd/topics/datasources.rst b/doc/rtd/topics/datasources.rst index 8e58be97..a337c08c 100644 --- a/doc/rtd/topics/datasources.rst +++ b/doc/rtd/topics/datasources.rst @@ -45,6 +45,7 @@ The following is a list of documents for each supported datasource: datasources/oracle.rst datasources/ovf.rst datasources/smartos.rst + datasources/zstack.rst Creation diff --git a/doc/rtd/topics/datasources/zstack.rst b/doc/rtd/topics/datasources/zstack.rst new file mode 100644 index 00000000..36e60ffb --- /dev/null +++ b/doc/rtd/topics/datasources/zstack.rst @@ -0,0 +1,36 @@ +.. _datasource_zstack: + +ZStack +====== +ZStack platform provides a AWS Ec2 metadata service, but with different datasource identity. +More information about ZStack can be found at `ZStack `__. + +Discovery +--------- +To determine whether a vm running on ZStack platform, cloud-init checks DMI information +by 'dmidecode -s chassis-asset-tag', if the output ends with '.zstack.io', it's running +on ZStack platform: + + +Metadata +^^^^^^^^ +Same as EC2, instance metadata can be queried at + +:: + + GET http://169.254.169.254/2009-04-04/meta-data/ + instance-id + local-hostname + +Userdata +^^^^^^^^ +Same as EC2, instance userdata can be queried at + +:: + + GET http://169.254.169.254/2009-04-04/user-data/ + meta_data.json + user_data + password + +.. vi: textwidth=78 diff --git a/tests/unittests/test_datasource/test_ec2.py b/tests/unittests/test_datasource/test_ec2.py index 1ec8e009..6fabf258 100644 --- a/tests/unittests/test_datasource/test_ec2.py +++ b/tests/unittests/test_datasource/test_ec2.py @@ -662,4 +662,32 @@ class TestConvertEc2MetadataNetworkConfig(test_helpers.CiTestCase): expected, ec2.convert_ec2_metadata_network_config(self.network_metadata)) + +class TesIdentifyPlatform(test_helpers.CiTestCase): + + def collmock(self, **kwargs): + """return non-special _collect_platform_data updated with changes.""" + unspecial = { + 'asset_tag': '3857-0037-2746-7462-1818-3997-77', + 'serial': 'H23-C4J3JV-R6', + 'uuid': '81c7e555-6471-4833-9551-1ab366c4cfd2', + 'uuid_source': 'dmi', + } + unspecial.update(**kwargs) + return unspecial + + @mock.patch('cloudinit.sources.DataSourceEc2._collect_platform_data') + def test_identify_zstack(self, m_collect): + """zstack should be identified if cassis-asset-tag ends in .zstack.io + """ + m_collect.return_value = self.collmock(asset_tag='123456.zstack.io') + self.assertEqual(ec2.CloudNames.ZSTACK, ec2.identify_platform()) + + @mock.patch('cloudinit.sources.DataSourceEc2._collect_platform_data') + def test_identify_zstack_full_domain_only(self, m_collect): + """zstack asset-tag matching should match only on full domain boundary. + """ + m_collect.return_value = self.collmock(asset_tag='123456.buzzstack.io') + self.assertEqual(ec2.CloudNames.UNKNOWN, ec2.identify_platform()) + # vi: ts=4 expandtab diff --git a/tests/unittests/test_ds_identify.py b/tests/unittests/test_ds_identify.py index de87be29..7aeeb91c 100644 --- a/tests/unittests/test_ds_identify.py +++ b/tests/unittests/test_ds_identify.py @@ -609,6 +609,10 @@ class TestDsIdentify(DsIdentifyBase): self.assertEqual(expected, [p for p in expected if p in toks], "path did not have expected tokens") + def test_zstack_is_ec2(self): + """EC2: chassis asset tag ends with 'zstack.io'""" + self._test_ds_found('Ec2-ZStack') + class TestIsIBMProvisioning(DsIdentifyBase): """Test the is_ibm_provisioning method in ds-identify.""" @@ -971,8 +975,11 @@ VALID_CFG = { {'name': 'blkid', 'ret': 2, 'out': ''}, ], 'files': {ds_smartos.METADATA_SOCKFILE: 'would be a socket\n'}, + }, + 'Ec2-ZStack': { + 'ds': 'Ec2', + 'files': {P_CHASSIS_ASSET_TAG: '123456.zstack.io\n'}, } - } # vi: ts=4 expandtab diff --git a/tools/ds-identify b/tools/ds-identify index 2447d14f..f76f2a6e 100755 --- a/tools/ds-identify +++ b/tools/ds-identify @@ -895,6 +895,11 @@ ec2_identify_platform() { *.brightbox.com) _RET="Brightbox"; return 0;; esac + local asset_tag="${DI_DMI_CHASSIS_ASSET_TAG}" + case "$asset_tag" in + *.zstack.io) _RET="ZStack"; return 0;; + esac + # AWS http://docs.aws.amazon.com/AWSEC2/ # latest/UserGuide/identify_ec2_instances.html local uuid="" hvuuid="${PATH_SYS_HYPERVISOR}/uuid" -- cgit v1.2.3 From 7d5d34f3643a2108d667759f57a5ab63d0affadd Mon Sep 17 00:00:00 2001 From: Scott Moser Date: Fri, 11 Oct 2019 14:54:49 +0000 Subject: Add Support for e24cloud to Ec2 datasource. e24cloud provides an EC2 compatible datasource. This just identifies their platform based on dmi 'system-vendor' having 'e24cloud'. https://www.e24cloud.com/en/ . Updated chassis typo in zstack unit test docstring. LP: #1696476 --- cloudinit/apport.py | 1 + cloudinit/sources/DataSourceEc2.py | 12 +++++++++++- doc/rtd/topics/datasources.rst | 3 ++- doc/rtd/topics/datasources/e24cloud.rst | 9 +++++++++ tests/unittests/test_datasource/test_ec2.py | 15 ++++++++++++++- tests/unittests/test_ds_identify.py | 18 +++++++++++++++++- tools/ds-identify | 5 +++++ 7 files changed, 59 insertions(+), 4 deletions(-) create mode 100644 doc/rtd/topics/datasources/e24cloud.rst (limited to 'cloudinit/sources/DataSourceEc2.py') diff --git a/cloudinit/apport.py b/cloudinit/apport.py index fde1f75b..c6797f12 100644 --- a/cloudinit/apport.py +++ b/cloudinit/apport.py @@ -22,6 +22,7 @@ KNOWN_CLOUD_NAMES = [ 'CloudSigma', 'CloudStack', 'DigitalOcean', + 'E24Cloud', 'GCE - Google Compute Engine', 'Exoscale', 'Hetzner Cloud', diff --git a/cloudinit/sources/DataSourceEc2.py b/cloudinit/sources/DataSourceEc2.py index 6c72ace2..1d88c9b1 100644 --- a/cloudinit/sources/DataSourceEc2.py +++ b/cloudinit/sources/DataSourceEc2.py @@ -34,6 +34,7 @@ class CloudNames(object): AWS = "aws" BRIGHTBOX = "brightbox" ZSTACK = "zstack" + E24CLOUD = "e24cloud" # UNKNOWN indicates no positive id. If strict_id is 'warn' or 'false', # then an attempt at the Ec2 Metadata service will be made. UNKNOWN = "unknown" @@ -483,11 +484,16 @@ def identify_zstack(data): return CloudNames.ZSTACK +def identify_e24cloud(data): + if data['vendor'] == 'e24cloud': + return CloudNames.E24CLOUD + + def identify_platform(): # identify the platform and return an entry in CloudNames. data = _collect_platform_data() checks = (identify_aws, identify_brightbox, identify_zstack, - lambda x: CloudNames.UNKNOWN) + identify_e24cloud, lambda x: CloudNames.UNKNOWN) for checker in checks: try: result = checker(data) @@ -506,6 +512,7 @@ def _collect_platform_data(): uuid_source: 'hypervisor' (/sys/hypervisor/uuid) or 'dmi' serial: dmi 'system-serial-number' (/sys/.../product_serial) asset_tag: 'dmidecode -s chassis-asset-tag' + vendor: dmi 'system-manufacturer' (/sys/.../sys_vendor) On Ec2 instances experimentation is that product_serial is upper case, and product_uuid is lower case. This returns lower case values for both. @@ -534,6 +541,9 @@ def _collect_platform_data(): data['asset_tag'] = asset_tag.lower() + vendor = util.read_dmi_data('system-manufacturer') + data['vendor'] = (vendor if vendor else '').lower() + return data diff --git a/doc/rtd/topics/datasources.rst b/doc/rtd/topics/datasources.rst index a337c08c..70fbe07d 100644 --- a/doc/rtd/topics/datasources.rst +++ b/doc/rtd/topics/datasources.rst @@ -29,8 +29,9 @@ The following is a list of documents for each supported datasource: datasources/aliyun.rst datasources/altcloud.rst - datasources/ec2.rst datasources/azure.rst + datasources/ec2.rst + datasources/e24cloud.rst datasources/cloudsigma.rst datasources/cloudstack.rst datasources/configdrive.rst diff --git a/doc/rtd/topics/datasources/e24cloud.rst b/doc/rtd/topics/datasources/e24cloud.rst new file mode 100644 index 00000000..de9a4127 --- /dev/null +++ b/doc/rtd/topics/datasources/e24cloud.rst @@ -0,0 +1,9 @@ +.. _datasource_e24cloud: + +E24Cloud +======== +`E24Cloud ` platform provides an AWS Ec2 metadata +service clone. It identifies itself to guests using the dmi +system-manufacturer (/sys/class/dmi/id/sys_vendor). + +.. vi: textwidth=78 diff --git a/tests/unittests/test_datasource/test_ec2.py b/tests/unittests/test_datasource/test_ec2.py index 6fabf258..5e1dd777 100644 --- a/tests/unittests/test_datasource/test_ec2.py +++ b/tests/unittests/test_datasource/test_ec2.py @@ -672,13 +672,14 @@ class TesIdentifyPlatform(test_helpers.CiTestCase): 'serial': 'H23-C4J3JV-R6', 'uuid': '81c7e555-6471-4833-9551-1ab366c4cfd2', 'uuid_source': 'dmi', + 'vendor': 'tothecloud', } unspecial.update(**kwargs) return unspecial @mock.patch('cloudinit.sources.DataSourceEc2._collect_platform_data') def test_identify_zstack(self, m_collect): - """zstack should be identified if cassis-asset-tag ends in .zstack.io + """zstack should be identified if chassis-asset-tag ends in .zstack.io """ m_collect.return_value = self.collmock(asset_tag='123456.zstack.io') self.assertEqual(ec2.CloudNames.ZSTACK, ec2.identify_platform()) @@ -690,4 +691,16 @@ class TesIdentifyPlatform(test_helpers.CiTestCase): m_collect.return_value = self.collmock(asset_tag='123456.buzzstack.io') self.assertEqual(ec2.CloudNames.UNKNOWN, ec2.identify_platform()) + @mock.patch('cloudinit.sources.DataSourceEc2._collect_platform_data') + def test_identify_e24cloud(self, m_collect): + """e24cloud identified if vendor is e24cloud""" + m_collect.return_value = self.collmock(vendor='e24cloud') + self.assertEqual(ec2.CloudNames.E24CLOUD, ec2.identify_platform()) + + @mock.patch('cloudinit.sources.DataSourceEc2._collect_platform_data') + def test_identify_e24cloud_negative(self, m_collect): + """e24cloud identified if vendor is e24cloud""" + m_collect.return_value = self.collmock(vendor='e24cloudyday') + self.assertEqual(ec2.CloudNames.UNKNOWN, ec2.identify_platform()) + # vi: ts=4 expandtab diff --git a/tests/unittests/test_ds_identify.py b/tests/unittests/test_ds_identify.py index c5b5c46c..12c6ae36 100644 --- a/tests/unittests/test_ds_identify.py +++ b/tests/unittests/test_ds_identify.py @@ -616,6 +616,14 @@ class TestDsIdentify(DsIdentifyBase): """EC2: chassis asset tag ends with 'zstack.io'""" self._test_ds_found('Ec2-ZStack') + def test_e24cloud_is_ec2(self): + """EC2: e24cloud identified by sys_vendor""" + self._test_ds_found('Ec2-E24Cloud') + + def test_e24cloud_not_active(self): + """EC2: bobrightbox.com in product_serial is not brightbox'""" + self._test_ds_not_found('Ec2-E24Cloud-negative') + class TestIsIBMProvisioning(DsIdentifyBase): """Test the is_ibm_provisioning method in ds-identify.""" @@ -994,7 +1002,15 @@ VALID_CFG = { 'Ec2-ZStack': { 'ds': 'Ec2', 'files': {P_CHASSIS_ASSET_TAG: '123456.zstack.io\n'}, - } + }, + 'Ec2-E24Cloud': { + 'ds': 'Ec2', + 'files': {P_SYS_VENDOR: 'e24cloud\n'}, + }, + 'Ec2-E24Cloud-negative': { + 'ds': 'Ec2', + 'files': {P_SYS_VENDOR: 'e24cloudyday\n'}, + } } # vi: ts=4 expandtab diff --git a/tools/ds-identify b/tools/ds-identify index 40fc0604..20a99ee9 100755 --- a/tools/ds-identify +++ b/tools/ds-identify @@ -905,6 +905,11 @@ ec2_identify_platform() { *.zstack.io) _RET="ZStack"; return 0;; esac + local vendor="${DI_DMI_SYS_VENDOR}" + case "$vendor" in + e24cloud) _RET="E24cloud"; return 0;; + esac + # AWS http://docs.aws.amazon.com/AWSEC2/ # latest/UserGuide/identify_ec2_instances.html local uuid="" hvuuid="${PATH_SYS_HYPERVISOR}/uuid" -- cgit v1.2.3 From 4bc399e0cd0b7e9177f948aecd49f6b8323ff30b Mon Sep 17 00:00:00 2001 From: Ryan Harper Date: Fri, 22 Nov 2019 21:05:44 -0600 Subject: ec2: Add support for AWS IMDS v2 (session-oriented) (#55) * ec2: Add support for AWS IMDS v2 (session-oriented) AWS now supports a new version of fetching Instance Metadata[1]. Update cloud-init's ec2 utility functions and update ec2 derived datasources accordingly. For DataSourceEc2 (versus ec2-look-alikes) cloud-init will issue the PUT request to obtain an API token for the maximum lifetime and then all subsequent interactions with the IMDS will include the token in the header. If the API token endpoint is unreachable on Ec2 platform, log a warning and fallback to using IMDS v1 and which does not use session tokens when communicating with the Instance metadata service. We handle read errors, typically seen if the IMDS is beyond one etwork hop (IMDSv2 responses have a ttl=1), by setting the api token to a disabled value and then using IMDSv1 paths. To support token-based headers, ec2_utils functions were updated to support custom headers_cb and exception_cb callback functions so Ec2 could store, or refresh API tokens in the event of token becoming stale. [1] https://docs.aws.amazon.com/AWSEC2/latest/ \ UserGuide/ec2-instance-metadata.html \ #instance-metadata-v2-how-it-works --- cloudinit/ec2_utils.py | 37 +++-- cloudinit/sources/DataSourceCloudStack.py | 2 +- cloudinit/sources/DataSourceEc2.py | 166 ++++++++++++++++++--- cloudinit/sources/DataSourceExoscale.py | 2 +- cloudinit/sources/DataSourceMAAS.py | 2 +- cloudinit/sources/DataSourceOpenStack.py | 2 +- cloudinit/url_helper.py | 15 +- tests/unittests/test_datasource/test_cloudstack.py | 21 ++- tests/unittests/test_datasource/test_ec2.py | 6 +- 9 files changed, 201 insertions(+), 52 deletions(-) (limited to 'cloudinit/sources/DataSourceEc2.py') diff --git a/cloudinit/ec2_utils.py b/cloudinit/ec2_utils.py index 3b7b17f1..57708c14 100644 --- a/cloudinit/ec2_utils.py +++ b/cloudinit/ec2_utils.py @@ -134,25 +134,28 @@ class MetadataMaterializer(object): return joined -def _skip_retry_on_codes(status_codes, _request_args, cause): +def skip_retry_on_codes(status_codes, _request_args, cause): """Returns False if cause.code is in status_codes.""" return cause.code not in status_codes def get_instance_userdata(api_version='latest', metadata_address='http://169.254.169.254', - ssl_details=None, timeout=5, retries=5): + ssl_details=None, timeout=5, retries=5, + headers_cb=None, exception_cb=None): ud_url = url_helper.combine_url(metadata_address, api_version) ud_url = url_helper.combine_url(ud_url, 'user-data') user_data = '' try: - # It is ok for userdata to not exist (thats why we are stopping if - # NOT_FOUND occurs) and just in that case returning an empty string. - exception_cb = functools.partial(_skip_retry_on_codes, - SKIP_USERDATA_CODES) + if not exception_cb: + # It is ok for userdata to not exist (thats why we are stopping if + # NOT_FOUND occurs) and just in that case returning an empty + # string. + exception_cb = functools.partial(skip_retry_on_codes, + SKIP_USERDATA_CODES) response = url_helper.read_file_or_url( ud_url, ssl_details=ssl_details, timeout=timeout, - retries=retries, exception_cb=exception_cb) + retries=retries, exception_cb=exception_cb, headers_cb=headers_cb) user_data = response.contents except url_helper.UrlError as e: if e.code not in SKIP_USERDATA_CODES: @@ -165,11 +168,13 @@ def get_instance_userdata(api_version='latest', def _get_instance_metadata(tree, api_version='latest', metadata_address='http://169.254.169.254', ssl_details=None, timeout=5, retries=5, - leaf_decoder=None): + leaf_decoder=None, headers_cb=None, + exception_cb=None): md_url = url_helper.combine_url(metadata_address, api_version, tree) caller = functools.partial( url_helper.read_file_or_url, ssl_details=ssl_details, - timeout=timeout, retries=retries) + timeout=timeout, retries=retries, headers_cb=headers_cb, + exception_cb=exception_cb) def mcaller(url): return caller(url).contents @@ -191,22 +196,28 @@ def _get_instance_metadata(tree, api_version='latest', def get_instance_metadata(api_version='latest', metadata_address='http://169.254.169.254', ssl_details=None, timeout=5, retries=5, - leaf_decoder=None): + leaf_decoder=None, headers_cb=None, + exception_cb=None): # Note, 'meta-data' explicitly has trailing /. # this is required for CloudStack (LP: #1356855) return _get_instance_metadata(tree='meta-data/', api_version=api_version, metadata_address=metadata_address, ssl_details=ssl_details, timeout=timeout, - retries=retries, leaf_decoder=leaf_decoder) + retries=retries, leaf_decoder=leaf_decoder, + headers_cb=headers_cb, + exception_cb=exception_cb) def get_instance_identity(api_version='latest', metadata_address='http://169.254.169.254', ssl_details=None, timeout=5, retries=5, - leaf_decoder=None): + leaf_decoder=None, headers_cb=None, + exception_cb=None): return _get_instance_metadata(tree='dynamic/instance-identity', api_version=api_version, metadata_address=metadata_address, ssl_details=ssl_details, timeout=timeout, - retries=retries, leaf_decoder=leaf_decoder) + retries=retries, leaf_decoder=leaf_decoder, + headers_cb=headers_cb, + exception_cb=exception_cb) # vi: ts=4 expandtab diff --git a/cloudinit/sources/DataSourceCloudStack.py b/cloudinit/sources/DataSourceCloudStack.py index e333cb50..2013bed7 100644 --- a/cloudinit/sources/DataSourceCloudStack.py +++ b/cloudinit/sources/DataSourceCloudStack.py @@ -93,7 +93,7 @@ class DataSourceCloudStack(sources.DataSource): urls = [uhelp.combine_url(self.metadata_address, 'latest/meta-data/instance-id')] start_time = time.time() - url = uhelp.wait_for_url( + url, _response = uhelp.wait_for_url( urls=urls, max_wait=url_params.max_wait_seconds, timeout=url_params.timeout_seconds, status_cb=LOG.warning) diff --git a/cloudinit/sources/DataSourceEc2.py b/cloudinit/sources/DataSourceEc2.py index 1d88c9b1..b9f346a6 100644 --- a/cloudinit/sources/DataSourceEc2.py +++ b/cloudinit/sources/DataSourceEc2.py @@ -28,6 +28,10 @@ SKIP_METADATA_URL_CODES = frozenset([uhelp.NOT_FOUND]) STRICT_ID_PATH = ("datasource", "Ec2", "strict_id") STRICT_ID_DEFAULT = "warn" +API_TOKEN_ROUTE = 'latest/api/token' +API_TOKEN_DISABLED = '_ec2_disable_api_token' +AWS_TOKEN_TTL_SECONDS = '21600' + class CloudNames(object): ALIYUN = "aliyun" @@ -62,6 +66,7 @@ class DataSourceEc2(sources.DataSource): url_max_wait = 120 url_timeout = 50 + _api_token = None # API token for accessing the metadata service _network_config = sources.UNSET # Used to cache calculated network cfg v1 # Whether we want to get network configuration from the metadata service. @@ -148,11 +153,12 @@ class DataSourceEc2(sources.DataSource): min_metadata_version. """ # Assumes metadata service is already up + url_tmpl = '{0}/{1}/meta-data/instance-id' + headers = self._get_headers() for api_ver in self.extended_metadata_versions: - url = '{0}/{1}/meta-data/instance-id'.format( - self.metadata_address, api_ver) + url = url_tmpl.format(self.metadata_address, api_ver) try: - resp = uhelp.readurl(url=url) + resp = uhelp.readurl(url=url, headers=headers) except uhelp.UrlError as e: LOG.debug('url %s raised exception %s', url, e) else: @@ -172,12 +178,39 @@ class DataSourceEc2(sources.DataSource): # setup self.identity. So we need to do that now. api_version = self.get_metadata_api_version() self.identity = ec2.get_instance_identity( - api_version, self.metadata_address).get('document', {}) + api_version, self.metadata_address, + headers_cb=self._get_headers, + exception_cb=self._refresh_stale_aws_token_cb).get( + 'document', {}) return self.identity.get( 'instanceId', self.metadata['instance-id']) else: return self.metadata['instance-id'] + def _maybe_fetch_api_token(self, mdurls, timeout=None, max_wait=None): + if self.cloud_name != CloudNames.AWS: + return + + urls = [] + url2base = {} + url_path = API_TOKEN_ROUTE + request_method = 'PUT' + for url in mdurls: + cur = '{0}/{1}'.format(url, url_path) + urls.append(cur) + url2base[cur] = url + + # use the self._status_cb to check for Read errors, which means + # we can't reach the API token URL, so we should disable IMDSv2 + LOG.debug('Fetching Ec2 IMDSv2 API Token') + url, response = uhelp.wait_for_url( + urls=urls, max_wait=1, timeout=1, status_cb=self._status_cb, + headers_cb=self._get_headers, request_method=request_method) + + if url and response: + self._api_token = response + return url2base[url] + def wait_for_metadata_service(self): mcfg = self.ds_cfg @@ -199,27 +232,39 @@ class DataSourceEc2(sources.DataSource): LOG.warning("Empty metadata url list! using default list") mdurls = self.metadata_urls - urls = [] - url2base = {} - for url in mdurls: - cur = '{0}/{1}/meta-data/instance-id'.format( - url, self.min_metadata_version) - urls.append(cur) - url2base[cur] = url - - start_time = time.time() - url = uhelp.wait_for_url( - urls=urls, max_wait=url_params.max_wait_seconds, - timeout=url_params.timeout_seconds, status_cb=LOG.warning) - - if url: - self.metadata_address = url2base[url] + # try the api token path first + metadata_address = self._maybe_fetch_api_token(mdurls) + if not metadata_address: + if self._api_token == API_TOKEN_DISABLED: + LOG.warning('Retrying with IMDSv1') + # if we can't get a token, use instance-id path + urls = [] + url2base = {} + url_path = '{ver}/meta-data/instance-id'.format( + ver=self.min_metadata_version) + request_method = 'GET' + for url in mdurls: + cur = '{0}/{1}'.format(url, url_path) + urls.append(cur) + url2base[cur] = url + + start_time = time.time() + url, _ = uhelp.wait_for_url( + urls=urls, max_wait=url_params.max_wait_seconds, + timeout=url_params.timeout_seconds, status_cb=LOG.warning, + headers_cb=self._get_headers, request_method=request_method) + + if url: + metadata_address = url2base[url] + + if metadata_address: + self.metadata_address = metadata_address LOG.debug("Using metadata source: '%s'", self.metadata_address) else: LOG.critical("Giving up on md from %s after %s seconds", urls, int(time.time() - start_time)) - return bool(url) + return bool(metadata_address) def device_name_to_device(self, name): # Consult metadata service, that has @@ -376,14 +421,22 @@ class DataSourceEc2(sources.DataSource): return {} api_version = self.get_metadata_api_version() crawled_metadata = {} + if self.cloud_name == CloudNames.AWS: + exc_cb = self._refresh_stale_aws_token_cb + exc_cb_ud = self._skip_or_refresh_stale_aws_token_cb + else: + exc_cb = exc_cb_ud = None try: crawled_metadata['user-data'] = ec2.get_instance_userdata( - api_version, self.metadata_address) + api_version, self.metadata_address, + headers_cb=self._get_headers, exception_cb=exc_cb_ud) crawled_metadata['meta-data'] = ec2.get_instance_metadata( - api_version, self.metadata_address) + api_version, self.metadata_address, + headers_cb=self._get_headers, exception_cb=exc_cb) if self.cloud_name == CloudNames.AWS: identity = ec2.get_instance_identity( - api_version, self.metadata_address) + api_version, self.metadata_address, + headers_cb=self._get_headers, exception_cb=exc_cb) crawled_metadata['dynamic'] = {'instance-identity': identity} except Exception: util.logexc( @@ -393,6 +446,73 @@ class DataSourceEc2(sources.DataSource): crawled_metadata['_metadata_api_version'] = api_version return crawled_metadata + def _refresh_api_token(self, seconds=AWS_TOKEN_TTL_SECONDS): + """Request new metadata API token. + @param seconds: The lifetime of the token in seconds + + @return: The API token or None if unavailable. + """ + if self.cloud_name != CloudNames.AWS: + return None + LOG.debug("Refreshing Ec2 metadata API token") + request_header = {'X-aws-ec2-metadata-token-ttl-seconds': seconds} + token_url = '{}/{}'.format(self.metadata_address, API_TOKEN_ROUTE) + try: + response = uhelp.readurl( + token_url, headers=request_header, request_method="PUT") + except uhelp.UrlError as e: + LOG.warning( + 'Unable to get API token: %s raised exception %s', + token_url, e) + return None + return response.contents + + def _skip_or_refresh_stale_aws_token_cb(self, msg, exception): + """Callback will not retry on SKIP_USERDATA_CODES or if no token + is available.""" + retry = ec2.skip_retry_on_codes( + ec2.SKIP_USERDATA_CODES, msg, exception) + if not retry: + return False # False raises exception + return self._refresh_stale_aws_token_cb(msg, exception) + + def _refresh_stale_aws_token_cb(self, msg, exception): + """Exception handler for Ec2 to refresh token if token is stale.""" + if isinstance(exception, uhelp.UrlError) and exception.code == 401: + # With _api_token as None, _get_headers will _refresh_api_token. + LOG.debug("Clearing cached Ec2 API token due to expiry") + self._api_token = None + return True # always retry + + def _status_cb(self, msg, exc=None): + LOG.warning(msg) + if 'Read timed out' in msg: + LOG.warning('Cannot use Ec2 IMDSv2 API tokens, using IMDSv1') + self._api_token = API_TOKEN_DISABLED + + def _get_headers(self, url=''): + """Return a dict of headers for accessing a url. + + If _api_token is unset on AWS, attempt to refresh the token via a PUT + and then return the updated token header. + """ + if self.cloud_name != CloudNames.AWS or (self._api_token == + API_TOKEN_DISABLED): + return {} + # Request a 6 hour token if URL is API_TOKEN_ROUTE + request_token_header = { + 'X-aws-ec2-metadata-token-ttl-seconds': AWS_TOKEN_TTL_SECONDS} + if API_TOKEN_ROUTE in url: + return request_token_header + if not self._api_token: + # If we don't yet have an API token, get one via a PUT against + # API_TOKEN_ROUTE. This _api_token may get unset by a 403 due + # to an invalid or expired token + self._api_token = self._refresh_api_token() + if not self._api_token: + return {} + return {'X-aws-ec2-metadata-token': self._api_token} + class DataSourceEc2Local(DataSourceEc2): """Datasource run at init-local which sets up network to query metadata. diff --git a/cloudinit/sources/DataSourceExoscale.py b/cloudinit/sources/DataSourceExoscale.py index 4616daa7..d59aefd1 100644 --- a/cloudinit/sources/DataSourceExoscale.py +++ b/cloudinit/sources/DataSourceExoscale.py @@ -61,7 +61,7 @@ class DataSourceExoscale(sources.DataSource): metadata_url = "{}/{}/meta-data/instance-id".format( self.metadata_url, self.api_version) - url = url_helper.wait_for_url( + url, _response = url_helper.wait_for_url( urls=[metadata_url], max_wait=self.url_max_wait, timeout=self.url_timeout, diff --git a/cloudinit/sources/DataSourceMAAS.py b/cloudinit/sources/DataSourceMAAS.py index 61aa6d7e..517913aa 100644 --- a/cloudinit/sources/DataSourceMAAS.py +++ b/cloudinit/sources/DataSourceMAAS.py @@ -136,7 +136,7 @@ class DataSourceMAAS(sources.DataSource): url = url[:-1] check_url = "%s/%s/meta-data/instance-id" % (url, MD_VERSION) urls = [check_url] - url = self.oauth_helper.wait_for_url( + url, _response = self.oauth_helper.wait_for_url( urls=urls, max_wait=max_wait, timeout=timeout) if url: diff --git a/cloudinit/sources/DataSourceOpenStack.py b/cloudinit/sources/DataSourceOpenStack.py index 4a015240..7a5e71b6 100644 --- a/cloudinit/sources/DataSourceOpenStack.py +++ b/cloudinit/sources/DataSourceOpenStack.py @@ -76,7 +76,7 @@ class DataSourceOpenStack(openstack.SourceMixin, sources.DataSource): url_params = self.get_url_params() start_time = time.time() - avail_url = url_helper.wait_for_url( + avail_url, _response = url_helper.wait_for_url( urls=md_urls, max_wait=url_params.max_wait_seconds, timeout=url_params.timeout_seconds) if avail_url: diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index 0f4c36f7..48ddae45 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -101,7 +101,7 @@ def read_file_or_url(url, timeout=5, retries=10, raise UrlError(cause=e, code=code, headers=None, url=url) return FileResponse(file_path, contents=contents) else: - return readurl(url, timeout=timeout, retries=retries, headers=headers, + return readurl(url, timeout=timeout, retries=retries, headers_cb=headers_cb, data=data, sec_between=sec_between, ssl_details=ssl_details, exception_cb=exception_cb) @@ -310,7 +310,7 @@ def readurl(url, data=None, timeout=None, retries=0, sec_between=1, def wait_for_url(urls, max_wait=None, timeout=None, status_cb=None, headers_cb=None, sleep_time=1, - exception_cb=None, sleep_time_cb=None): + exception_cb=None, sleep_time_cb=None, request_method=None): """ urls: a list of urls to try max_wait: roughly the maximum time to wait before giving up @@ -325,6 +325,8 @@ def wait_for_url(urls, max_wait=None, timeout=None, 'exception', the exception that occurred. sleep_time_cb: call method with 2 arguments (response, loop_n) that generates the next sleep time. + request_method: indicate the type of HTTP request, GET, PUT, or POST + returns: tuple of (url, response contents), on failure, (False, None) the idea of this routine is to wait for the EC2 metadata service to come up. On both Eucalyptus and EC2 we have seen the case where @@ -381,8 +383,9 @@ def wait_for_url(urls, max_wait=None, timeout=None, else: headers = {} - response = readurl(url, headers=headers, timeout=timeout, - check_status=False) + response = readurl( + url, headers=headers, timeout=timeout, + check_status=False, request_method=request_method) if not response.contents: reason = "empty response [%s]" % (response.code) url_exc = UrlError(ValueError(reason), code=response.code, @@ -392,7 +395,7 @@ def wait_for_url(urls, max_wait=None, timeout=None, url_exc = UrlError(ValueError(reason), code=response.code, headers=response.headers, url=url) else: - return url + return url, response.contents except UrlError as e: reason = "request error [%s]" % e url_exc = e @@ -421,7 +424,7 @@ def wait_for_url(urls, max_wait=None, timeout=None, sleep_time) time.sleep(sleep_time) - return False + return False, None class OauthUrlHelper(object): diff --git a/tests/unittests/test_datasource/test_cloudstack.py b/tests/unittests/test_datasource/test_cloudstack.py index d6d2d6b2..83c2f753 100644 --- a/tests/unittests/test_datasource/test_cloudstack.py +++ b/tests/unittests/test_datasource/test_cloudstack.py @@ -10,6 +10,9 @@ from cloudinit.tests.helpers import CiTestCase, ExitStack, mock import os import time +MOD_PATH = 'cloudinit.sources.DataSourceCloudStack' +DS_PATH = MOD_PATH + '.DataSourceCloudStack' + class TestCloudStackPasswordFetching(CiTestCase): @@ -17,7 +20,7 @@ class TestCloudStackPasswordFetching(CiTestCase): super(TestCloudStackPasswordFetching, self).setUp() self.patches = ExitStack() self.addCleanup(self.patches.close) - mod_name = 'cloudinit.sources.DataSourceCloudStack' + mod_name = MOD_PATH self.patches.enter_context(mock.patch('{0}.ec2'.format(mod_name))) self.patches.enter_context(mock.patch('{0}.uhelp'.format(mod_name))) default_gw = "192.201.20.0" @@ -56,7 +59,9 @@ class TestCloudStackPasswordFetching(CiTestCase): ds.get_data() self.assertEqual({}, ds.get_config_obj()) - def test_password_sets_password(self): + @mock.patch(DS_PATH + '.wait_for_metadata_service') + def test_password_sets_password(self, m_wait): + m_wait.return_value = True password = 'SekritSquirrel' self._set_password_server_response(password) ds = DataSourceCloudStack( @@ -64,7 +69,9 @@ class TestCloudStackPasswordFetching(CiTestCase): ds.get_data() self.assertEqual(password, ds.get_config_obj()['password']) - def test_bad_request_doesnt_stop_ds_from_working(self): + @mock.patch(DS_PATH + '.wait_for_metadata_service') + def test_bad_request_doesnt_stop_ds_from_working(self, m_wait): + m_wait.return_value = True self._set_password_server_response('bad_request') ds = DataSourceCloudStack( {}, None, helpers.Paths({'run_dir': self.tmp})) @@ -79,7 +86,9 @@ class TestCloudStackPasswordFetching(CiTestCase): request_types.append(arg.split()[1]) self.assertEqual(expected_request_types, request_types) - def test_valid_response_means_password_marked_as_saved(self): + @mock.patch(DS_PATH + '.wait_for_metadata_service') + def test_valid_response_means_password_marked_as_saved(self, m_wait): + m_wait.return_value = True password = 'SekritSquirrel' subp = self._set_password_server_response(password) ds = DataSourceCloudStack( @@ -92,7 +101,9 @@ class TestCloudStackPasswordFetching(CiTestCase): subp = self._set_password_server_response(response_string) ds = DataSourceCloudStack( {}, None, helpers.Paths({'run_dir': self.tmp})) - ds.get_data() + with mock.patch(DS_PATH + '.wait_for_metadata_service') as m_wait: + m_wait.return_value = True + ds.get_data() self.assertRequestTypesSent(subp, ['send_my_password']) def test_password_not_saved_if_empty(self): diff --git a/tests/unittests/test_datasource/test_ec2.py b/tests/unittests/test_datasource/test_ec2.py index 5e1dd777..34a089f2 100644 --- a/tests/unittests/test_datasource/test_ec2.py +++ b/tests/unittests/test_datasource/test_ec2.py @@ -191,7 +191,9 @@ def register_mock_metaserver(base_url, data): register(base_url, 'not found', status=404) def myreg(*argc, **kwargs): - return httpretty.register_uri(httpretty.GET, *argc, **kwargs) + url = argc[0] + method = httpretty.PUT if ec2.API_TOKEN_ROUTE in url else httpretty.GET + return httpretty.register_uri(method, *argc, **kwargs) register_helper(myreg, base_url, data) @@ -237,6 +239,8 @@ class TestEc2(test_helpers.HttprettyTestCase): if md: all_versions = ( [ds.min_metadata_version] + ds.extended_metadata_versions) + token_url = self.data_url('latest', data_item='api/token') + register_mock_metaserver(token_url, 'API-TOKEN') for version in all_versions: metadata_url = self.data_url(version) + '/' if version == md_version: -- cgit v1.2.3 From 87cd040ed8fe7195cbb357ed3bbf53cd2a81436c Mon Sep 17 00:00:00 2001 From: Ryan Harper Date: Wed, 19 Feb 2020 15:01:09 -0600 Subject: ec2: Do not log IMDSv2 token values, instead use REDACTED (#219) Instead of logging the token values used log the headers and replace the actual values with the string 'REDACTED'. This allows users to examine cloud-init.log and see that the IMDSv2 token header is being used but avoids leaving the value used in the log file itself. LP: #1863943 --- cloudinit/ec2_utils.py | 12 ++++++++-- cloudinit/sources/DataSourceEc2.py | 35 +++++++++++++++++++---------- cloudinit/url_helper.py | 27 ++++++++++++++++------ tests/unittests/test_datasource/test_ec2.py | 17 ++++++++++++++ 4 files changed, 70 insertions(+), 21 deletions(-) (limited to 'cloudinit/sources/DataSourceEc2.py') diff --git a/cloudinit/ec2_utils.py b/cloudinit/ec2_utils.py index 57708c14..34acfe84 100644 --- a/cloudinit/ec2_utils.py +++ b/cloudinit/ec2_utils.py @@ -142,7 +142,8 @@ def skip_retry_on_codes(status_codes, _request_args, cause): def get_instance_userdata(api_version='latest', metadata_address='http://169.254.169.254', ssl_details=None, timeout=5, retries=5, - headers_cb=None, exception_cb=None): + headers_cb=None, headers_redact=None, + exception_cb=None): ud_url = url_helper.combine_url(metadata_address, api_version) ud_url = url_helper.combine_url(ud_url, 'user-data') user_data = '' @@ -155,7 +156,8 @@ def get_instance_userdata(api_version='latest', SKIP_USERDATA_CODES) response = url_helper.read_file_or_url( ud_url, ssl_details=ssl_details, timeout=timeout, - retries=retries, exception_cb=exception_cb, headers_cb=headers_cb) + retries=retries, exception_cb=exception_cb, headers_cb=headers_cb, + headers_redact=headers_redact) user_data = response.contents except url_helper.UrlError as e: if e.code not in SKIP_USERDATA_CODES: @@ -169,11 +171,13 @@ def _get_instance_metadata(tree, api_version='latest', metadata_address='http://169.254.169.254', ssl_details=None, timeout=5, retries=5, leaf_decoder=None, headers_cb=None, + headers_redact=None, exception_cb=None): md_url = url_helper.combine_url(metadata_address, api_version, tree) caller = functools.partial( url_helper.read_file_or_url, ssl_details=ssl_details, timeout=timeout, retries=retries, headers_cb=headers_cb, + headers_redact=headers_redact, exception_cb=exception_cb) def mcaller(url): @@ -197,6 +201,7 @@ def get_instance_metadata(api_version='latest', metadata_address='http://169.254.169.254', ssl_details=None, timeout=5, retries=5, leaf_decoder=None, headers_cb=None, + headers_redact=None, exception_cb=None): # Note, 'meta-data' explicitly has trailing /. # this is required for CloudStack (LP: #1356855) @@ -204,6 +209,7 @@ def get_instance_metadata(api_version='latest', metadata_address=metadata_address, ssl_details=ssl_details, timeout=timeout, retries=retries, leaf_decoder=leaf_decoder, + headers_redact=headers_redact, headers_cb=headers_cb, exception_cb=exception_cb) @@ -212,12 +218,14 @@ def get_instance_identity(api_version='latest', metadata_address='http://169.254.169.254', ssl_details=None, timeout=5, retries=5, leaf_decoder=None, headers_cb=None, + headers_redact=None, exception_cb=None): return _get_instance_metadata(tree='dynamic/instance-identity', api_version=api_version, metadata_address=metadata_address, ssl_details=ssl_details, timeout=timeout, retries=retries, leaf_decoder=leaf_decoder, + headers_redact=headers_redact, headers_cb=headers_cb, exception_cb=exception_cb) # vi: ts=4 expandtab diff --git a/cloudinit/sources/DataSourceEc2.py b/cloudinit/sources/DataSourceEc2.py index b9f346a6..0f2bfef4 100644 --- a/cloudinit/sources/DataSourceEc2.py +++ b/cloudinit/sources/DataSourceEc2.py @@ -31,6 +31,9 @@ STRICT_ID_DEFAULT = "warn" API_TOKEN_ROUTE = 'latest/api/token' API_TOKEN_DISABLED = '_ec2_disable_api_token' AWS_TOKEN_TTL_SECONDS = '21600' +AWS_TOKEN_PUT_HEADER = 'X-aws-ec2-metadata-token' +AWS_TOKEN_REQ_HEADER = AWS_TOKEN_PUT_HEADER + '-ttl-seconds' +AWS_TOKEN_REDACT = [AWS_TOKEN_PUT_HEADER, AWS_TOKEN_REQ_HEADER] class CloudNames(object): @@ -158,7 +161,8 @@ class DataSourceEc2(sources.DataSource): for api_ver in self.extended_metadata_versions: url = url_tmpl.format(self.metadata_address, api_ver) try: - resp = uhelp.readurl(url=url, headers=headers) + resp = uhelp.readurl(url=url, headers=headers, + headers_redact=AWS_TOKEN_REDACT) except uhelp.UrlError as e: LOG.debug('url %s raised exception %s', url, e) else: @@ -180,6 +184,7 @@ class DataSourceEc2(sources.DataSource): self.identity = ec2.get_instance_identity( api_version, self.metadata_address, headers_cb=self._get_headers, + headers_redact=AWS_TOKEN_REDACT, exception_cb=self._refresh_stale_aws_token_cb).get( 'document', {}) return self.identity.get( @@ -205,7 +210,8 @@ class DataSourceEc2(sources.DataSource): LOG.debug('Fetching Ec2 IMDSv2 API Token') url, response = uhelp.wait_for_url( urls=urls, max_wait=1, timeout=1, status_cb=self._status_cb, - headers_cb=self._get_headers, request_method=request_method) + headers_cb=self._get_headers, request_method=request_method, + headers_redact=AWS_TOKEN_REDACT) if url and response: self._api_token = response @@ -252,7 +258,8 @@ class DataSourceEc2(sources.DataSource): url, _ = uhelp.wait_for_url( urls=urls, max_wait=url_params.max_wait_seconds, timeout=url_params.timeout_seconds, status_cb=LOG.warning, - headers_cb=self._get_headers, request_method=request_method) + headers_redact=AWS_TOKEN_REDACT, headers_cb=self._get_headers, + request_method=request_method) if url: metadata_address = url2base[url] @@ -420,6 +427,7 @@ class DataSourceEc2(sources.DataSource): if not self.wait_for_metadata_service(): return {} api_version = self.get_metadata_api_version() + redact = AWS_TOKEN_REDACT crawled_metadata = {} if self.cloud_name == CloudNames.AWS: exc_cb = self._refresh_stale_aws_token_cb @@ -429,14 +437,17 @@ class DataSourceEc2(sources.DataSource): try: crawled_metadata['user-data'] = ec2.get_instance_userdata( api_version, self.metadata_address, - headers_cb=self._get_headers, exception_cb=exc_cb_ud) + headers_cb=self._get_headers, headers_redact=redact, + exception_cb=exc_cb_ud) crawled_metadata['meta-data'] = ec2.get_instance_metadata( api_version, self.metadata_address, - headers_cb=self._get_headers, exception_cb=exc_cb) + headers_cb=self._get_headers, headers_redact=redact, + exception_cb=exc_cb) if self.cloud_name == CloudNames.AWS: identity = ec2.get_instance_identity( api_version, self.metadata_address, - headers_cb=self._get_headers, exception_cb=exc_cb) + headers_cb=self._get_headers, headers_redact=redact, + exception_cb=exc_cb) crawled_metadata['dynamic'] = {'instance-identity': identity} except Exception: util.logexc( @@ -455,11 +466,12 @@ class DataSourceEc2(sources.DataSource): if self.cloud_name != CloudNames.AWS: return None LOG.debug("Refreshing Ec2 metadata API token") - request_header = {'X-aws-ec2-metadata-token-ttl-seconds': seconds} + request_header = {AWS_TOKEN_REQ_HEADER: seconds} token_url = '{}/{}'.format(self.metadata_address, API_TOKEN_ROUTE) try: - response = uhelp.readurl( - token_url, headers=request_header, request_method="PUT") + response = uhelp.readurl(token_url, headers=request_header, + headers_redact=AWS_TOKEN_REDACT, + request_method="PUT") except uhelp.UrlError as e: LOG.warning( 'Unable to get API token: %s raised exception %s', @@ -500,8 +512,7 @@ class DataSourceEc2(sources.DataSource): API_TOKEN_DISABLED): return {} # Request a 6 hour token if URL is API_TOKEN_ROUTE - request_token_header = { - 'X-aws-ec2-metadata-token-ttl-seconds': AWS_TOKEN_TTL_SECONDS} + request_token_header = {AWS_TOKEN_REQ_HEADER: AWS_TOKEN_TTL_SECONDS} if API_TOKEN_ROUTE in url: return request_token_header if not self._api_token: @@ -511,7 +522,7 @@ class DataSourceEc2(sources.DataSource): self._api_token = self._refresh_api_token() if not self._api_token: return {} - return {'X-aws-ec2-metadata-token': self._api_token} + return {AWS_TOKEN_PUT_HEADER: self._api_token} class DataSourceEc2Local(DataSourceEc2): diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index f6d68436..eeb27aa8 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -8,6 +8,7 @@ # # This file is part of cloud-init. See LICENSE file for license information. +import copy import json import os import time @@ -31,6 +32,7 @@ LOG = logging.getLogger(__name__) SSL_ENABLED = False CONFIG_ENABLED = False # This was added in 0.7 (but taken out in >=1.0) _REQ_VER = None +REDACTED = 'REDACTED' try: from distutils.version import LooseVersion import pkg_resources @@ -189,9 +191,9 @@ def _get_ssl_args(url, ssl_details): def readurl(url, data=None, timeout=None, retries=0, sec_between=1, - headers=None, headers_cb=None, ssl_details=None, - check_status=True, allow_redirects=True, exception_cb=None, - session=None, infinite=False, log_req_resp=True, + headers=None, headers_cb=None, headers_redact=None, + ssl_details=None, check_status=True, allow_redirects=True, + exception_cb=None, session=None, infinite=False, log_req_resp=True, request_method=None): """Wrapper around requests.Session to read the url and retry if necessary @@ -207,6 +209,7 @@ def readurl(url, data=None, timeout=None, retries=0, sec_between=1, :param headers: Optional dict of headers to send during request :param headers_cb: Optional callable returning a dict of values to send as headers during request + :param headers_redact: Optional list of header names to redact from the log :param ssl_details: Optional dict providing key_file, ca_certs, and cert_file keys for use on in ssl connections. :param check_status: Optional boolean set True to raise when HTTPError @@ -233,6 +236,8 @@ def readurl(url, data=None, timeout=None, retries=0, sec_between=1, req_args['method'] = request_method if timeout is not None: req_args['timeout'] = max(float(timeout), 0) + if headers_redact is None: + headers_redact = [] # It doesn't seem like config # was added in older library versions (or newer ones either), thus we # need to manually do the retries if it wasn't... @@ -277,6 +282,12 @@ def readurl(url, data=None, timeout=None, retries=0, sec_between=1, if k == 'data': continue filtered_req_args[k] = v + if k == 'headers': + for hkey, _hval in v.items(): + if hkey in headers_redact: + filtered_req_args[k][hkey] = ( + copy.deepcopy(req_args[k][hkey])) + filtered_req_args[k][hkey] = REDACTED try: if log_req_resp: @@ -329,8 +340,8 @@ def readurl(url, data=None, timeout=None, retries=0, sec_between=1, return None # Should throw before this... -def wait_for_url(urls, max_wait=None, timeout=None, - status_cb=None, headers_cb=None, sleep_time=1, +def wait_for_url(urls, max_wait=None, timeout=None, status_cb=None, + headers_cb=None, headers_redact=None, sleep_time=1, exception_cb=None, sleep_time_cb=None, request_method=None): """ urls: a list of urls to try @@ -342,6 +353,7 @@ def wait_for_url(urls, max_wait=None, timeout=None, status_cb: call method with string message when a url is not available headers_cb: call method with single argument of url to get headers for request. + headers_redact: a list of header names to redact from the log exception_cb: call method with 2 arguments 'msg' (per status_cb) and 'exception', the exception that occurred. sleep_time_cb: call method with 2 arguments (response, loop_n) that @@ -405,8 +417,9 @@ def wait_for_url(urls, max_wait=None, timeout=None, headers = {} response = readurl( - url, headers=headers, timeout=timeout, - check_status=False, request_method=request_method) + url, headers=headers, headers_redact=headers_redact, + timeout=timeout, check_status=False, + request_method=request_method) if not response.contents: reason = "empty response [%s]" % (response.code) url_exc = UrlError(ValueError(reason), code=response.code, diff --git a/tests/unittests/test_datasource/test_ec2.py b/tests/unittests/test_datasource/test_ec2.py index 19e1af2b..2a96122f 100644 --- a/tests/unittests/test_datasource/test_ec2.py +++ b/tests/unittests/test_datasource/test_ec2.py @@ -429,6 +429,23 @@ class TestEc2(test_helpers.HttprettyTestCase): self.assertTrue(ds.get_data()) self.assertFalse(ds.is_classic_instance()) + def test_aws_token_redacted(self): + """Verify that aws tokens are redacted when logged.""" + ds = self._setup_ds( + platform_data=self.valid_platform_data, + sys_cfg={'datasource': {'Ec2': {'strict_id': False}}}, + md={'md': DEFAULT_METADATA}) + self.assertTrue(ds.get_data()) + all_logs = self.logs.getvalue().splitlines() + REDACT_TTL = "'X-aws-ec2-metadata-token-ttl-seconds': 'REDACTED'" + REDACT_TOK = "'X-aws-ec2-metadata-token': 'REDACTED'" + logs_with_redacted_ttl = [log for log in all_logs if REDACT_TTL in log] + logs_with_redacted = [log for log in all_logs if REDACT_TOK in log] + logs_with_token = [log for log in all_logs if 'API-TOKEN' in log] + self.assertEqual(1, len(logs_with_redacted_ttl)) + self.assertEqual(79, len(logs_with_redacted)) + self.assertEqual(0, len(logs_with_token)) + @mock.patch('cloudinit.net.dhcp.maybe_perform_dhcp_discovery') def test_valid_platform_with_strict_true(self, m_dhcp): """Valid platform data should return true with strict_id true.""" -- cgit v1.2.3