From 94a64529dccebd8fe8c7969370b8696e46023fbd Mon Sep 17 00:00:00 2001
From: Paride Legovini
Date: Wed, 30 Jan 2019 15:38:56 +0000
Subject: Resolve flake8 comparison and pycodestyle over-indent issues

Fixes:
 - flake8: use ==/!= to compare str, bytes, and int literals
 - pycodestyle: E117 over-indented
---
 cloudinit/url_helper.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'cloudinit/url_helper.py')

diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py
index 396d69ae..0af0d9e3 100644
--- a/cloudinit/url_helper.py
+++ b/cloudinit/url_helper.py
@@ -521,7 +521,7 @@ class OauthUrlHelper(object):
             if extra_exception_cb:
                 ret = extra_exception_cb(msg, exception)
         finally:
-                self.exception_cb(msg, exception)
+            self.exception_cb(msg, exception)
         return ret

     def _headers_cb(self, extra_headers_cb, url):
--
cgit v1.2.3


From 155847209e6a3ed5face91a133d8488a703f3f93 Mon Sep 17 00:00:00 2001
From: Rick Wright
Date: Fri, 9 Aug 2019 17:11:05 +0000
Subject: Add support for publishing host keys to GCE guest attributes

This adds an empty publish_host_keys() method to the default
datasource that is called by cc_ssh.py. This feature can be controlled
by the 'ssh_publish_hostkeys' config option. It is enabled by default
but can be disabled by setting 'enabled' to false. Also, a blacklist
of key types is supported.

In addition, this change implements publish_host_keys() for the GCE
datasource, attempting to write the hostkeys to the instance's guest
attributes. Using these hostkeys for ssh connections is currently
supported by the alpha version of Google's 'gcloud' command-line tool.

(On Google Compute Engine, this feature is enabled by setting the
'enable-guest-attributes' metadata key to 'true' for the project or
instance on which you would like to use it. When connecting to the
instance for the first time using 'gcloud compute ssh', the hostkeys
will be read from the instance's guest attributes and written to the
user's local known_hosts file.)
---
 cloudinit/config/cc_ssh.py | 55 +++++++++
 cloudinit/config/tests/test_ssh.py | 166 ++++++++++++++++++++++++++++
 cloudinit/sources/DataSourceGCE.py | 22 +++-
 cloudinit/sources/__init__.py | 10 ++
 cloudinit/url_helper.py | 9 +-
 tests/unittests/test_datasource/test_gce.py | 18 +++
 6 files changed, 274 insertions(+), 6 deletions(-)

(limited to 'cloudinit/url_helper.py')

diff --git a/cloudinit/config/cc_ssh.py b/cloudinit/config/cc_ssh.py
index f8f7cb35..53f69399 100755
--- a/cloudinit/config/cc_ssh.py
+++ b/cloudinit/config/cc_ssh.py
@@ -91,6 +91,9 @@ public keys.
     ssh_authorized_keys:
       - ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAGEA3FSyQwBI6Z+nCSjUU ...
       - ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEA3I7VUf2l5gSn5uavROsc5HRDpZ ...
+    ssh_publish_hostkeys:
+      enabled: (Defaults to true)
+      blacklist: (Defaults to [dsa])
 """

 import glob
@@ -104,6 +107,10 @@ from cloudinit import util

 GENERATE_KEY_NAMES = ['rsa', 'dsa', 'ecdsa', 'ed25519']
 KEY_FILE_TPL = '/etc/ssh/ssh_host_%s_key'
+PUBLISH_HOST_KEYS = True
+# Don't publish the dsa hostkey by default since OpenSSH recommends not using
+# it.
+HOST_KEY_PUBLISH_BLACKLIST = ['dsa'] CONFIG_KEY_TO_FILE = {} PRIV_TO_PUB = {} @@ -176,6 +183,23 @@ def handle(_name, cfg, cloud, log, _args): util.logexc(log, "Failed generating key type %s to " "file %s", keytype, keyfile) + if "ssh_publish_hostkeys" in cfg: + host_key_blacklist = util.get_cfg_option_list( + cfg["ssh_publish_hostkeys"], "blacklist", + HOST_KEY_PUBLISH_BLACKLIST) + publish_hostkeys = util.get_cfg_option_bool( + cfg["ssh_publish_hostkeys"], "enabled", PUBLISH_HOST_KEYS) + else: + host_key_blacklist = HOST_KEY_PUBLISH_BLACKLIST + publish_hostkeys = PUBLISH_HOST_KEYS + + if publish_hostkeys: + hostkeys = get_public_host_keys(blacklist=host_key_blacklist) + try: + cloud.datasource.publish_host_keys(hostkeys) + except Exception as e: + util.logexc(log, "Publishing host keys failed!") + try: (users, _groups) = ug_util.normalize_users_groups(cfg, cloud.distro) (user, _user_config) = ug_util.extract_default(users) @@ -209,4 +233,35 @@ def apply_credentials(keys, user, disable_root, disable_root_opts): ssh_util.setup_user_keys(keys, 'root', options=key_prefix) + +def get_public_host_keys(blacklist=None): + """Read host keys from /etc/ssh/*.pub files and return them as a list. + + @param blacklist: List of key types to ignore. e.g. ['dsa', 'rsa'] + @returns: List of keys, each formatted as a two-element tuple. + e.g. [('ssh-rsa', 'AAAAB3Nz...'), ('ssh-ed25519', 'AAAAC3Nx...')] + """ + public_key_file_tmpl = '%s.pub' % (KEY_FILE_TPL,) + key_list = [] + blacklist_files = [] + if blacklist: + # Convert blacklist to filenames: + # 'dsa' -> '/etc/ssh/ssh_host_dsa_key.pub' + blacklist_files = [public_key_file_tmpl % (key_type,) + for key_type in blacklist] + # Get list of public key files and filter out blacklisted files. + file_list = [hostfile for hostfile + in glob.glob(public_key_file_tmpl % ('*',)) + if hostfile not in blacklist_files] + + # Read host key files, retrieve first two fields as a tuple and + # append that tuple to key_list. + for file_name in file_list: + file_contents = util.load_file(file_name) + key_data = file_contents.split() + if key_data and len(key_data) > 1: + key_list.append(tuple(key_data[:2])) + return key_list + + # vi: ts=4 expandtab diff --git a/cloudinit/config/tests/test_ssh.py b/cloudinit/config/tests/test_ssh.py index c8a4271f..e7789842 100644 --- a/cloudinit/config/tests/test_ssh.py +++ b/cloudinit/config/tests/test_ssh.py @@ -1,5 +1,6 @@ # This file is part of cloud-init. See LICENSE file for license information. +import os.path from cloudinit.config import cc_ssh from cloudinit import ssh_util @@ -12,6 +13,25 @@ MODPATH = "cloudinit.config.cc_ssh." 
class TestHandleSsh(CiTestCase): """Test cc_ssh handling of ssh config.""" + def _publish_hostkey_test_setup(self): + self.test_hostkeys = { + 'dsa': ('ssh-dss', 'AAAAB3NzaC1kc3MAAACB'), + 'ecdsa': ('ecdsa-sha2-nistp256', 'AAAAE2VjZ'), + 'ed25519': ('ssh-ed25519', 'AAAAC3NzaC1lZDI'), + 'rsa': ('ssh-rsa', 'AAAAB3NzaC1yc2EAAA'), + } + self.test_hostkey_files = [] + hostkey_tmpdir = self.tmp_dir() + for key_type in ['dsa', 'ecdsa', 'ed25519', 'rsa']: + key_data = self.test_hostkeys[key_type] + filename = 'ssh_host_%s_key.pub' % key_type + filepath = os.path.join(hostkey_tmpdir, filename) + self.test_hostkey_files.append(filepath) + with open(filepath, 'w') as f: + f.write(' '.join(key_data)) + + cc_ssh.KEY_FILE_TPL = os.path.join(hostkey_tmpdir, 'ssh_host_%s_key') + def test_apply_credentials_with_user(self, m_setup_keys): """Apply keys for the given user and root.""" keys = ["key1"] @@ -64,6 +84,7 @@ class TestHandleSsh(CiTestCase): # Mock os.path.exits to True to short-circuit the key writing logic m_path_exists.return_value = True m_nug.return_value = ([], {}) + cc_ssh.PUBLISH_HOST_KEYS = False cloud = self.tmp_cloud( distro='ubuntu', metadata={'public-keys': keys}) cc_ssh.handle("name", cfg, cloud, None, None) @@ -149,3 +170,148 @@ class TestHandleSsh(CiTestCase): self.assertEqual([mock.call(set(keys), user), mock.call(set(keys), "root", options="")], m_setup_keys.call_args_list) + + @mock.patch(MODPATH + "glob.glob") + @mock.patch(MODPATH + "ug_util.normalize_users_groups") + @mock.patch(MODPATH + "os.path.exists") + def test_handle_publish_hostkeys_default( + self, m_path_exists, m_nug, m_glob, m_setup_keys): + """Test handle with various configs for ssh_publish_hostkeys.""" + self._publish_hostkey_test_setup() + cc_ssh.PUBLISH_HOST_KEYS = True + keys = ["key1"] + user = "clouduser" + # Return no matching keys for first glob, test keys for second. + m_glob.side_effect = iter([ + [], + self.test_hostkey_files, + ]) + # Mock os.path.exits to True to short-circuit the key writing logic + m_path_exists.return_value = True + m_nug.return_value = ({user: {"default": user}}, {}) + cloud = self.tmp_cloud( + distro='ubuntu', metadata={'public-keys': keys}) + cloud.datasource.publish_host_keys = mock.Mock() + + cfg = {} + expected_call = [self.test_hostkeys[key_type] for key_type + in ['ecdsa', 'ed25519', 'rsa']] + cc_ssh.handle("name", cfg, cloud, None, None) + self.assertEqual([mock.call(expected_call)], + cloud.datasource.publish_host_keys.call_args_list) + + @mock.patch(MODPATH + "glob.glob") + @mock.patch(MODPATH + "ug_util.normalize_users_groups") + @mock.patch(MODPATH + "os.path.exists") + def test_handle_publish_hostkeys_config_enable( + self, m_path_exists, m_nug, m_glob, m_setup_keys): + """Test handle with various configs for ssh_publish_hostkeys.""" + self._publish_hostkey_test_setup() + cc_ssh.PUBLISH_HOST_KEYS = False + keys = ["key1"] + user = "clouduser" + # Return no matching keys for first glob, test keys for second. 
+ m_glob.side_effect = iter([ + [], + self.test_hostkey_files, + ]) + # Mock os.path.exits to True to short-circuit the key writing logic + m_path_exists.return_value = True + m_nug.return_value = ({user: {"default": user}}, {}) + cloud = self.tmp_cloud( + distro='ubuntu', metadata={'public-keys': keys}) + cloud.datasource.publish_host_keys = mock.Mock() + + cfg = {'ssh_publish_hostkeys': {'enabled': True}} + expected_call = [self.test_hostkeys[key_type] for key_type + in ['ecdsa', 'ed25519', 'rsa']] + cc_ssh.handle("name", cfg, cloud, None, None) + self.assertEqual([mock.call(expected_call)], + cloud.datasource.publish_host_keys.call_args_list) + + @mock.patch(MODPATH + "glob.glob") + @mock.patch(MODPATH + "ug_util.normalize_users_groups") + @mock.patch(MODPATH + "os.path.exists") + def test_handle_publish_hostkeys_config_disable( + self, m_path_exists, m_nug, m_glob, m_setup_keys): + """Test handle with various configs for ssh_publish_hostkeys.""" + self._publish_hostkey_test_setup() + cc_ssh.PUBLISH_HOST_KEYS = True + keys = ["key1"] + user = "clouduser" + # Return no matching keys for first glob, test keys for second. + m_glob.side_effect = iter([ + [], + self.test_hostkey_files, + ]) + # Mock os.path.exits to True to short-circuit the key writing logic + m_path_exists.return_value = True + m_nug.return_value = ({user: {"default": user}}, {}) + cloud = self.tmp_cloud( + distro='ubuntu', metadata={'public-keys': keys}) + cloud.datasource.publish_host_keys = mock.Mock() + + cfg = {'ssh_publish_hostkeys': {'enabled': False}} + cc_ssh.handle("name", cfg, cloud, None, None) + self.assertFalse(cloud.datasource.publish_host_keys.call_args_list) + cloud.datasource.publish_host_keys.assert_not_called() + + @mock.patch(MODPATH + "glob.glob") + @mock.patch(MODPATH + "ug_util.normalize_users_groups") + @mock.patch(MODPATH + "os.path.exists") + def test_handle_publish_hostkeys_config_blacklist( + self, m_path_exists, m_nug, m_glob, m_setup_keys): + """Test handle with various configs for ssh_publish_hostkeys.""" + self._publish_hostkey_test_setup() + cc_ssh.PUBLISH_HOST_KEYS = True + keys = ["key1"] + user = "clouduser" + # Return no matching keys for first glob, test keys for second. + m_glob.side_effect = iter([ + [], + self.test_hostkey_files, + ]) + # Mock os.path.exits to True to short-circuit the key writing logic + m_path_exists.return_value = True + m_nug.return_value = ({user: {"default": user}}, {}) + cloud = self.tmp_cloud( + distro='ubuntu', metadata={'public-keys': keys}) + cloud.datasource.publish_host_keys = mock.Mock() + + cfg = {'ssh_publish_hostkeys': {'enabled': True, + 'blacklist': ['dsa', 'rsa']}} + expected_call = [self.test_hostkeys[key_type] for key_type + in ['ecdsa', 'ed25519']] + cc_ssh.handle("name", cfg, cloud, None, None) + self.assertEqual([mock.call(expected_call)], + cloud.datasource.publish_host_keys.call_args_list) + + @mock.patch(MODPATH + "glob.glob") + @mock.patch(MODPATH + "ug_util.normalize_users_groups") + @mock.patch(MODPATH + "os.path.exists") + def test_handle_publish_hostkeys_empty_blacklist( + self, m_path_exists, m_nug, m_glob, m_setup_keys): + """Test handle with various configs for ssh_publish_hostkeys.""" + self._publish_hostkey_test_setup() + cc_ssh.PUBLISH_HOST_KEYS = True + keys = ["key1"] + user = "clouduser" + # Return no matching keys for first glob, test keys for second. 
+ m_glob.side_effect = iter([ + [], + self.test_hostkey_files, + ]) + # Mock os.path.exits to True to short-circuit the key writing logic + m_path_exists.return_value = True + m_nug.return_value = ({user: {"default": user}}, {}) + cloud = self.tmp_cloud( + distro='ubuntu', metadata={'public-keys': keys}) + cloud.datasource.publish_host_keys = mock.Mock() + + cfg = {'ssh_publish_hostkeys': {'enabled': True, + 'blacklist': []}} + expected_call = [self.test_hostkeys[key_type] for key_type + in ['dsa', 'ecdsa', 'ed25519', 'rsa']] + cc_ssh.handle("name", cfg, cloud, None, None) + self.assertEqual([mock.call(expected_call)], + cloud.datasource.publish_host_keys.call_args_list) diff --git a/cloudinit/sources/DataSourceGCE.py b/cloudinit/sources/DataSourceGCE.py index d8162623..6cbfbbac 100644 --- a/cloudinit/sources/DataSourceGCE.py +++ b/cloudinit/sources/DataSourceGCE.py @@ -18,10 +18,13 @@ LOG = logging.getLogger(__name__) MD_V1_URL = 'http://metadata.google.internal/computeMetadata/v1/' BUILTIN_DS_CONFIG = {'metadata_url': MD_V1_URL} REQUIRED_FIELDS = ('instance-id', 'availability-zone', 'local-hostname') +GUEST_ATTRIBUTES_URL = ('http://metadata.google.internal/computeMetadata/' + 'v1/instance/guest-attributes') +HOSTKEY_NAMESPACE = 'hostkeys' +HEADERS = {'Metadata-Flavor': 'Google'} class GoogleMetadataFetcher(object): - headers = {'Metadata-Flavor': 'Google'} def __init__(self, metadata_address): self.metadata_address = metadata_address @@ -32,7 +35,7 @@ class GoogleMetadataFetcher(object): url = self.metadata_address + path if is_recursive: url += '/?recursive=True' - resp = url_helper.readurl(url=url, headers=self.headers) + resp = url_helper.readurl(url=url, headers=HEADERS) except url_helper.UrlError as exc: msg = "url %s raised exception %s" LOG.debug(msg, path, exc) @@ -90,6 +93,10 @@ class DataSourceGCE(sources.DataSource): public_keys_data = self.metadata['public-keys-data'] return _parse_public_keys(public_keys_data, self.default_user) + def publish_host_keys(self, hostkeys): + for key in hostkeys: + _write_host_key_to_guest_attributes(*key) + def get_hostname(self, fqdn=False, resolve_ip=False, metadata_only=False): # GCE has long FDQN's and has asked for short hostnames. return self.metadata['local-hostname'].split('.')[0] @@ -103,6 +110,17 @@ class DataSourceGCE(sources.DataSource): return self.availability_zone.rsplit('-', 1)[0] +def _write_host_key_to_guest_attributes(key_type, key_value): + url = '%s/%s/%s' % (GUEST_ATTRIBUTES_URL, HOSTKEY_NAMESPACE, key_type) + key_value = key_value.encode('utf-8') + resp = url_helper.readurl(url=url, data=key_value, headers=HEADERS, + request_method='PUT', check_status=False) + if resp.ok(): + LOG.debug('Wrote %s host key to guest attributes.', key_type) + else: + LOG.debug('Unable to write %s host key to guest attributes.', key_type) + + def _has_expired(public_key): # Check whether an SSH key is expired. Public key input is a single SSH # public key in the GCE specific key format documented here: diff --git a/cloudinit/sources/__init__.py b/cloudinit/sources/__init__.py index c2baccd5..a319322b 100644 --- a/cloudinit/sources/__init__.py +++ b/cloudinit/sources/__init__.py @@ -491,6 +491,16 @@ class DataSource(object): def get_public_ssh_keys(self): return normalize_pubkey_data(self.metadata.get('public-keys')) + def publish_host_keys(self, hostkeys): + """Publish the public SSH host keys (found in /etc/ssh/*.pub). 
+ + @param hostkeys: List of host key tuples (key_type, key_value), + where key_type is the first field in the public key file + (e.g. 'ssh-rsa') and key_value is the key itself + (e.g. 'AAAAB3NzaC1y...'). + """ + pass + def _remap_device(self, short_name): # LP: #611137 # the metadata service may believe that devices are named 'sda' diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index 0af0d9e3..44ee61d4 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -199,18 +199,19 @@ def _get_ssl_args(url, ssl_details): def readurl(url, data=None, timeout=None, retries=0, sec_between=1, headers=None, headers_cb=None, ssl_details=None, check_status=True, allow_redirects=True, exception_cb=None, - session=None, infinite=False, log_req_resp=True): + session=None, infinite=False, log_req_resp=True, + request_method=None): url = _cleanurl(url) req_args = { 'url': url, } req_args.update(_get_ssl_args(url, ssl_details)) req_args['allow_redirects'] = allow_redirects - req_args['method'] = 'GET' + if not request_method: + request_method = 'POST' if data else 'GET' + req_args['method'] = request_method if timeout is not None: req_args['timeout'] = max(float(timeout), 0) - if data: - req_args['method'] = 'POST' # It doesn't seem like config # was added in older library versions (or newer ones either), thus we # need to manually do the retries if it wasn't... diff --git a/tests/unittests/test_datasource/test_gce.py b/tests/unittests/test_datasource/test_gce.py index 41176c6a..67744d32 100644 --- a/tests/unittests/test_datasource/test_gce.py +++ b/tests/unittests/test_datasource/test_gce.py @@ -55,6 +55,8 @@ GCE_USER_DATA_TEXT = { HEADERS = {'Metadata-Flavor': 'Google'} MD_URL_RE = re.compile( r'http://metadata.google.internal/computeMetadata/v1/.*') +GUEST_ATTRIBUTES_URL = ('http://metadata.google.internal/computeMetadata/' + 'v1/instance/guest-attributes/hostkeys/') def _set_mock_metadata(gce_meta=None): @@ -341,4 +343,20 @@ class TestDataSourceGCE(test_helpers.HttprettyTestCase): public_key_data, default_user='default') self.assertEqual(sorted(found), sorted(expected)) + @mock.patch("cloudinit.url_helper.readurl") + def test_publish_host_keys(self, m_readurl): + hostkeys = [('ssh-rsa', 'asdfasdf'), + ('ssh-ed25519', 'qwerqwer')] + readurl_expected_calls = [ + mock.call(check_status=False, data=b'asdfasdf', headers=HEADERS, + request_method='PUT', + url='%s%s' % (GUEST_ATTRIBUTES_URL, 'ssh-rsa')), + mock.call(check_status=False, data=b'qwerqwer', headers=HEADERS, + request_method='PUT', + url='%s%s' % (GUEST_ATTRIBUTES_URL, 'ssh-ed25519')), + ] + self.ds.publish_host_keys(hostkeys) + m_readurl.assert_has_calls(readurl_expected_calls, any_order=True) + + # vi: ts=4 expandtab -- cgit v1.2.3 From 26e1f063677d974ab3b1a48e010e732a538b9a8d Mon Sep 17 00:00:00 2001 From: Dominic Schlegel Date: Wed, 16 Oct 2019 13:49:43 +0000 Subject: Small typo fixes in code comments. 
--- cloudinit/stages.py | 2 +- cloudinit/templater.py | 4 ++-- cloudinit/url_helper.py | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'cloudinit/url_helper.py') diff --git a/cloudinit/stages.py b/cloudinit/stages.py index 50129884..77c21de0 100644 --- a/cloudinit/stages.py +++ b/cloudinit/stages.py @@ -501,7 +501,7 @@ class Init(object): # Init the handlers first for (_ctype, mod) in c_handlers.items(): if mod in c_handlers.initialized: - # Avoid initing the same module twice (if said module + # Avoid initiating the same module twice (if said module # is registered to more than one content-type). continue handlers.call_begin(mod, data, frequency) diff --git a/cloudinit/templater.py b/cloudinit/templater.py index b668674b..e47cdeda 100644 --- a/cloudinit/templater.py +++ b/cloudinit/templater.py @@ -44,7 +44,7 @@ MISSING_JINJA_PREFIX = u'CI_MISSING_JINJA_VAR/' @implements_to_string # Needed for python2.7. Otherwise cached super.__str__ class UndefinedJinjaVariable(JUndefined): - """Class used to represent any undefined jinja template varible.""" + """Class used to represent any undefined jinja template variable.""" def __str__(self): return u'%s%s' % (MISSING_JINJA_PREFIX, self._undefined_name) @@ -58,7 +58,7 @@ class UndefinedJinjaVariable(JUndefined): def basic_render(content, params): - """This does sumple replacement of bash variable like templates. + """This does simple replacement of bash variable like templates. It identifies patterns like ${a} or $a and can also identify patterns like ${a.b} or $a.b which will look for a key 'b' in the dictionary rooted diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index 44ee61d4..0f4c36f7 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -326,10 +326,10 @@ def wait_for_url(urls, max_wait=None, timeout=None, sleep_time_cb: call method with 2 arguments (response, loop_n) that generates the next sleep time. - the idea of this routine is to wait for the EC2 metdata service to + the idea of this routine is to wait for the EC2 metadata service to come up. On both Eucalyptus and EC2 we have seen the case where the instance hit the MD before the MD service was up. EC2 seems - to have permenantely fixed this, though. + to have permanently fixed this, though. In openstack, the metadata service might be painfully slow, and unable to avoid hitting a timeout of even up to 10 seconds or more @@ -338,7 +338,7 @@ def wait_for_url(urls, max_wait=None, timeout=None, Offset those needs with the need to not hang forever (and block boot) on a system where cloud-init is configured to look for EC2 Metadata service but is not going to find one. It is possible that the instance - data host (169.254.169.254) may be firewalled off Entirely for a sytem, + data host (169.254.169.254) may be firewalled off Entirely for a system, meaning that the connection will block forever unless a timeout is set. A value of None for max_wait will retry indefinitely. -- cgit v1.2.3 From 4bc399e0cd0b7e9177f948aecd49f6b8323ff30b Mon Sep 17 00:00:00 2001 From: Ryan Harper Date: Fri, 22 Nov 2019 21:05:44 -0600 Subject: ec2: Add support for AWS IMDS v2 (session-oriented) (#55) * ec2: Add support for AWS IMDS v2 (session-oriented) AWS now supports a new version of fetching Instance Metadata[1]. Update cloud-init's ec2 utility functions and update ec2 derived datasources accordingly. 
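As an illustration only (a sketch of the protocol described in [1]
below, written against the 'requests' library; this is not code from
this change), the session-oriented exchange looks like:

    import requests

    IMDS = 'http://169.254.169.254'
    # A PUT with a TTL header yields a session token (IMDSv2).
    token = requests.put(
        IMDS + '/latest/api/token',
        headers={'X-aws-ec2-metadata-token-ttl-seconds': '21600'},
        timeout=1).text
    # Subsequent reads present the token; omitting it falls back to
    # IMDSv1 semantics.
    instance_id = requests.get(
        IMDS + '/latest/meta-data/instance-id',
        headers={'X-aws-ec2-metadata-token': token}).text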
For DataSourceEc2 (versus ec2-look-alikes) cloud-init will issue the
PUT request to obtain an API token for the maximum lifetime and then
all subsequent interactions with the IMDS will include the token in
the header.

If the API token endpoint is unreachable on the Ec2 platform, log a
warning and fall back to using IMDSv1, which does not use session
tokens when communicating with the instance metadata service.

We handle read errors, typically seen if the IMDS is beyond one
network hop (IMDSv2 responses have a ttl=1), by setting the API token
to a disabled value and then using IMDSv1 paths.

To support token-based headers, ec2_utils functions were updated to
support custom headers_cb and exception_cb callback functions so Ec2
can store or refresh API tokens in the event of a token becoming
stale.

[1] https://docs.aws.amazon.com/AWSEC2/latest/ \
       UserGuide/ec2-instance-metadata.html \
       #instance-metadata-v2-how-it-works
---
 cloudinit/ec2_utils.py | 37 +++--
 cloudinit/sources/DataSourceCloudStack.py | 2 +-
 cloudinit/sources/DataSourceEc2.py | 166 ++++++++++++++++++---
 cloudinit/sources/DataSourceExoscale.py | 2 +-
 cloudinit/sources/DataSourceMAAS.py | 2 +-
 cloudinit/sources/DataSourceOpenStack.py | 2 +-
 cloudinit/url_helper.py | 15 +-
 tests/unittests/test_datasource/test_cloudstack.py | 21 ++-
 tests/unittests/test_datasource/test_ec2.py | 6 +-
 9 files changed, 201 insertions(+), 52 deletions(-)

(limited to 'cloudinit/url_helper.py')

diff --git a/cloudinit/ec2_utils.py b/cloudinit/ec2_utils.py
index 3b7b17f1..57708c14 100644
--- a/cloudinit/ec2_utils.py
+++ b/cloudinit/ec2_utils.py
@@ -134,25 +134,28 @@ class MetadataMaterializer(object):
         return joined


-def _skip_retry_on_codes(status_codes, _request_args, cause):
+def skip_retry_on_codes(status_codes, _request_args, cause):
     """Returns False if cause.code is in status_codes."""
     return cause.code not in status_codes


 def get_instance_userdata(api_version='latest',
                           metadata_address='http://169.254.169.254',
-                          ssl_details=None, timeout=5, retries=5):
+                          ssl_details=None, timeout=5, retries=5,
+                          headers_cb=None, exception_cb=None):
     ud_url = url_helper.combine_url(metadata_address, api_version)
     ud_url = url_helper.combine_url(ud_url, 'user-data')
     user_data = ''
     try:
-        # It is ok for userdata to not exist (thats why we are stopping if
-        # NOT_FOUND occurs) and just in that case returning an empty string.
-        exception_cb = functools.partial(_skip_retry_on_codes,
-                                         SKIP_USERDATA_CODES)
+        if not exception_cb:
+            # It is ok for userdata to not exist (thats why we are stopping if
+            # NOT_FOUND occurs) and just in that case returning an empty
+            # string.
+ exception_cb = functools.partial(skip_retry_on_codes, + SKIP_USERDATA_CODES) response = url_helper.read_file_or_url( ud_url, ssl_details=ssl_details, timeout=timeout, - retries=retries, exception_cb=exception_cb) + retries=retries, exception_cb=exception_cb, headers_cb=headers_cb) user_data = response.contents except url_helper.UrlError as e: if e.code not in SKIP_USERDATA_CODES: @@ -165,11 +168,13 @@ def get_instance_userdata(api_version='latest', def _get_instance_metadata(tree, api_version='latest', metadata_address='http://169.254.169.254', ssl_details=None, timeout=5, retries=5, - leaf_decoder=None): + leaf_decoder=None, headers_cb=None, + exception_cb=None): md_url = url_helper.combine_url(metadata_address, api_version, tree) caller = functools.partial( url_helper.read_file_or_url, ssl_details=ssl_details, - timeout=timeout, retries=retries) + timeout=timeout, retries=retries, headers_cb=headers_cb, + exception_cb=exception_cb) def mcaller(url): return caller(url).contents @@ -191,22 +196,28 @@ def _get_instance_metadata(tree, api_version='latest', def get_instance_metadata(api_version='latest', metadata_address='http://169.254.169.254', ssl_details=None, timeout=5, retries=5, - leaf_decoder=None): + leaf_decoder=None, headers_cb=None, + exception_cb=None): # Note, 'meta-data' explicitly has trailing /. # this is required for CloudStack (LP: #1356855) return _get_instance_metadata(tree='meta-data/', api_version=api_version, metadata_address=metadata_address, ssl_details=ssl_details, timeout=timeout, - retries=retries, leaf_decoder=leaf_decoder) + retries=retries, leaf_decoder=leaf_decoder, + headers_cb=headers_cb, + exception_cb=exception_cb) def get_instance_identity(api_version='latest', metadata_address='http://169.254.169.254', ssl_details=None, timeout=5, retries=5, - leaf_decoder=None): + leaf_decoder=None, headers_cb=None, + exception_cb=None): return _get_instance_metadata(tree='dynamic/instance-identity', api_version=api_version, metadata_address=metadata_address, ssl_details=ssl_details, timeout=timeout, - retries=retries, leaf_decoder=leaf_decoder) + retries=retries, leaf_decoder=leaf_decoder, + headers_cb=headers_cb, + exception_cb=exception_cb) # vi: ts=4 expandtab diff --git a/cloudinit/sources/DataSourceCloudStack.py b/cloudinit/sources/DataSourceCloudStack.py index e333cb50..2013bed7 100644 --- a/cloudinit/sources/DataSourceCloudStack.py +++ b/cloudinit/sources/DataSourceCloudStack.py @@ -93,7 +93,7 @@ class DataSourceCloudStack(sources.DataSource): urls = [uhelp.combine_url(self.metadata_address, 'latest/meta-data/instance-id')] start_time = time.time() - url = uhelp.wait_for_url( + url, _response = uhelp.wait_for_url( urls=urls, max_wait=url_params.max_wait_seconds, timeout=url_params.timeout_seconds, status_cb=LOG.warning) diff --git a/cloudinit/sources/DataSourceEc2.py b/cloudinit/sources/DataSourceEc2.py index 1d88c9b1..b9f346a6 100644 --- a/cloudinit/sources/DataSourceEc2.py +++ b/cloudinit/sources/DataSourceEc2.py @@ -28,6 +28,10 @@ SKIP_METADATA_URL_CODES = frozenset([uhelp.NOT_FOUND]) STRICT_ID_PATH = ("datasource", "Ec2", "strict_id") STRICT_ID_DEFAULT = "warn" +API_TOKEN_ROUTE = 'latest/api/token' +API_TOKEN_DISABLED = '_ec2_disable_api_token' +AWS_TOKEN_TTL_SECONDS = '21600' + class CloudNames(object): ALIYUN = "aliyun" @@ -62,6 +66,7 @@ class DataSourceEc2(sources.DataSource): url_max_wait = 120 url_timeout = 50 + _api_token = None # API token for accessing the metadata service _network_config = sources.UNSET # Used to cache calculated network cfg v1 # 
Whether we want to get network configuration from the metadata service. @@ -148,11 +153,12 @@ class DataSourceEc2(sources.DataSource): min_metadata_version. """ # Assumes metadata service is already up + url_tmpl = '{0}/{1}/meta-data/instance-id' + headers = self._get_headers() for api_ver in self.extended_metadata_versions: - url = '{0}/{1}/meta-data/instance-id'.format( - self.metadata_address, api_ver) + url = url_tmpl.format(self.metadata_address, api_ver) try: - resp = uhelp.readurl(url=url) + resp = uhelp.readurl(url=url, headers=headers) except uhelp.UrlError as e: LOG.debug('url %s raised exception %s', url, e) else: @@ -172,12 +178,39 @@ class DataSourceEc2(sources.DataSource): # setup self.identity. So we need to do that now. api_version = self.get_metadata_api_version() self.identity = ec2.get_instance_identity( - api_version, self.metadata_address).get('document', {}) + api_version, self.metadata_address, + headers_cb=self._get_headers, + exception_cb=self._refresh_stale_aws_token_cb).get( + 'document', {}) return self.identity.get( 'instanceId', self.metadata['instance-id']) else: return self.metadata['instance-id'] + def _maybe_fetch_api_token(self, mdurls, timeout=None, max_wait=None): + if self.cloud_name != CloudNames.AWS: + return + + urls = [] + url2base = {} + url_path = API_TOKEN_ROUTE + request_method = 'PUT' + for url in mdurls: + cur = '{0}/{1}'.format(url, url_path) + urls.append(cur) + url2base[cur] = url + + # use the self._status_cb to check for Read errors, which means + # we can't reach the API token URL, so we should disable IMDSv2 + LOG.debug('Fetching Ec2 IMDSv2 API Token') + url, response = uhelp.wait_for_url( + urls=urls, max_wait=1, timeout=1, status_cb=self._status_cb, + headers_cb=self._get_headers, request_method=request_method) + + if url and response: + self._api_token = response + return url2base[url] + def wait_for_metadata_service(self): mcfg = self.ds_cfg @@ -199,27 +232,39 @@ class DataSourceEc2(sources.DataSource): LOG.warning("Empty metadata url list! 
using default list") mdurls = self.metadata_urls - urls = [] - url2base = {} - for url in mdurls: - cur = '{0}/{1}/meta-data/instance-id'.format( - url, self.min_metadata_version) - urls.append(cur) - url2base[cur] = url - - start_time = time.time() - url = uhelp.wait_for_url( - urls=urls, max_wait=url_params.max_wait_seconds, - timeout=url_params.timeout_seconds, status_cb=LOG.warning) - - if url: - self.metadata_address = url2base[url] + # try the api token path first + metadata_address = self._maybe_fetch_api_token(mdurls) + if not metadata_address: + if self._api_token == API_TOKEN_DISABLED: + LOG.warning('Retrying with IMDSv1') + # if we can't get a token, use instance-id path + urls = [] + url2base = {} + url_path = '{ver}/meta-data/instance-id'.format( + ver=self.min_metadata_version) + request_method = 'GET' + for url in mdurls: + cur = '{0}/{1}'.format(url, url_path) + urls.append(cur) + url2base[cur] = url + + start_time = time.time() + url, _ = uhelp.wait_for_url( + urls=urls, max_wait=url_params.max_wait_seconds, + timeout=url_params.timeout_seconds, status_cb=LOG.warning, + headers_cb=self._get_headers, request_method=request_method) + + if url: + metadata_address = url2base[url] + + if metadata_address: + self.metadata_address = metadata_address LOG.debug("Using metadata source: '%s'", self.metadata_address) else: LOG.critical("Giving up on md from %s after %s seconds", urls, int(time.time() - start_time)) - return bool(url) + return bool(metadata_address) def device_name_to_device(self, name): # Consult metadata service, that has @@ -376,14 +421,22 @@ class DataSourceEc2(sources.DataSource): return {} api_version = self.get_metadata_api_version() crawled_metadata = {} + if self.cloud_name == CloudNames.AWS: + exc_cb = self._refresh_stale_aws_token_cb + exc_cb_ud = self._skip_or_refresh_stale_aws_token_cb + else: + exc_cb = exc_cb_ud = None try: crawled_metadata['user-data'] = ec2.get_instance_userdata( - api_version, self.metadata_address) + api_version, self.metadata_address, + headers_cb=self._get_headers, exception_cb=exc_cb_ud) crawled_metadata['meta-data'] = ec2.get_instance_metadata( - api_version, self.metadata_address) + api_version, self.metadata_address, + headers_cb=self._get_headers, exception_cb=exc_cb) if self.cloud_name == CloudNames.AWS: identity = ec2.get_instance_identity( - api_version, self.metadata_address) + api_version, self.metadata_address, + headers_cb=self._get_headers, exception_cb=exc_cb) crawled_metadata['dynamic'] = {'instance-identity': identity} except Exception: util.logexc( @@ -393,6 +446,73 @@ class DataSourceEc2(sources.DataSource): crawled_metadata['_metadata_api_version'] = api_version return crawled_metadata + def _refresh_api_token(self, seconds=AWS_TOKEN_TTL_SECONDS): + """Request new metadata API token. + @param seconds: The lifetime of the token in seconds + + @return: The API token or None if unavailable. 
+ """ + if self.cloud_name != CloudNames.AWS: + return None + LOG.debug("Refreshing Ec2 metadata API token") + request_header = {'X-aws-ec2-metadata-token-ttl-seconds': seconds} + token_url = '{}/{}'.format(self.metadata_address, API_TOKEN_ROUTE) + try: + response = uhelp.readurl( + token_url, headers=request_header, request_method="PUT") + except uhelp.UrlError as e: + LOG.warning( + 'Unable to get API token: %s raised exception %s', + token_url, e) + return None + return response.contents + + def _skip_or_refresh_stale_aws_token_cb(self, msg, exception): + """Callback will not retry on SKIP_USERDATA_CODES or if no token + is available.""" + retry = ec2.skip_retry_on_codes( + ec2.SKIP_USERDATA_CODES, msg, exception) + if not retry: + return False # False raises exception + return self._refresh_stale_aws_token_cb(msg, exception) + + def _refresh_stale_aws_token_cb(self, msg, exception): + """Exception handler for Ec2 to refresh token if token is stale.""" + if isinstance(exception, uhelp.UrlError) and exception.code == 401: + # With _api_token as None, _get_headers will _refresh_api_token. + LOG.debug("Clearing cached Ec2 API token due to expiry") + self._api_token = None + return True # always retry + + def _status_cb(self, msg, exc=None): + LOG.warning(msg) + if 'Read timed out' in msg: + LOG.warning('Cannot use Ec2 IMDSv2 API tokens, using IMDSv1') + self._api_token = API_TOKEN_DISABLED + + def _get_headers(self, url=''): + """Return a dict of headers for accessing a url. + + If _api_token is unset on AWS, attempt to refresh the token via a PUT + and then return the updated token header. + """ + if self.cloud_name != CloudNames.AWS or (self._api_token == + API_TOKEN_DISABLED): + return {} + # Request a 6 hour token if URL is API_TOKEN_ROUTE + request_token_header = { + 'X-aws-ec2-metadata-token-ttl-seconds': AWS_TOKEN_TTL_SECONDS} + if API_TOKEN_ROUTE in url: + return request_token_header + if not self._api_token: + # If we don't yet have an API token, get one via a PUT against + # API_TOKEN_ROUTE. This _api_token may get unset by a 403 due + # to an invalid or expired token + self._api_token = self._refresh_api_token() + if not self._api_token: + return {} + return {'X-aws-ec2-metadata-token': self._api_token} + class DataSourceEc2Local(DataSourceEc2): """Datasource run at init-local which sets up network to query metadata. 
diff --git a/cloudinit/sources/DataSourceExoscale.py b/cloudinit/sources/DataSourceExoscale.py index 4616daa7..d59aefd1 100644 --- a/cloudinit/sources/DataSourceExoscale.py +++ b/cloudinit/sources/DataSourceExoscale.py @@ -61,7 +61,7 @@ class DataSourceExoscale(sources.DataSource): metadata_url = "{}/{}/meta-data/instance-id".format( self.metadata_url, self.api_version) - url = url_helper.wait_for_url( + url, _response = url_helper.wait_for_url( urls=[metadata_url], max_wait=self.url_max_wait, timeout=self.url_timeout, diff --git a/cloudinit/sources/DataSourceMAAS.py b/cloudinit/sources/DataSourceMAAS.py index 61aa6d7e..517913aa 100644 --- a/cloudinit/sources/DataSourceMAAS.py +++ b/cloudinit/sources/DataSourceMAAS.py @@ -136,7 +136,7 @@ class DataSourceMAAS(sources.DataSource): url = url[:-1] check_url = "%s/%s/meta-data/instance-id" % (url, MD_VERSION) urls = [check_url] - url = self.oauth_helper.wait_for_url( + url, _response = self.oauth_helper.wait_for_url( urls=urls, max_wait=max_wait, timeout=timeout) if url: diff --git a/cloudinit/sources/DataSourceOpenStack.py b/cloudinit/sources/DataSourceOpenStack.py index 4a015240..7a5e71b6 100644 --- a/cloudinit/sources/DataSourceOpenStack.py +++ b/cloudinit/sources/DataSourceOpenStack.py @@ -76,7 +76,7 @@ class DataSourceOpenStack(openstack.SourceMixin, sources.DataSource): url_params = self.get_url_params() start_time = time.time() - avail_url = url_helper.wait_for_url( + avail_url, _response = url_helper.wait_for_url( urls=md_urls, max_wait=url_params.max_wait_seconds, timeout=url_params.timeout_seconds) if avail_url: diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index 0f4c36f7..48ddae45 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -101,7 +101,7 @@ def read_file_or_url(url, timeout=5, retries=10, raise UrlError(cause=e, code=code, headers=None, url=url) return FileResponse(file_path, contents=contents) else: - return readurl(url, timeout=timeout, retries=retries, headers=headers, + return readurl(url, timeout=timeout, retries=retries, headers_cb=headers_cb, data=data, sec_between=sec_between, ssl_details=ssl_details, exception_cb=exception_cb) @@ -310,7 +310,7 @@ def readurl(url, data=None, timeout=None, retries=0, sec_between=1, def wait_for_url(urls, max_wait=None, timeout=None, status_cb=None, headers_cb=None, sleep_time=1, - exception_cb=None, sleep_time_cb=None): + exception_cb=None, sleep_time_cb=None, request_method=None): """ urls: a list of urls to try max_wait: roughly the maximum time to wait before giving up @@ -325,6 +325,8 @@ def wait_for_url(urls, max_wait=None, timeout=None, 'exception', the exception that occurred. sleep_time_cb: call method with 2 arguments (response, loop_n) that generates the next sleep time. + request_method: indicate the type of HTTP request, GET, PUT, or POST + returns: tuple of (url, response contents), on failure, (False, None) the idea of this routine is to wait for the EC2 metadata service to come up. 
On both Eucalyptus and EC2 we have seen the case where @@ -381,8 +383,9 @@ def wait_for_url(urls, max_wait=None, timeout=None, else: headers = {} - response = readurl(url, headers=headers, timeout=timeout, - check_status=False) + response = readurl( + url, headers=headers, timeout=timeout, + check_status=False, request_method=request_method) if not response.contents: reason = "empty response [%s]" % (response.code) url_exc = UrlError(ValueError(reason), code=response.code, @@ -392,7 +395,7 @@ def wait_for_url(urls, max_wait=None, timeout=None, url_exc = UrlError(ValueError(reason), code=response.code, headers=response.headers, url=url) else: - return url + return url, response.contents except UrlError as e: reason = "request error [%s]" % e url_exc = e @@ -421,7 +424,7 @@ def wait_for_url(urls, max_wait=None, timeout=None, sleep_time) time.sleep(sleep_time) - return False + return False, None class OauthUrlHelper(object): diff --git a/tests/unittests/test_datasource/test_cloudstack.py b/tests/unittests/test_datasource/test_cloudstack.py index d6d2d6b2..83c2f753 100644 --- a/tests/unittests/test_datasource/test_cloudstack.py +++ b/tests/unittests/test_datasource/test_cloudstack.py @@ -10,6 +10,9 @@ from cloudinit.tests.helpers import CiTestCase, ExitStack, mock import os import time +MOD_PATH = 'cloudinit.sources.DataSourceCloudStack' +DS_PATH = MOD_PATH + '.DataSourceCloudStack' + class TestCloudStackPasswordFetching(CiTestCase): @@ -17,7 +20,7 @@ class TestCloudStackPasswordFetching(CiTestCase): super(TestCloudStackPasswordFetching, self).setUp() self.patches = ExitStack() self.addCleanup(self.patches.close) - mod_name = 'cloudinit.sources.DataSourceCloudStack' + mod_name = MOD_PATH self.patches.enter_context(mock.patch('{0}.ec2'.format(mod_name))) self.patches.enter_context(mock.patch('{0}.uhelp'.format(mod_name))) default_gw = "192.201.20.0" @@ -56,7 +59,9 @@ class TestCloudStackPasswordFetching(CiTestCase): ds.get_data() self.assertEqual({}, ds.get_config_obj()) - def test_password_sets_password(self): + @mock.patch(DS_PATH + '.wait_for_metadata_service') + def test_password_sets_password(self, m_wait): + m_wait.return_value = True password = 'SekritSquirrel' self._set_password_server_response(password) ds = DataSourceCloudStack( @@ -64,7 +69,9 @@ class TestCloudStackPasswordFetching(CiTestCase): ds.get_data() self.assertEqual(password, ds.get_config_obj()['password']) - def test_bad_request_doesnt_stop_ds_from_working(self): + @mock.patch(DS_PATH + '.wait_for_metadata_service') + def test_bad_request_doesnt_stop_ds_from_working(self, m_wait): + m_wait.return_value = True self._set_password_server_response('bad_request') ds = DataSourceCloudStack( {}, None, helpers.Paths({'run_dir': self.tmp})) @@ -79,7 +86,9 @@ class TestCloudStackPasswordFetching(CiTestCase): request_types.append(arg.split()[1]) self.assertEqual(expected_request_types, request_types) - def test_valid_response_means_password_marked_as_saved(self): + @mock.patch(DS_PATH + '.wait_for_metadata_service') + def test_valid_response_means_password_marked_as_saved(self, m_wait): + m_wait.return_value = True password = 'SekritSquirrel' subp = self._set_password_server_response(password) ds = DataSourceCloudStack( @@ -92,7 +101,9 @@ class TestCloudStackPasswordFetching(CiTestCase): subp = self._set_password_server_response(response_string) ds = DataSourceCloudStack( {}, None, helpers.Paths({'run_dir': self.tmp})) - ds.get_data() + with mock.patch(DS_PATH + '.wait_for_metadata_service') as m_wait: + m_wait.return_value = True + 
ds.get_data() self.assertRequestTypesSent(subp, ['send_my_password']) def test_password_not_saved_if_empty(self): diff --git a/tests/unittests/test_datasource/test_ec2.py b/tests/unittests/test_datasource/test_ec2.py index 5e1dd777..34a089f2 100644 --- a/tests/unittests/test_datasource/test_ec2.py +++ b/tests/unittests/test_datasource/test_ec2.py @@ -191,7 +191,9 @@ def register_mock_metaserver(base_url, data): register(base_url, 'not found', status=404) def myreg(*argc, **kwargs): - return httpretty.register_uri(httpretty.GET, *argc, **kwargs) + url = argc[0] + method = httpretty.PUT if ec2.API_TOKEN_ROUTE in url else httpretty.GET + return httpretty.register_uri(method, *argc, **kwargs) register_helper(myreg, base_url, data) @@ -237,6 +239,8 @@ class TestEc2(test_helpers.HttprettyTestCase): if md: all_versions = ( [ds.min_metadata_version] + ds.extended_metadata_versions) + token_url = self.data_url('latest', data_item='api/token') + register_mock_metaserver(token_url, 'API-TOKEN') for version in all_versions: metadata_url = self.data_url(version) + '/' if version == md_version: -- cgit v1.2.3 From f69d33a723b805fec3ee70c3a6127c8cadcb02d8 Mon Sep 17 00:00:00 2001 From: Chad Smith Date: Mon, 2 Dec 2019 16:24:18 -0700 Subject: url_helper: read_file_or_url should pass headers param into readurl (#66) Headers param was accidentally omitted and no longer passed through to readurl due to a previous commit. To avoid this omission of params in the future, drop positional param definitions from read_file_or_url and pass all kwargs through to readurl when we are not operating on a file. In util:read_seeded, correct the case where invalid positional param file_retries was being passed into read_file_or_url. Also drop duplicated file:// prefix addition from read_seeded because read_file_or_url does that work anyway. 
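A minimal sketch of the resulting calling convention (assuming the
post-patch signature; the URL and header below are placeholders, not
values from this change):

    from cloudinit import url_helper

    # http(s) URLs: every keyword argument is forwarded to readurl().
    resp = url_helper.read_file_or_url(
        'http://169.254.169.254/latest/meta-data/instance-id',
        timeout=5, retries=10, headers={'X-Example': 'yes'})
    # file:// URLs or bare paths are read directly; only 'data' is
    # consulted (posting data to a file logs a warning and is ignored).
    local = url_helper.read_file_or_url('/etc/hostname')
    if resp.ok() and local.ok():
        print(resp.contents, local.contents)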
LP: #1854084 --- cloudinit/sources/helpers/azure.py | 6 ++- cloudinit/tests/test_url_helper.py | 52 ++++++++++++++++++++++ cloudinit/url_helper.py | 47 +++++++++++++++---- cloudinit/user_data.py | 2 +- cloudinit/util.py | 15 ++----- .../unittests/test_datasource/test_azure_helper.py | 18 +++++--- 6 files changed, 112 insertions(+), 28 deletions(-) (limited to 'cloudinit/url_helper.py') diff --git a/cloudinit/sources/helpers/azure.py b/cloudinit/sources/helpers/azure.py index f1fba175..f5cdb3fd 100755 --- a/cloudinit/sources/helpers/azure.py +++ b/cloudinit/sources/helpers/azure.py @@ -183,14 +183,16 @@ class AzureEndpointHttpClient(object): if secure: headers = self.headers.copy() headers.update(self.extra_secure_headers) - return url_helper.read_file_or_url(url, headers=headers) + return url_helper.read_file_or_url(url, headers=headers, timeout=5, + retries=10) def post(self, url, data=None, extra_headers=None): headers = self.headers if extra_headers is not None: headers = self.headers.copy() headers.update(extra_headers) - return url_helper.read_file_or_url(url, data=data, headers=headers) + return url_helper.read_file_or_url(url, data=data, headers=headers, + timeout=5, retries=10) class GoalState(object): diff --git a/cloudinit/tests/test_url_helper.py b/cloudinit/tests/test_url_helper.py index aa9f3ec1..e883ddc2 100644 --- a/cloudinit/tests/test_url_helper.py +++ b/cloudinit/tests/test_url_helper.py @@ -4,6 +4,7 @@ from cloudinit.url_helper import ( NOT_FOUND, UrlError, oauth_headers, read_file_or_url, retry_on_url_exc) from cloudinit.tests.helpers import CiTestCase, mock, skipIf from cloudinit import util +from cloudinit import version import httpretty import requests @@ -17,6 +18,9 @@ except ImportError: _missing_oauthlib_dep = True +M_PATH = 'cloudinit.url_helper.' 
+ + class TestOAuthHeaders(CiTestCase): def test_oauth_headers_raises_not_implemented_when_oathlib_missing(self): @@ -67,6 +71,54 @@ class TestReadFileOrUrl(CiTestCase): self.assertEqual(result.contents, data) self.assertEqual(str(result), data.decode('utf-8')) + @mock.patch(M_PATH + 'readurl') + def test_read_file_or_url_passes_params_to_readurl(self, m_readurl): + """read_file_or_url passes all params through to readurl.""" + url = 'http://hostname/path' + response = 'This is my url content\n' + m_readurl.return_value = response + params = {'url': url, 'timeout': 1, 'retries': 2, + 'headers': {'somehdr': 'val'}, + 'data': 'data', 'sec_between': 1, + 'ssl_details': {'cert_file': '/path/cert.pem'}, + 'headers_cb': 'headers_cb', 'exception_cb': 'exception_cb'} + self.assertEqual(response, read_file_or_url(**params)) + params.pop('url') # url is passed in as a positional arg + self.assertEqual([mock.call(url, **params)], m_readurl.call_args_list) + + def test_wb_read_url_defaults_honored_by_read_file_or_url_callers(self): + """Readurl param defaults used when unspecified by read_file_or_url + + Param defaults tested are as follows: + retries: 0, additional headers None beyond default, method: GET, + data: None, check_status: True and allow_redirects: True + """ + url = 'http://hostname/path' + + m_response = mock.MagicMock() + + class FakeSession(requests.Session): + def request(cls, **kwargs): + self.assertEqual( + {'url': url, 'allow_redirects': True, 'method': 'GET', + 'headers': { + 'User-Agent': 'Cloud-Init/%s' % ( + version.version_string())}}, + kwargs) + return m_response + + with mock.patch(M_PATH + 'requests.Session') as m_session: + error = requests.exceptions.HTTPError('broke') + m_session.side_effect = [error, FakeSession()] + # assert no retries and check_status == True + with self.assertRaises(UrlError) as context_manager: + response = read_file_or_url(url) + self.assertEqual('broke', str(context_manager.exception)) + # assert default headers, method, url and allow_redirects True + # Success on 2nd call with FakeSession + response = read_file_or_url(url) + self.assertEqual(m_response, response._response) + class TestRetryOnUrlExc(CiTestCase): diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index 48ddae45..1496a471 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -81,14 +81,19 @@ def combine_url(base, *add_ons): return url -def read_file_or_url(url, timeout=5, retries=10, - headers=None, data=None, sec_between=1, ssl_details=None, - headers_cb=None, exception_cb=None): +def read_file_or_url(url, **kwargs): + """Wrapper function around readurl to allow passing a file path as url. + + When url is not a local file path, passthrough any kwargs to readurl. + + In the case of parameter passthrough to readurl, default values for some + parameters. See: call-signature of readurl in this module for param docs. 
+ """ url = url.lstrip() if url.startswith("/"): url = "file://%s" % url if url.lower().startswith("file://"): - if data: + if kwargs.get("data"): LOG.warning("Unable to post data to file resource %s", url) file_path = url[len("file://"):] try: @@ -101,10 +106,7 @@ def read_file_or_url(url, timeout=5, retries=10, raise UrlError(cause=e, code=code, headers=None, url=url) return FileResponse(file_path, contents=contents) else: - return readurl(url, timeout=timeout, retries=retries, - headers_cb=headers_cb, data=data, - sec_between=sec_between, ssl_details=ssl_details, - exception_cb=exception_cb) + return readurl(url, **kwargs) # Made to have same accessors as UrlResponse so that the @@ -201,6 +203,35 @@ def readurl(url, data=None, timeout=None, retries=0, sec_between=1, check_status=True, allow_redirects=True, exception_cb=None, session=None, infinite=False, log_req_resp=True, request_method=None): + """Wrapper around requests.Session to read the url and retry if necessary + + :param url: Mandatory url to request. + :param data: Optional form data to post the URL. Will set request_method + to 'POST' if present. + :param timeout: Timeout in seconds to wait for a response + :param retries: Number of times to retry on exception if exception_cb is + None or exception_cb returns True for the exception caught. Default is + to fail with 0 retries on exception. + :param sec_between: Default 1: amount of seconds passed to time.sleep + between retries. None or -1 means don't sleep. + :param headers: Optional dict of headers to send during request + :param headers_cb: Optional callable returning a dict of values to send as + headers during request + :param ssl_details: Optional dict providing key_file, ca_certs, and + cert_file keys for use on in ssl connections. + :param check_status: Optional boolean set True to raise when HTTPError + occurs. Default: True. + :param allow_redirects: Optional boolean passed straight to Session.request + as 'allow_redirects'. Default: True. + :param exception_cb: Optional callable which accepts the params + msg and exception and returns a boolean True if retries are permitted. + :param session: Optional exiting requests.Session instance to reuse. + :param infinite: Bool, set True to retry indefinitely. Default: False. + :param log_req_resp: Set False to turn off verbose debug messages. + :param request_method: String passed as 'method' to Session.request. + Typically GET, or POST. Default: POST if data is provided, GET + otherwise. + """ url = _cleanurl(url) req_args = { 'url': url, diff --git a/cloudinit/user_data.py b/cloudinit/user_data.py index ed83d2d8..15af1daf 100644 --- a/cloudinit/user_data.py +++ b/cloudinit/user_data.py @@ -224,7 +224,7 @@ class UserDataProcessor(object): content = util.load_file(include_once_fn) else: try: - resp = read_file_or_url(include_url, + resp = read_file_or_url(include_url, timeout=5, retries=10, ssl_details=self.ssl_details) if include_once_on and resp.ok(): util.write_file(include_once_fn, resp.contents, diff --git a/cloudinit/util.py b/cloudinit/util.py index 78b6a2d0..9d9d5c72 100644 --- a/cloudinit/util.py +++ b/cloudinit/util.py @@ -986,13 +986,6 @@ def load_yaml(blob, default=None, allowed=(dict,)): def read_seeded(base="", ext="", timeout=5, retries=10, file_retries=0): - if base.startswith("/"): - base = "file://%s" % base - - # default retries for file is 0. 
for network is 10 - if base.startswith("file://"): - retries = file_retries - if base.find("%s") >= 0: ud_url = base % ("user-data" + ext) md_url = base % ("meta-data" + ext) @@ -1000,14 +993,14 @@ def read_seeded(base="", ext="", timeout=5, retries=10, file_retries=0): ud_url = "%s%s%s" % (base, "user-data", ext) md_url = "%s%s%s" % (base, "meta-data", ext) - md_resp = url_helper.read_file_or_url(md_url, timeout, retries, - file_retries) + md_resp = url_helper.read_file_or_url(md_url, timeout=timeout, + retries=retries) md = None if md_resp.ok(): md = load_yaml(decode_binary(md_resp.contents), default={}) - ud_resp = url_helper.read_file_or_url(ud_url, timeout, retries, - file_retries) + ud_resp = url_helper.read_file_or_url(ud_url, timeout=timeout, + retries=retries) ud = None if ud_resp.ok(): ud = ud_resp.contents diff --git a/tests/unittests/test_datasource/test_azure_helper.py b/tests/unittests/test_datasource/test_azure_helper.py index bd006aba..bd17f636 100644 --- a/tests/unittests/test_datasource/test_azure_helper.py +++ b/tests/unittests/test_datasource/test_azure_helper.py @@ -212,8 +212,10 @@ class TestAzureEndpointHttpClient(CiTestCase): response = client.get(url, secure=False) self.assertEqual(1, self.read_file_or_url.call_count) self.assertEqual(self.read_file_or_url.return_value, response) - self.assertEqual(mock.call(url, headers=self.regular_headers), - self.read_file_or_url.call_args) + self.assertEqual( + mock.call(url, headers=self.regular_headers, retries=10, + timeout=5), + self.read_file_or_url.call_args) def test_secure_get(self): url = 'MyTestUrl' @@ -227,8 +229,10 @@ class TestAzureEndpointHttpClient(CiTestCase): response = client.get(url, secure=True) self.assertEqual(1, self.read_file_or_url.call_count) self.assertEqual(self.read_file_or_url.return_value, response) - self.assertEqual(mock.call(url, headers=expected_headers), - self.read_file_or_url.call_args) + self.assertEqual( + mock.call(url, headers=expected_headers, retries=10, + timeout=5), + self.read_file_or_url.call_args) def test_post(self): data = mock.MagicMock() @@ -238,7 +242,8 @@ class TestAzureEndpointHttpClient(CiTestCase): self.assertEqual(1, self.read_file_or_url.call_count) self.assertEqual(self.read_file_or_url.return_value, response) self.assertEqual( - mock.call(url, data=data, headers=self.regular_headers), + mock.call(url, data=data, headers=self.regular_headers, retries=10, + timeout=5), self.read_file_or_url.call_args) def test_post_with_extra_headers(self): @@ -250,7 +255,8 @@ class TestAzureEndpointHttpClient(CiTestCase): expected_headers = self.regular_headers.copy() expected_headers.update(extra_headers) self.assertEqual( - mock.call(mock.ANY, data=mock.ANY, headers=expected_headers), + mock.call(mock.ANY, data=mock.ANY, headers=expected_headers, + retries=10, timeout=5), self.read_file_or_url.call_args) -- cgit v1.2.3 From 8c4fd886931abcf2cc8627a47463907d655b35c3 Mon Sep 17 00:00:00 2001 From: Daniel Watkins Date: Tue, 21 Jan 2020 17:15:30 -0500 Subject: Start removing dependency on six (#178) * url_helper: drop six * url_helper: sort imports * log: drop six * log: sort imports * handlers/__init__: drop six * handlers/__init__: sort imports * user_data: drop six * user_data: sort imports * sources/__init__: drop six * sources/__init__: sort imports * DataSourceOVF: drop six * DataSourceOVF: sort imports * sources/helpers/openstack: drop six * sources/helpers/openstack: sort imports * mergers/m_str: drop six This also allowed simplification of the logic, as we will never encounter 
a non-string text type. * type_utils: drop six * mergers/m_dict: drop six * mergers/m_list: drop six * cmd/query: drop six * mergers/__init__: drop six * net/cmdline: drop six * reporting/handlers: drop six * reporting/handlers: sort imports --- cloudinit/cmd/query.py | 3 +-- cloudinit/handlers/__init__.py | 9 +++------ cloudinit/log.py | 14 +++++--------- cloudinit/mergers/__init__.py | 4 +--- cloudinit/mergers/m_dict.py | 4 +--- cloudinit/mergers/m_list.py | 4 +--- cloudinit/mergers/m_str.py | 9 ++------- cloudinit/net/cmdline.py | 5 +---- cloudinit/reporting/handlers.py | 19 ++++++------------- cloudinit/sources/DataSourceOVF.py | 8 ++------ cloudinit/sources/__init__.py | 21 +++++++++------------ cloudinit/sources/helpers/openstack.py | 8 ++------ cloudinit/type_utils.py | 25 +++++++------------------ cloudinit/url_helper.py | 20 +++++--------------- cloudinit/user_data.py | 11 ++++------- 15 files changed, 50 insertions(+), 114 deletions(-) (limited to 'cloudinit/url_helper.py') diff --git a/cloudinit/cmd/query.py b/cloudinit/cmd/query.py index 1d888b9d..e3db8679 100644 --- a/cloudinit/cmd/query.py +++ b/cloudinit/cmd/query.py @@ -5,7 +5,6 @@ import argparse from errno import EACCES import os -import six import sys from cloudinit.handlers.jinja_template import ( @@ -149,7 +148,7 @@ def handle_args(name, args): response = '\n'.join(sorted(response.keys())) elif args.list_keys: response = '\n'.join(sorted(response.keys())) - if not isinstance(response, six.string_types): + if not isinstance(response, str): response = util.json_dumps(response) print(response) return 0 diff --git a/cloudinit/handlers/__init__.py b/cloudinit/handlers/__init__.py index 0db75af9..a409ff8a 100644 --- a/cloudinit/handlers/__init__.py +++ b/cloudinit/handlers/__init__.py @@ -10,14 +10,12 @@ import abc import os -import six - -from cloudinit.settings import (PER_ALWAYS, PER_INSTANCE, FREQUENCIES) from cloudinit import importer from cloudinit import log as logging from cloudinit import type_utils from cloudinit import util +from cloudinit.settings import (PER_ALWAYS, PER_INSTANCE, FREQUENCIES) LOG = logging.getLogger(__name__) @@ -60,8 +58,7 @@ INCLUSION_SRCH = sorted(list(INCLUSION_TYPES_MAP.keys()), key=(lambda e: 0 - len(e))) -@six.add_metaclass(abc.ABCMeta) -class Handler(object): +class Handler(metaclass=abc.ABCMeta): def __init__(self, frequency, version=2): self.handler_version = version @@ -159,7 +156,7 @@ def _extract_first_or_bytes(blob, size): # Extract the first line or upto X symbols for text objects # Extract first X bytes for binary objects try: - if isinstance(blob, six.string_types): + if isinstance(blob, str): start = blob.split("\n", 1)[0] else: # We want to avoid decoding the whole blob (it might be huge) diff --git a/cloudinit/log.py b/cloudinit/log.py index 5ae312ba..827db12b 100644 --- a/cloudinit/log.py +++ b/cloudinit/log.py @@ -8,17 +8,13 @@ # # This file is part of cloud-init. See LICENSE file for license information. 
+import collections +import io import logging import logging.config import logging.handlers - -import collections import os import sys - -import six -from six import StringIO - import time # Logging levels for easy access @@ -74,13 +70,13 @@ def setupLogging(cfg=None): log_cfgs = [] log_cfg = cfg.get('logcfg') - if log_cfg and isinstance(log_cfg, six.string_types): + if log_cfg and isinstance(log_cfg, str): # If there is a 'logcfg' entry in the config, # respect it, it is the old keyname log_cfgs.append(str(log_cfg)) elif "log_cfgs" in cfg: for a_cfg in cfg['log_cfgs']: - if isinstance(a_cfg, six.string_types): + if isinstance(a_cfg, str): log_cfgs.append(a_cfg) elif isinstance(a_cfg, (collections.Iterable)): cfg_str = [str(c) for c in a_cfg] @@ -100,7 +96,7 @@ def setupLogging(cfg=None): # is acting as a file) pass else: - log_cfg = StringIO(log_cfg) + log_cfg = io.StringIO(log_cfg) # Attempt to load its config logging.config.fileConfig(log_cfg) # The first one to work wins! diff --git a/cloudinit/mergers/__init__.py b/cloudinit/mergers/__init__.py index 7fbc25ff..668e3cd6 100644 --- a/cloudinit/mergers/__init__.py +++ b/cloudinit/mergers/__init__.py @@ -6,8 +6,6 @@ import re -import six - from cloudinit import importer from cloudinit import log as logging from cloudinit import type_utils @@ -85,7 +83,7 @@ def dict_extract_mergers(config): raw_mergers = config.pop('merge_type', None) if raw_mergers is None: return parsed_mergers - if isinstance(raw_mergers, six.string_types): + if isinstance(raw_mergers, str): return string_extract_mergers(raw_mergers) for m in raw_mergers: if isinstance(m, (dict)): diff --git a/cloudinit/mergers/m_dict.py b/cloudinit/mergers/m_dict.py index 6c5fddc2..93472f13 100644 --- a/cloudinit/mergers/m_dict.py +++ b/cloudinit/mergers/m_dict.py @@ -4,8 +4,6 @@ # # This file is part of cloud-init. See LICENSE file for license information. -import six - DEF_MERGE_TYPE = 'no_replace' MERGE_TYPES = ('replace', DEF_MERGE_TYPE,) @@ -47,7 +45,7 @@ class Merger(object): return new_v if isinstance(new_v, (list, tuple)) and self._recurse_array: return self._merger.merge(old_v, new_v) - if isinstance(new_v, six.string_types) and self._recurse_str: + if isinstance(new_v, str) and self._recurse_str: return self._merger.merge(old_v, new_v) if isinstance(new_v, (dict)) and self._recurse_dict: return self._merger.merge(old_v, new_v) diff --git a/cloudinit/mergers/m_list.py b/cloudinit/mergers/m_list.py index daa0469a..19f32771 100644 --- a/cloudinit/mergers/m_list.py +++ b/cloudinit/mergers/m_list.py @@ -4,8 +4,6 @@ # # This file is part of cloud-init. See LICENSE file for license information. -import six - DEF_MERGE_TYPE = 'replace' MERGE_TYPES = ('append', 'prepend', DEF_MERGE_TYPE, 'no_replace') @@ -63,7 +61,7 @@ class Merger(object): return old_v if isinstance(new_v, (list, tuple)) and self._recurse_array: return self._merger.merge(old_v, new_v) - if isinstance(new_v, six.string_types) and self._recurse_str: + if isinstance(new_v, str) and self._recurse_str: return self._merger.merge(old_v, new_v) if isinstance(new_v, (dict)) and self._recurse_dict: return self._merger.merge(old_v, new_v) diff --git a/cloudinit/mergers/m_str.py b/cloudinit/mergers/m_str.py index 629df58e..539e3e29 100644 --- a/cloudinit/mergers/m_str.py +++ b/cloudinit/mergers/m_str.py @@ -4,8 +4,6 @@ # # This file is part of cloud-init. See LICENSE file for license information. 
-import six - class Merger(object): def __init__(self, _merger, opts): @@ -23,13 +21,10 @@ class Merger(object): # perform the following action, if appending we will # merge them together, otherwise we will just return value. def _on_str(self, value, merge_with): - if not isinstance(value, six.string_types): + if not isinstance(value, str): return merge_with if not self._append: return merge_with - if isinstance(value, six.text_type): - return value + six.text_type(merge_with) - else: - return value + six.binary_type(merge_with) + return value + merge_with # vi: ts=4 expandtab diff --git a/cloudinit/net/cmdline.py b/cloudinit/net/cmdline.py index 55166ea8..bfb40aae 100755 --- a/cloudinit/net/cmdline.py +++ b/cloudinit/net/cmdline.py @@ -12,8 +12,6 @@ import gzip import io import os -import six - from cloudinit import util from . import get_devicelist @@ -22,8 +20,7 @@ from . import read_sys_net_safe _OPEN_ISCSI_INTERFACE_FILE = "/run/initramfs/open-iscsi.interface" -@six.add_metaclass(abc.ABCMeta) -class InitramfsNetworkConfigSource(object): +class InitramfsNetworkConfigSource(metaclass=abc.ABCMeta): """ABC for net config sources that read config written by initramfses""" @abc.abstractmethod diff --git a/cloudinit/reporting/handlers.py b/cloudinit/reporting/handlers.py index 6605e795..946df7e0 100755 --- a/cloudinit/reporting/handlers.py +++ b/cloudinit/reporting/handlers.py @@ -1,25 +1,19 @@ # This file is part of cloud-init. See LICENSE file for license information. import abc -import uuid import fcntl import json -import six import os +import queue import struct import threading import time +import uuid +from datetime import datetime from cloudinit import log as logging from cloudinit.registry import DictRegistry from cloudinit import (url_helper, util) -from datetime import datetime -from six.moves.queue import Empty as QueueEmptyError - -if six.PY2: - from multiprocessing.queues import JoinableQueue as JQueue -else: - from queue import Queue as JQueue LOG = logging.getLogger(__name__) @@ -28,8 +22,7 @@ class ReportException(Exception): pass -@six.add_metaclass(abc.ABCMeta) -class ReportingHandler(object): +class ReportingHandler(metaclass=abc.ABCMeta): """Base class for report handlers. Implement :meth:`~publish_event` for controlling what @@ -141,7 +134,7 @@ class HyperVKvpReportingHandler(ReportingHandler): self._kvp_file_path) self._event_types = event_types - self.q = JQueue() + self.q = queue.Queue() self.incarnation_no = self._get_incarnation_no() self.event_key_prefix = u"{0}|{1}".format(self.EVENT_PREFIX, self.incarnation_no) @@ -303,7 +296,7 @@ class HyperVKvpReportingHandler(ReportingHandler): # get all the rest of the events in the queue event = self.q.get(block=False) items_from_queue += 1 - except QueueEmptyError: + except queue.Empty: event = None try: self._append_kvp_item(encoded_data) diff --git a/cloudinit/sources/DataSourceOVF.py b/cloudinit/sources/DataSourceOVF.py index 896841e3..9f6e6b6c 100644 --- a/cloudinit/sources/DataSourceOVF.py +++ b/cloudinit/sources/DataSourceOVF.py @@ -8,19 +8,15 @@ # # This file is part of cloud-init. See LICENSE file for license information. 
-from xml.dom import minidom - import base64 import os import re import time - -import six +from xml.dom import minidom from cloudinit import log as logging from cloudinit import sources from cloudinit import util - from cloudinit.sources.helpers.vmware.imc.config \ import Config from cloudinit.sources.helpers.vmware.imc.config_custom_script \ @@ -458,7 +454,7 @@ def maybe_cdrom_device(devname): """ if not devname: return False - elif not isinstance(devname, six.string_types): + elif not isinstance(devname, str): raise ValueError("Unexpected input for devname: %s" % devname) # resolve '..' and multi '/' elements diff --git a/cloudinit/sources/__init__.py b/cloudinit/sources/__init__.py index e6baf8f4..dd93cfd8 100644 --- a/cloudinit/sources/__init__.py +++ b/cloudinit/sources/__init__.py @@ -9,21 +9,19 @@ # This file is part of cloud-init. See LICENSE file for license information. import abc -from collections import namedtuple import copy import json import os -import six +from collections import namedtuple -from cloudinit.atomic_helper import write_json from cloudinit import importer from cloudinit import log as logging from cloudinit import net -from cloudinit.event import EventType from cloudinit import type_utils from cloudinit import user_data as ud from cloudinit import util - +from cloudinit.atomic_helper import write_json +from cloudinit.event import EventType from cloudinit.filters import launch_index from cloudinit.reporting import events @@ -136,8 +134,7 @@ URLParams = namedtuple( 'URLParms', ['max_wait_seconds', 'timeout_seconds', 'num_retries']) -@six.add_metaclass(abc.ABCMeta) -class DataSource(object): +class DataSource(metaclass=abc.ABCMeta): dsmode = DSMODE_NETWORK default_locale = 'en_US.UTF-8' @@ -436,7 +433,7 @@ class DataSource(object): return self._cloud_name if self.metadata and self.metadata.get(METADATA_CLOUD_NAME_KEY): cloud_name = self.metadata.get(METADATA_CLOUD_NAME_KEY) - if isinstance(cloud_name, six.string_types): + if isinstance(cloud_name, str): self._cloud_name = cloud_name.lower() else: self._cloud_name = self._get_cloud_name().lower() @@ -718,8 +715,8 @@ def normalize_pubkey_data(pubkey_data): if not pubkey_data: return keys - if isinstance(pubkey_data, six.string_types): - return str(pubkey_data).splitlines() + if isinstance(pubkey_data, str): + return pubkey_data.splitlines() if isinstance(pubkey_data, (list, set)): return list(pubkey_data) @@ -729,7 +726,7 @@ def normalize_pubkey_data(pubkey_data): # lp:506332 uec metadata service responds with # data that makes boto populate a string for 'klist' rather # than a list. 
- if isinstance(klist, six.string_types): + if isinstance(klist, str): klist = [klist] if isinstance(klist, (list, set)): for pkey in klist: @@ -837,7 +834,7 @@ def convert_vendordata(data, recurse=True): """ if not data: return None - if isinstance(data, six.string_types): + if isinstance(data, str): return data if isinstance(data, list): return copy.deepcopy(data) diff --git a/cloudinit/sources/helpers/openstack.py b/cloudinit/sources/helpers/openstack.py index 0778f45a..441db506 100644 --- a/cloudinit/sources/helpers/openstack.py +++ b/cloudinit/sources/helpers/openstack.py @@ -12,15 +12,12 @@ import copy import functools import os -import six - from cloudinit import ec2_utils from cloudinit import log as logging from cloudinit import net from cloudinit import sources from cloudinit import url_helper from cloudinit import util - from cloudinit.sources import BrokenMetadata # See https://docs.openstack.org/user-guide/cli-config-drive.html @@ -163,8 +160,7 @@ class SourceMixin(object): return device -@six.add_metaclass(abc.ABCMeta) -class BaseReader(object): +class BaseReader(metaclass=abc.ABCMeta): def __init__(self, base_path): self.base_path = base_path @@ -227,7 +223,7 @@ class BaseReader(object): """ load_json_anytype = functools.partial( - util.load_json, root_types=(dict, list) + six.string_types) + util.load_json, root_types=(dict, list, str)) def datafiles(version): files = {} diff --git a/cloudinit/type_utils.py b/cloudinit/type_utils.py index 6132654b..2c1ae368 100644 --- a/cloudinit/type_utils.py +++ b/cloudinit/type_utils.py @@ -10,29 +10,18 @@ import types -import six - -if six.PY3: - _NAME_TYPES = ( - types.ModuleType, - types.FunctionType, - types.LambdaType, - type, - ) -else: - _NAME_TYPES = ( - types.TypeType, - types.ModuleType, - types.FunctionType, - types.LambdaType, - types.ClassType, - ) +_NAME_TYPES = ( + types.ModuleType, + types.FunctionType, + types.LambdaType, + type, +) def obj_name(obj): if isinstance(obj, _NAME_TYPES): - return six.text_type(obj.__name__) + return str(obj.__name__) else: if not hasattr(obj, '__class__'): return repr(obj) diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index 1496a471..f6d68436 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -10,32 +10,22 @@ import json import os -import requests -import six import time - from email.utils import parsedate from errno import ENOENT from functools import partial +from http.client import NOT_FOUND from itertools import count -from requests import exceptions +from urllib.parse import urlparse, urlunparse, quote -from six.moves.urllib.parse import ( - urlparse, urlunparse, - quote as urlquote) +import requests +from requests import exceptions from cloudinit import log as logging from cloudinit import version LOG = logging.getLogger(__name__) -if six.PY2: - import httplib - NOT_FOUND = httplib.NOT_FOUND -else: - import http.client - NOT_FOUND = http.client.NOT_FOUND - # Check if requests has ssl support (added in requests >= 0.8.8) SSL_ENABLED = False @@ -71,7 +61,7 @@ def combine_url(base, *add_ons): path = url_parsed[2] if path and not path.endswith("/"): path += "/" - path += urlquote(str(add_on), safe="/:") + path += quote(str(add_on), safe="/:") url_parsed[2] = path return urlunparse(url_parsed) diff --git a/cloudinit/user_data.py b/cloudinit/user_data.py index 15af1daf..6f41b03a 100644 --- a/cloudinit/user_data.py +++ b/cloudinit/user_data.py @@ -9,14 +9,11 @@ # This file is part of cloud-init. See LICENSE file for license information. 
import os - from email.mime.base import MIMEBase from email.mime.multipart import MIMEMultipart from email.mime.nonmultipart import MIMENonMultipart from email.mime.text import MIMEText -import six - from cloudinit import handlers from cloudinit import log as logging from cloudinit.url_helper import read_file_or_url, UrlError @@ -259,7 +256,7 @@ class UserDataProcessor(object): # filename and type not be present # or # scalar(payload) - if isinstance(ent, six.string_types): + if isinstance(ent, str): ent = {'content': ent} if not isinstance(ent, (dict)): # TODO(harlowja) raise? @@ -269,13 +266,13 @@ mtype = ent.get('type') if not mtype: default = ARCHIVE_UNDEF_TYPE - if isinstance(content, six.binary_type): + if isinstance(content, bytes): default = ARCHIVE_UNDEF_BINARY_TYPE mtype = handlers.type_from_starts_with(content, default) maintype, subtype = mtype.split('/', 1) if maintype == "text": - if isinstance(content, six.binary_type): + if isinstance(content, bytes): content = content.decode() msg = MIMEText(content, _subtype=subtype) else: @@ -348,7 +345,7 @@ def convert_string(raw_data, content_type=NOT_MULTIPART_TYPE): msg.set_payload(data) return msg - if isinstance(raw_data, six.text_type): + if isinstance(raw_data, str): bdata = raw_data.encode('utf-8') else: bdata = raw_data -- cgit v1.2.3 From 87cd040ed8fe7195cbb357ed3bbf53cd2a81436c Mon Sep 17 00:00:00 2001 From: Ryan Harper Date: Wed, 19 Feb 2020 15:01:09 -0600 Subject: ec2: Do not log IMDSv2 token values, instead use REDACTED (#219) Instead of logging the token values used, log the headers and replace the actual values with the string 'REDACTED'. This allows users to examine cloud-init.log and see that the IMDSv2 token header is being used, but avoids leaving the value used in the log file itself.
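As a rough standalone sketch of that behavior (the filter_for_log helper name is illustrative, not code from this patch): the copy of the request arguments destined for the log has its sensitive header values overwritten, while the real headers still go out with the request.

    import copy

    REDACTED = 'REDACTED'
    AWS_TOKEN_PUT_HEADER = 'X-aws-ec2-metadata-token'
    AWS_TOKEN_REQ_HEADER = AWS_TOKEN_PUT_HEADER + '-ttl-seconds'

    def filter_for_log(req_args, headers_redact):
        # Deep-copy so the actual request is untouched; only the logged
        # copy has its redacted header values replaced.
        filtered = copy.deepcopy(req_args)
        for hkey in filtered.get('headers', {}):
            if hkey in headers_redact:
                filtered['headers'][hkey] = REDACTED
        return filtered

    req_args = {'url': 'http://169.254.169.254/latest/api/token',
                'method': 'PUT',
                'headers': {AWS_TOKEN_REQ_HEADER: '21600'}}
    print(filter_for_log(req_args, [AWS_TOKEN_PUT_HEADER, AWS_TOKEN_REQ_HEADER]))
    # -> headers log as {'X-aws-ec2-metadata-token-ttl-seconds': 'REDACTED'}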
LP: #1863943 --- cloudinit/ec2_utils.py | 12 ++++++++-- cloudinit/sources/DataSourceEc2.py | 35 +++++++++++++++++++---------- cloudinit/url_helper.py | 27 ++++++++++++++++------ tests/unittests/test_datasource/test_ec2.py | 17 ++++++++++++++ 4 files changed, 70 insertions(+), 21 deletions(-) (limited to 'cloudinit/url_helper.py') diff --git a/cloudinit/ec2_utils.py b/cloudinit/ec2_utils.py index 57708c14..34acfe84 100644 --- a/cloudinit/ec2_utils.py +++ b/cloudinit/ec2_utils.py @@ -142,7 +142,8 @@ def skip_retry_on_codes(status_codes, _request_args, cause): def get_instance_userdata(api_version='latest', metadata_address='http://169.254.169.254', ssl_details=None, timeout=5, retries=5, - headers_cb=None, exception_cb=None): + headers_cb=None, headers_redact=None, + exception_cb=None): ud_url = url_helper.combine_url(metadata_address, api_version) ud_url = url_helper.combine_url(ud_url, 'user-data') user_data = '' @@ -155,7 +156,8 @@ def get_instance_userdata(api_version='latest', SKIP_USERDATA_CODES) response = url_helper.read_file_or_url( ud_url, ssl_details=ssl_details, timeout=timeout, - retries=retries, exception_cb=exception_cb, headers_cb=headers_cb) + retries=retries, exception_cb=exception_cb, headers_cb=headers_cb, + headers_redact=headers_redact) user_data = response.contents except url_helper.UrlError as e: if e.code not in SKIP_USERDATA_CODES: @@ -169,11 +171,13 @@ def _get_instance_metadata(tree, api_version='latest', metadata_address='http://169.254.169.254', ssl_details=None, timeout=5, retries=5, leaf_decoder=None, headers_cb=None, + headers_redact=None, exception_cb=None): md_url = url_helper.combine_url(metadata_address, api_version, tree) caller = functools.partial( url_helper.read_file_or_url, ssl_details=ssl_details, timeout=timeout, retries=retries, headers_cb=headers_cb, + headers_redact=headers_redact, exception_cb=exception_cb) def mcaller(url): @@ -197,6 +201,7 @@ def get_instance_metadata(api_version='latest', metadata_address='http://169.254.169.254', ssl_details=None, timeout=5, retries=5, leaf_decoder=None, headers_cb=None, + headers_redact=None, exception_cb=None): # Note, 'meta-data' explicitly has trailing /. 
# this is required for CloudStack (LP: #1356855) @@ -204,6 +209,7 @@ def get_instance_metadata(api_version='latest', metadata_address=metadata_address, ssl_details=ssl_details, timeout=timeout, retries=retries, leaf_decoder=leaf_decoder, + headers_redact=headers_redact, headers_cb=headers_cb, exception_cb=exception_cb) @@ -212,12 +218,14 @@ def get_instance_identity(api_version='latest', metadata_address='http://169.254.169.254', ssl_details=None, timeout=5, retries=5, leaf_decoder=None, headers_cb=None, + headers_redact=None, exception_cb=None): return _get_instance_metadata(tree='dynamic/instance-identity', api_version=api_version, metadata_address=metadata_address, ssl_details=ssl_details, timeout=timeout, retries=retries, leaf_decoder=leaf_decoder, + headers_redact=headers_redact, headers_cb=headers_cb, exception_cb=exception_cb) # vi: ts=4 expandtab diff --git a/cloudinit/sources/DataSourceEc2.py b/cloudinit/sources/DataSourceEc2.py index b9f346a6..0f2bfef4 100644 --- a/cloudinit/sources/DataSourceEc2.py +++ b/cloudinit/sources/DataSourceEc2.py @@ -31,6 +31,9 @@ STRICT_ID_DEFAULT = "warn" API_TOKEN_ROUTE = 'latest/api/token' API_TOKEN_DISABLED = '_ec2_disable_api_token' AWS_TOKEN_TTL_SECONDS = '21600' +AWS_TOKEN_PUT_HEADER = 'X-aws-ec2-metadata-token' +AWS_TOKEN_REQ_HEADER = AWS_TOKEN_PUT_HEADER + '-ttl-seconds' +AWS_TOKEN_REDACT = [AWS_TOKEN_PUT_HEADER, AWS_TOKEN_REQ_HEADER] class CloudNames(object): @@ -158,7 +161,8 @@ class DataSourceEc2(sources.DataSource): for api_ver in self.extended_metadata_versions: url = url_tmpl.format(self.metadata_address, api_ver) try: - resp = uhelp.readurl(url=url, headers=headers) + resp = uhelp.readurl(url=url, headers=headers, + headers_redact=AWS_TOKEN_REDACT) except uhelp.UrlError as e: LOG.debug('url %s raised exception %s', url, e) else: @@ -180,6 +184,7 @@ class DataSourceEc2(sources.DataSource): self.identity = ec2.get_instance_identity( api_version, self.metadata_address, headers_cb=self._get_headers, + headers_redact=AWS_TOKEN_REDACT, exception_cb=self._refresh_stale_aws_token_cb).get( 'document', {}) return self.identity.get( @@ -205,7 +210,8 @@ class DataSourceEc2(sources.DataSource): LOG.debug('Fetching Ec2 IMDSv2 API Token') url, response = uhelp.wait_for_url( urls=urls, max_wait=1, timeout=1, status_cb=self._status_cb, - headers_cb=self._get_headers, request_method=request_method) + headers_cb=self._get_headers, request_method=request_method, + headers_redact=AWS_TOKEN_REDACT) if url and response: self._api_token = response @@ -252,7 +258,8 @@ class DataSourceEc2(sources.DataSource): url, _ = uhelp.wait_for_url( urls=urls, max_wait=url_params.max_wait_seconds, timeout=url_params.timeout_seconds, status_cb=LOG.warning, - headers_cb=self._get_headers, request_method=request_method) + headers_redact=AWS_TOKEN_REDACT, headers_cb=self._get_headers, + request_method=request_method) if url: metadata_address = url2base[url] @@ -420,6 +427,7 @@ class DataSourceEc2(sources.DataSource): if not self.wait_for_metadata_service(): return {} api_version = self.get_metadata_api_version() + redact = AWS_TOKEN_REDACT crawled_metadata = {} if self.cloud_name == CloudNames.AWS: exc_cb = self._refresh_stale_aws_token_cb @@ -429,14 +437,17 @@ class DataSourceEc2(sources.DataSource): try: crawled_metadata['user-data'] = ec2.get_instance_userdata( api_version, self.metadata_address, - headers_cb=self._get_headers, exception_cb=exc_cb_ud) + headers_cb=self._get_headers, headers_redact=redact, + exception_cb=exc_cb_ud) crawled_metadata['meta-data'] = 
ec2.get_instance_metadata( api_version, self.metadata_address, - headers_cb=self._get_headers, exception_cb=exc_cb) + headers_cb=self._get_headers, headers_redact=redact, + exception_cb=exc_cb) if self.cloud_name == CloudNames.AWS: identity = ec2.get_instance_identity( api_version, self.metadata_address, - headers_cb=self._get_headers, exception_cb=exc_cb) + headers_cb=self._get_headers, headers_redact=redact, + exception_cb=exc_cb) crawled_metadata['dynamic'] = {'instance-identity': identity} except Exception: util.logexc( @@ -455,11 +466,12 @@ class DataSourceEc2(sources.DataSource): if self.cloud_name != CloudNames.AWS: return None LOG.debug("Refreshing Ec2 metadata API token") - request_header = {'X-aws-ec2-metadata-token-ttl-seconds': seconds} + request_header = {AWS_TOKEN_REQ_HEADER: seconds} token_url = '{}/{}'.format(self.metadata_address, API_TOKEN_ROUTE) try: - response = uhelp.readurl( - token_url, headers=request_header, request_method="PUT") + response = uhelp.readurl(token_url, headers=request_header, + headers_redact=AWS_TOKEN_REDACT, + request_method="PUT") except uhelp.UrlError as e: LOG.warning( 'Unable to get API token: %s raised exception %s', @@ -500,8 +512,7 @@ class DataSourceEc2(sources.DataSource): API_TOKEN_DISABLED): return {} # Request a 6 hour token if URL is API_TOKEN_ROUTE - request_token_header = { - 'X-aws-ec2-metadata-token-ttl-seconds': AWS_TOKEN_TTL_SECONDS} + request_token_header = {AWS_TOKEN_REQ_HEADER: AWS_TOKEN_TTL_SECONDS} if API_TOKEN_ROUTE in url: return request_token_header if not self._api_token: @@ -511,7 +522,7 @@ class DataSourceEc2(sources.DataSource): self._api_token = self._refresh_api_token() if not self._api_token: return {} - return {'X-aws-ec2-metadata-token': self._api_token} + return {AWS_TOKEN_PUT_HEADER: self._api_token} class DataSourceEc2Local(DataSourceEc2): diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index f6d68436..eeb27aa8 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -8,6 +8,7 @@ # # This file is part of cloud-init. See LICENSE file for license information. +import copy import json import os import time @@ -31,6 +32,7 @@ LOG = logging.getLogger(__name__) SSL_ENABLED = False CONFIG_ENABLED = False # This was added in 0.7 (but taken out in >=1.0) _REQ_VER = None +REDACTED = 'REDACTED' try: from distutils.version import LooseVersion import pkg_resources @@ -189,9 +191,9 @@ def _get_ssl_args(url, ssl_details): def readurl(url, data=None, timeout=None, retries=0, sec_between=1, - headers=None, headers_cb=None, ssl_details=None, - check_status=True, allow_redirects=True, exception_cb=None, - session=None, infinite=False, log_req_resp=True, + headers=None, headers_cb=None, headers_redact=None, + ssl_details=None, check_status=True, allow_redirects=True, + exception_cb=None, session=None, infinite=False, log_req_resp=True, request_method=None): """Wrapper around requests.Session to read the url and retry if necessary @@ -207,6 +209,7 @@ def readurl(url, data=None, timeout=None, retries=0, sec_between=1, :param headers: Optional dict of headers to send during request :param headers_cb: Optional callable returning a dict of values to send as headers during request + :param headers_redact: Optional list of header names to redact from the log :param ssl_details: Optional dict providing key_file, ca_certs, and cert_file keys for use on in ssl connections. 
:param check_status: Optional boolean set True to raise when HTTPError @@ -233,6 +236,8 @@ def readurl(url, data=None, timeout=None, retries=0, sec_between=1, req_args['method'] = request_method if timeout is not None: req_args['timeout'] = max(float(timeout), 0) + if headers_redact is None: + headers_redact = [] # It doesn't seem like config # was added in older library versions (or newer ones either), thus we # need to manually do the retries if it wasn't... @@ -277,6 +282,12 @@ def readurl(url, data=None, timeout=None, retries=0, sec_between=1, if k == 'data': continue filtered_req_args[k] = v + if k == 'headers': + for hkey, _hval in v.items(): + if hkey in headers_redact: + filtered_req_args[k][hkey] = ( + copy.deepcopy(req_args[k][hkey])) + filtered_req_args[k][hkey] = REDACTED try: if log_req_resp: @@ -329,8 +340,8 @@ def readurl(url, data=None, timeout=None, retries=0, sec_between=1, return None # Should throw before this... -def wait_for_url(urls, max_wait=None, timeout=None, - status_cb=None, headers_cb=None, sleep_time=1, +def wait_for_url(urls, max_wait=None, timeout=None, status_cb=None, + headers_cb=None, headers_redact=None, sleep_time=1, exception_cb=None, sleep_time_cb=None, request_method=None): """ urls: a list of urls to try @@ -342,6 +353,7 @@ def wait_for_url(urls, max_wait=None, timeout=None, status_cb: call method with string message when a url is not available headers_cb: call method with single argument of url to get headers for request. + headers_redact: a list of header names to redact from the log exception_cb: call method with 2 arguments 'msg' (per status_cb) and 'exception', the exception that occurred. sleep_time_cb: call method with 2 arguments (response, loop_n) that @@ -405,8 +417,9 @@ def wait_for_url(urls, max_wait=None, timeout=None, headers = {} response = readurl( - url, headers=headers, timeout=timeout, - check_status=False, request_method=request_method) + url, headers=headers, headers_redact=headers_redact, + timeout=timeout, check_status=False, + request_method=request_method) if not response.contents: reason = "empty response [%s]" % (response.code) url_exc = UrlError(ValueError(reason), code=response.code, diff --git a/tests/unittests/test_datasource/test_ec2.py b/tests/unittests/test_datasource/test_ec2.py index 19e1af2b..2a96122f 100644 --- a/tests/unittests/test_datasource/test_ec2.py +++ b/tests/unittests/test_datasource/test_ec2.py @@ -429,6 +429,23 @@ class TestEc2(test_helpers.HttprettyTestCase): self.assertTrue(ds.get_data()) self.assertFalse(ds.is_classic_instance()) + def test_aws_token_redacted(self): + """Verify that aws tokens are redacted when logged.""" + ds = self._setup_ds( + platform_data=self.valid_platform_data, + sys_cfg={'datasource': {'Ec2': {'strict_id': False}}}, + md={'md': DEFAULT_METADATA}) + self.assertTrue(ds.get_data()) + all_logs = self.logs.getvalue().splitlines() + REDACT_TTL = "'X-aws-ec2-metadata-token-ttl-seconds': 'REDACTED'" + REDACT_TOK = "'X-aws-ec2-metadata-token': 'REDACTED'" + logs_with_redacted_ttl = [log for log in all_logs if REDACT_TTL in log] + logs_with_redacted = [log for log in all_logs if REDACT_TOK in log] + logs_with_token = [log for log in all_logs if 'API-TOKEN' in log] + self.assertEqual(1, len(logs_with_redacted_ttl)) + self.assertEqual(79, len(logs_with_redacted)) + self.assertEqual(0, len(logs_with_token)) + @mock.patch('cloudinit.net.dhcp.maybe_perform_dhcp_discovery') def test_valid_platform_with_strict_true(self, m_dhcp): """Valid platform data should return true with strict_id 
true.""" -- cgit v1.2.3