From 4bc399e0cd0b7e9177f948aecd49f6b8323ff30b Mon Sep 17 00:00:00 2001 From: Ryan Harper Date: Fri, 22 Nov 2019 21:05:44 -0600 Subject: ec2: Add support for AWS IMDS v2 (session-oriented) (#55) * ec2: Add support for AWS IMDS v2 (session-oriented) AWS now supports a new version of fetching Instance Metadata[1]. Update cloud-init's ec2 utility functions and update ec2 derived datasources accordingly. For DataSourceEc2 (versus ec2-look-alikes) cloud-init will issue the PUT request to obtain an API token for the maximum lifetime and then all subsequent interactions with the IMDS will include the token in the header. If the API token endpoint is unreachable on Ec2 platform, log a warning and fallback to using IMDS v1 and which does not use session tokens when communicating with the Instance metadata service. We handle read errors, typically seen if the IMDS is beyond one etwork hop (IMDSv2 responses have a ttl=1), by setting the api token to a disabled value and then using IMDSv1 paths. To support token-based headers, ec2_utils functions were updated to support custom headers_cb and exception_cb callback functions so Ec2 could store, or refresh API tokens in the event of token becoming stale. [1] https://docs.aws.amazon.com/AWSEC2/latest/ \ UserGuide/ec2-instance-metadata.html \ #instance-metadata-v2-how-it-works --- cloudinit/ec2_utils.py | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) (limited to 'cloudinit/ec2_utils.py') diff --git a/cloudinit/ec2_utils.py b/cloudinit/ec2_utils.py index 3b7b17f1..57708c14 100644 --- a/cloudinit/ec2_utils.py +++ b/cloudinit/ec2_utils.py @@ -134,25 +134,28 @@ class MetadataMaterializer(object): return joined -def _skip_retry_on_codes(status_codes, _request_args, cause): +def skip_retry_on_codes(status_codes, _request_args, cause): """Returns False if cause.code is in status_codes.""" return cause.code not in status_codes def get_instance_userdata(api_version='latest', metadata_address='http://169.254.169.254', - ssl_details=None, timeout=5, retries=5): + ssl_details=None, timeout=5, retries=5, + headers_cb=None, exception_cb=None): ud_url = url_helper.combine_url(metadata_address, api_version) ud_url = url_helper.combine_url(ud_url, 'user-data') user_data = '' try: - # It is ok for userdata to not exist (thats why we are stopping if - # NOT_FOUND occurs) and just in that case returning an empty string. - exception_cb = functools.partial(_skip_retry_on_codes, - SKIP_USERDATA_CODES) + if not exception_cb: + # It is ok for userdata to not exist (thats why we are stopping if + # NOT_FOUND occurs) and just in that case returning an empty + # string. + exception_cb = functools.partial(skip_retry_on_codes, + SKIP_USERDATA_CODES) response = url_helper.read_file_or_url( ud_url, ssl_details=ssl_details, timeout=timeout, - retries=retries, exception_cb=exception_cb) + retries=retries, exception_cb=exception_cb, headers_cb=headers_cb) user_data = response.contents except url_helper.UrlError as e: if e.code not in SKIP_USERDATA_CODES: @@ -165,11 +168,13 @@ def get_instance_userdata(api_version='latest', def _get_instance_metadata(tree, api_version='latest', metadata_address='http://169.254.169.254', ssl_details=None, timeout=5, retries=5, - leaf_decoder=None): + leaf_decoder=None, headers_cb=None, + exception_cb=None): md_url = url_helper.combine_url(metadata_address, api_version, tree) caller = functools.partial( url_helper.read_file_or_url, ssl_details=ssl_details, - timeout=timeout, retries=retries) + timeout=timeout, retries=retries, headers_cb=headers_cb, + exception_cb=exception_cb) def mcaller(url): return caller(url).contents @@ -191,22 +196,28 @@ def _get_instance_metadata(tree, api_version='latest', def get_instance_metadata(api_version='latest', metadata_address='http://169.254.169.254', ssl_details=None, timeout=5, retries=5, - leaf_decoder=None): + leaf_decoder=None, headers_cb=None, + exception_cb=None): # Note, 'meta-data' explicitly has trailing /. # this is required for CloudStack (LP: #1356855) return _get_instance_metadata(tree='meta-data/', api_version=api_version, metadata_address=metadata_address, ssl_details=ssl_details, timeout=timeout, - retries=retries, leaf_decoder=leaf_decoder) + retries=retries, leaf_decoder=leaf_decoder, + headers_cb=headers_cb, + exception_cb=exception_cb) def get_instance_identity(api_version='latest', metadata_address='http://169.254.169.254', ssl_details=None, timeout=5, retries=5, - leaf_decoder=None): + leaf_decoder=None, headers_cb=None, + exception_cb=None): return _get_instance_metadata(tree='dynamic/instance-identity', api_version=api_version, metadata_address=metadata_address, ssl_details=ssl_details, timeout=timeout, - retries=retries, leaf_decoder=leaf_decoder) + retries=retries, leaf_decoder=leaf_decoder, + headers_cb=headers_cb, + exception_cb=exception_cb) # vi: ts=4 expandtab -- cgit v1.2.3 From 87cd040ed8fe7195cbb357ed3bbf53cd2a81436c Mon Sep 17 00:00:00 2001 From: Ryan Harper Date: Wed, 19 Feb 2020 15:01:09 -0600 Subject: ec2: Do not log IMDSv2 token values, instead use REDACTED (#219) Instead of logging the token values used log the headers and replace the actual values with the string 'REDACTED'. This allows users to examine cloud-init.log and see that the IMDSv2 token header is being used but avoids leaving the value used in the log file itself. LP: #1863943 --- cloudinit/ec2_utils.py | 12 ++++++++-- cloudinit/sources/DataSourceEc2.py | 35 +++++++++++++++++++---------- cloudinit/url_helper.py | 27 ++++++++++++++++------ tests/unittests/test_datasource/test_ec2.py | 17 ++++++++++++++ 4 files changed, 70 insertions(+), 21 deletions(-) (limited to 'cloudinit/ec2_utils.py') diff --git a/cloudinit/ec2_utils.py b/cloudinit/ec2_utils.py index 57708c14..34acfe84 100644 --- a/cloudinit/ec2_utils.py +++ b/cloudinit/ec2_utils.py @@ -142,7 +142,8 @@ def skip_retry_on_codes(status_codes, _request_args, cause): def get_instance_userdata(api_version='latest', metadata_address='http://169.254.169.254', ssl_details=None, timeout=5, retries=5, - headers_cb=None, exception_cb=None): + headers_cb=None, headers_redact=None, + exception_cb=None): ud_url = url_helper.combine_url(metadata_address, api_version) ud_url = url_helper.combine_url(ud_url, 'user-data') user_data = '' @@ -155,7 +156,8 @@ def get_instance_userdata(api_version='latest', SKIP_USERDATA_CODES) response = url_helper.read_file_or_url( ud_url, ssl_details=ssl_details, timeout=timeout, - retries=retries, exception_cb=exception_cb, headers_cb=headers_cb) + retries=retries, exception_cb=exception_cb, headers_cb=headers_cb, + headers_redact=headers_redact) user_data = response.contents except url_helper.UrlError as e: if e.code not in SKIP_USERDATA_CODES: @@ -169,11 +171,13 @@ def _get_instance_metadata(tree, api_version='latest', metadata_address='http://169.254.169.254', ssl_details=None, timeout=5, retries=5, leaf_decoder=None, headers_cb=None, + headers_redact=None, exception_cb=None): md_url = url_helper.combine_url(metadata_address, api_version, tree) caller = functools.partial( url_helper.read_file_or_url, ssl_details=ssl_details, timeout=timeout, retries=retries, headers_cb=headers_cb, + headers_redact=headers_redact, exception_cb=exception_cb) def mcaller(url): @@ -197,6 +201,7 @@ def get_instance_metadata(api_version='latest', metadata_address='http://169.254.169.254', ssl_details=None, timeout=5, retries=5, leaf_decoder=None, headers_cb=None, + headers_redact=None, exception_cb=None): # Note, 'meta-data' explicitly has trailing /. # this is required for CloudStack (LP: #1356855) @@ -204,6 +209,7 @@ def get_instance_metadata(api_version='latest', metadata_address=metadata_address, ssl_details=ssl_details, timeout=timeout, retries=retries, leaf_decoder=leaf_decoder, + headers_redact=headers_redact, headers_cb=headers_cb, exception_cb=exception_cb) @@ -212,12 +218,14 @@ def get_instance_identity(api_version='latest', metadata_address='http://169.254.169.254', ssl_details=None, timeout=5, retries=5, leaf_decoder=None, headers_cb=None, + headers_redact=None, exception_cb=None): return _get_instance_metadata(tree='dynamic/instance-identity', api_version=api_version, metadata_address=metadata_address, ssl_details=ssl_details, timeout=timeout, retries=retries, leaf_decoder=leaf_decoder, + headers_redact=headers_redact, headers_cb=headers_cb, exception_cb=exception_cb) # vi: ts=4 expandtab diff --git a/cloudinit/sources/DataSourceEc2.py b/cloudinit/sources/DataSourceEc2.py index b9f346a6..0f2bfef4 100644 --- a/cloudinit/sources/DataSourceEc2.py +++ b/cloudinit/sources/DataSourceEc2.py @@ -31,6 +31,9 @@ STRICT_ID_DEFAULT = "warn" API_TOKEN_ROUTE = 'latest/api/token' API_TOKEN_DISABLED = '_ec2_disable_api_token' AWS_TOKEN_TTL_SECONDS = '21600' +AWS_TOKEN_PUT_HEADER = 'X-aws-ec2-metadata-token' +AWS_TOKEN_REQ_HEADER = AWS_TOKEN_PUT_HEADER + '-ttl-seconds' +AWS_TOKEN_REDACT = [AWS_TOKEN_PUT_HEADER, AWS_TOKEN_REQ_HEADER] class CloudNames(object): @@ -158,7 +161,8 @@ class DataSourceEc2(sources.DataSource): for api_ver in self.extended_metadata_versions: url = url_tmpl.format(self.metadata_address, api_ver) try: - resp = uhelp.readurl(url=url, headers=headers) + resp = uhelp.readurl(url=url, headers=headers, + headers_redact=AWS_TOKEN_REDACT) except uhelp.UrlError as e: LOG.debug('url %s raised exception %s', url, e) else: @@ -180,6 +184,7 @@ class DataSourceEc2(sources.DataSource): self.identity = ec2.get_instance_identity( api_version, self.metadata_address, headers_cb=self._get_headers, + headers_redact=AWS_TOKEN_REDACT, exception_cb=self._refresh_stale_aws_token_cb).get( 'document', {}) return self.identity.get( @@ -205,7 +210,8 @@ class DataSourceEc2(sources.DataSource): LOG.debug('Fetching Ec2 IMDSv2 API Token') url, response = uhelp.wait_for_url( urls=urls, max_wait=1, timeout=1, status_cb=self._status_cb, - headers_cb=self._get_headers, request_method=request_method) + headers_cb=self._get_headers, request_method=request_method, + headers_redact=AWS_TOKEN_REDACT) if url and response: self._api_token = response @@ -252,7 +258,8 @@ class DataSourceEc2(sources.DataSource): url, _ = uhelp.wait_for_url( urls=urls, max_wait=url_params.max_wait_seconds, timeout=url_params.timeout_seconds, status_cb=LOG.warning, - headers_cb=self._get_headers, request_method=request_method) + headers_redact=AWS_TOKEN_REDACT, headers_cb=self._get_headers, + request_method=request_method) if url: metadata_address = url2base[url] @@ -420,6 +427,7 @@ class DataSourceEc2(sources.DataSource): if not self.wait_for_metadata_service(): return {} api_version = self.get_metadata_api_version() + redact = AWS_TOKEN_REDACT crawled_metadata = {} if self.cloud_name == CloudNames.AWS: exc_cb = self._refresh_stale_aws_token_cb @@ -429,14 +437,17 @@ class DataSourceEc2(sources.DataSource): try: crawled_metadata['user-data'] = ec2.get_instance_userdata( api_version, self.metadata_address, - headers_cb=self._get_headers, exception_cb=exc_cb_ud) + headers_cb=self._get_headers, headers_redact=redact, + exception_cb=exc_cb_ud) crawled_metadata['meta-data'] = ec2.get_instance_metadata( api_version, self.metadata_address, - headers_cb=self._get_headers, exception_cb=exc_cb) + headers_cb=self._get_headers, headers_redact=redact, + exception_cb=exc_cb) if self.cloud_name == CloudNames.AWS: identity = ec2.get_instance_identity( api_version, self.metadata_address, - headers_cb=self._get_headers, exception_cb=exc_cb) + headers_cb=self._get_headers, headers_redact=redact, + exception_cb=exc_cb) crawled_metadata['dynamic'] = {'instance-identity': identity} except Exception: util.logexc( @@ -455,11 +466,12 @@ class DataSourceEc2(sources.DataSource): if self.cloud_name != CloudNames.AWS: return None LOG.debug("Refreshing Ec2 metadata API token") - request_header = {'X-aws-ec2-metadata-token-ttl-seconds': seconds} + request_header = {AWS_TOKEN_REQ_HEADER: seconds} token_url = '{}/{}'.format(self.metadata_address, API_TOKEN_ROUTE) try: - response = uhelp.readurl( - token_url, headers=request_header, request_method="PUT") + response = uhelp.readurl(token_url, headers=request_header, + headers_redact=AWS_TOKEN_REDACT, + request_method="PUT") except uhelp.UrlError as e: LOG.warning( 'Unable to get API token: %s raised exception %s', @@ -500,8 +512,7 @@ class DataSourceEc2(sources.DataSource): API_TOKEN_DISABLED): return {} # Request a 6 hour token if URL is API_TOKEN_ROUTE - request_token_header = { - 'X-aws-ec2-metadata-token-ttl-seconds': AWS_TOKEN_TTL_SECONDS} + request_token_header = {AWS_TOKEN_REQ_HEADER: AWS_TOKEN_TTL_SECONDS} if API_TOKEN_ROUTE in url: return request_token_header if not self._api_token: @@ -511,7 +522,7 @@ class DataSourceEc2(sources.DataSource): self._api_token = self._refresh_api_token() if not self._api_token: return {} - return {'X-aws-ec2-metadata-token': self._api_token} + return {AWS_TOKEN_PUT_HEADER: self._api_token} class DataSourceEc2Local(DataSourceEc2): diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index f6d68436..eeb27aa8 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -8,6 +8,7 @@ # # This file is part of cloud-init. See LICENSE file for license information. +import copy import json import os import time @@ -31,6 +32,7 @@ LOG = logging.getLogger(__name__) SSL_ENABLED = False CONFIG_ENABLED = False # This was added in 0.7 (but taken out in >=1.0) _REQ_VER = None +REDACTED = 'REDACTED' try: from distutils.version import LooseVersion import pkg_resources @@ -189,9 +191,9 @@ def _get_ssl_args(url, ssl_details): def readurl(url, data=None, timeout=None, retries=0, sec_between=1, - headers=None, headers_cb=None, ssl_details=None, - check_status=True, allow_redirects=True, exception_cb=None, - session=None, infinite=False, log_req_resp=True, + headers=None, headers_cb=None, headers_redact=None, + ssl_details=None, check_status=True, allow_redirects=True, + exception_cb=None, session=None, infinite=False, log_req_resp=True, request_method=None): """Wrapper around requests.Session to read the url and retry if necessary @@ -207,6 +209,7 @@ def readurl(url, data=None, timeout=None, retries=0, sec_between=1, :param headers: Optional dict of headers to send during request :param headers_cb: Optional callable returning a dict of values to send as headers during request + :param headers_redact: Optional list of header names to redact from the log :param ssl_details: Optional dict providing key_file, ca_certs, and cert_file keys for use on in ssl connections. :param check_status: Optional boolean set True to raise when HTTPError @@ -233,6 +236,8 @@ def readurl(url, data=None, timeout=None, retries=0, sec_between=1, req_args['method'] = request_method if timeout is not None: req_args['timeout'] = max(float(timeout), 0) + if headers_redact is None: + headers_redact = [] # It doesn't seem like config # was added in older library versions (or newer ones either), thus we # need to manually do the retries if it wasn't... @@ -277,6 +282,12 @@ def readurl(url, data=None, timeout=None, retries=0, sec_between=1, if k == 'data': continue filtered_req_args[k] = v + if k == 'headers': + for hkey, _hval in v.items(): + if hkey in headers_redact: + filtered_req_args[k][hkey] = ( + copy.deepcopy(req_args[k][hkey])) + filtered_req_args[k][hkey] = REDACTED try: if log_req_resp: @@ -329,8 +340,8 @@ def readurl(url, data=None, timeout=None, retries=0, sec_between=1, return None # Should throw before this... -def wait_for_url(urls, max_wait=None, timeout=None, - status_cb=None, headers_cb=None, sleep_time=1, +def wait_for_url(urls, max_wait=None, timeout=None, status_cb=None, + headers_cb=None, headers_redact=None, sleep_time=1, exception_cb=None, sleep_time_cb=None, request_method=None): """ urls: a list of urls to try @@ -342,6 +353,7 @@ def wait_for_url(urls, max_wait=None, timeout=None, status_cb: call method with string message when a url is not available headers_cb: call method with single argument of url to get headers for request. + headers_redact: a list of header names to redact from the log exception_cb: call method with 2 arguments 'msg' (per status_cb) and 'exception', the exception that occurred. sleep_time_cb: call method with 2 arguments (response, loop_n) that @@ -405,8 +417,9 @@ def wait_for_url(urls, max_wait=None, timeout=None, headers = {} response = readurl( - url, headers=headers, timeout=timeout, - check_status=False, request_method=request_method) + url, headers=headers, headers_redact=headers_redact, + timeout=timeout, check_status=False, + request_method=request_method) if not response.contents: reason = "empty response [%s]" % (response.code) url_exc = UrlError(ValueError(reason), code=response.code, diff --git a/tests/unittests/test_datasource/test_ec2.py b/tests/unittests/test_datasource/test_ec2.py index 19e1af2b..2a96122f 100644 --- a/tests/unittests/test_datasource/test_ec2.py +++ b/tests/unittests/test_datasource/test_ec2.py @@ -429,6 +429,23 @@ class TestEc2(test_helpers.HttprettyTestCase): self.assertTrue(ds.get_data()) self.assertFalse(ds.is_classic_instance()) + def test_aws_token_redacted(self): + """Verify that aws tokens are redacted when logged.""" + ds = self._setup_ds( + platform_data=self.valid_platform_data, + sys_cfg={'datasource': {'Ec2': {'strict_id': False}}}, + md={'md': DEFAULT_METADATA}) + self.assertTrue(ds.get_data()) + all_logs = self.logs.getvalue().splitlines() + REDACT_TTL = "'X-aws-ec2-metadata-token-ttl-seconds': 'REDACTED'" + REDACT_TOK = "'X-aws-ec2-metadata-token': 'REDACTED'" + logs_with_redacted_ttl = [log for log in all_logs if REDACT_TTL in log] + logs_with_redacted = [log for log in all_logs if REDACT_TOK in log] + logs_with_token = [log for log in all_logs if 'API-TOKEN' in log] + self.assertEqual(1, len(logs_with_redacted_ttl)) + self.assertEqual(79, len(logs_with_redacted)) + self.assertEqual(0, len(logs_with_token)) + @mock.patch('cloudinit.net.dhcp.maybe_perform_dhcp_discovery') def test_valid_platform_with_strict_true(self, m_dhcp): """Valid platform data should return true with strict_id true.""" -- cgit v1.2.3