diff options
author | Chad Smith <chad.smith@canonical.com> | 2020-03-05 17:38:28 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-03-05 17:38:28 -0700 |
commit | 1f860e5ac7ebb5b809c72d8703a0b7cb3e84ccd0 (patch) | |
tree | 0ba9d804209eeecde0b87f0c68df76e6516056c8 | |
parent | fa639704f67539d9c1d8668383f755cb0213fd4a (diff) | |
download | vyos-cloud-init-1f860e5ac7ebb5b809c72d8703a0b7cb3e84ccd0.tar.gz vyos-cloud-init-1f860e5ac7ebb5b809c72d8703a0b7cb3e84ccd0.zip |
ec2: Do not fallback to IMDSv1 on EC2 (#216)
The EC2 Data Source needs to handle 3 states of the Instance
Metadata Service configured for a given instance:
1. HttpTokens : optional & HttpEndpoint : enabled
Either IMDSv2 or IMDSv1 can be used.
2. HttpTokens : required & HttpEndpoint : enabled
Calls to IMDS without a valid token (IMDSv1, or IMDSv2 with an expired token)
will return a 401 error.
3. HttpEndpoint : disabled
The IMDS http endpoint will return a 403 error.
Previous work to support IMDSv2 in cloud-init handled case 1 and case 2.
This commit handles case 3 by bypassing the retry block when IMDS returns an HTTP
status code >= 400 on the official AWS cloud platform.
It shaves 2 minutes off rebooting an instance that has its IMDS HTTP token endpoint
disabled, but creates some inconsistencies. An instance that doesn't set
"manual_cache_clean" to "True" will have its /var/lib/cloud/instance symlink
removed altogether after it has failed to find a datasource.
-rw-r--r-- | cloudinit/sources/DataSourceEc2.py | 75 | ||||
-rw-r--r-- | tests/unittests/test_datasource/test_ec2.py | 48 |
2 files changed, 106 insertions, 17 deletions
diff --git a/cloudinit/sources/DataSourceEc2.py b/cloudinit/sources/DataSourceEc2.py index 0f2bfef4..8f0d73bb 100644 --- a/cloudinit/sources/DataSourceEc2.py +++ b/cloudinit/sources/DataSourceEc2.py @@ -29,7 +29,6 @@ STRICT_ID_PATH = ("datasource", "Ec2", "strict_id") STRICT_ID_DEFAULT = "warn" API_TOKEN_ROUTE = 'latest/api/token' -API_TOKEN_DISABLED = '_ec2_disable_api_token' AWS_TOKEN_TTL_SECONDS = '21600' AWS_TOKEN_PUT_HEADER = 'X-aws-ec2-metadata-token' AWS_TOKEN_REQ_HEADER = AWS_TOKEN_PUT_HEADER + '-ttl-seconds' @@ -193,6 +192,12 @@ class DataSourceEc2(sources.DataSource): return self.metadata['instance-id'] def _maybe_fetch_api_token(self, mdurls, timeout=None, max_wait=None): + """ Get an API token for EC2 Instance Metadata Service. + + On EC2. IMDS will always answer an API token, unless + the instance owner has disabled the IMDS HTTP endpoint or + the network topology conflicts with the configured hop-limit. + """ if self.cloud_name != CloudNames.AWS: return @@ -205,18 +210,33 @@ class DataSourceEc2(sources.DataSource): urls.append(cur) url2base[cur] = url - # use the self._status_cb to check for Read errors, which means - # we can't reach the API token URL, so we should disable IMDSv2 + # use the self._imds_exception_cb to check for Read errors LOG.debug('Fetching Ec2 IMDSv2 API Token') - url, response = uhelp.wait_for_url( - urls=urls, max_wait=1, timeout=1, status_cb=self._status_cb, - headers_cb=self._get_headers, request_method=request_method, - headers_redact=AWS_TOKEN_REDACT) + + response = None + url = None + url_params = self.get_url_params() + try: + url, response = uhelp.wait_for_url( + urls=urls, max_wait=url_params.max_wait_seconds, + timeout=url_params.timeout_seconds, status_cb=LOG.warning, + headers_cb=self._get_headers, + exception_cb=self._imds_exception_cb, + request_method=request_method, + headers_redact=AWS_TOKEN_REDACT) + except uhelp.UrlError: + # We use the raised exception to interupt the retry loop. + # Nothing else to do here. 
+ pass if url and response: self._api_token = response return url2base[url] + # If we get here, then wait_for_url timed out, waiting for IMDS + # or the IMDS HTTP endpoint is disabled + return None + def wait_for_metadata_service(self): mcfg = self.ds_cfg @@ -240,9 +260,11 @@ class DataSourceEc2(sources.DataSource): # try the api token path first metadata_address = self._maybe_fetch_api_token(mdurls) - if not metadata_address: - if self._api_token == API_TOKEN_DISABLED: - LOG.warning('Retrying with IMDSv1') + # When running on EC2, we always access IMDS with an API token. + # If we could not get an API token, then we assume the IMDS + # endpoint was disabled and we move on without a data source. + # Fallback to IMDSv1 if not running on EC2 + if not metadata_address and self.cloud_name != CloudNames.AWS: # if we can't get a token, use instance-id path urls = [] url2base = {} @@ -267,6 +289,8 @@ class DataSourceEc2(sources.DataSource): if metadata_address: self.metadata_address = metadata_address LOG.debug("Using metadata source: '%s'", self.metadata_address) + elif self.cloud_name == CloudNames.AWS: + LOG.warning("IMDS's HTTP endpoint is probably disabled") else: LOG.critical("Giving up on md from %s after %s seconds", urls, int(time.time() - start_time)) @@ -496,11 +520,29 @@ class DataSourceEc2(sources.DataSource): self._api_token = None return True # always retry - def _status_cb(self, msg, exc=None): - LOG.warning(msg) - if 'Read timed out' in msg: - LOG.warning('Cannot use Ec2 IMDSv2 API tokens, using IMDSv1') - self._api_token = API_TOKEN_DISABLED + def _imds_exception_cb(self, msg, exception=None): + """Fail quickly on proper AWS if IMDSv2 rejects API token request + + Guidance from Amazon is that if IMDSv2 had disabled token requests + by returning a 403, or cloud-init malformed requests resulting in + other 40X errors, we want the datasource detection to fail quickly + without retries as those symptoms will likely not be resolved by + retries. 
+ + Exceptions such as requests.ConnectionError due to IMDS being + temporarily unroutable or unavailable will still retry due to the + callsite wait_for_url. + """ + if isinstance(exception, uhelp.UrlError): + # requests.ConnectionError will have exception.code == None + if exception.code and exception.code >= 400: + if exception.code == 403: + LOG.warning('Ec2 IMDS endpoint returned a 403 error. ' + 'HTTP endpoint is disabled. Aborting.') + else: + LOG.warning('Fatal error while requesting ' + 'Ec2 IMDSv2 API tokens') + raise exception def _get_headers(self, url=''): """Return a dict of headers for accessing a url. @@ -508,8 +550,7 @@ class DataSourceEc2(sources.DataSource): If _api_token is unset on AWS, attempt to refresh the token via a PUT and then return the updated token header. """ - if self.cloud_name != CloudNames.AWS or (self._api_token == - API_TOKEN_DISABLED): + if self.cloud_name != CloudNames.AWS: return {} # Request a 6 hour token if URL is API_TOKEN_ROUTE request_token_header = {AWS_TOKEN_REQ_HEADER: AWS_TOKEN_TTL_SECONDS} diff --git a/tests/unittests/test_datasource/test_ec2.py b/tests/unittests/test_datasource/test_ec2.py index 2a96122f..78e82c7e 100644 --- a/tests/unittests/test_datasource/test_ec2.py +++ b/tests/unittests/test_datasource/test_ec2.py @@ -3,6 +3,7 @@ import copy import httpretty import json +import requests from unittest import mock from cloudinit import helpers @@ -200,6 +201,7 @@ def register_mock_metaserver(base_url, data): class TestEc2(test_helpers.HttprettyTestCase): with_logs = True + maxDiff = None valid_platform_data = { 'uuid': 'ec212f79-87d1-2f1d-588f-d86dc0fd5412', @@ -429,6 +431,52 @@ class TestEc2(test_helpers.HttprettyTestCase): self.assertTrue(ds.get_data()) self.assertFalse(ds.is_classic_instance()) + def test_aws_inaccessible_imds_service_fails_with_retries(self): + """Inaccessibility of http://169.254.169.254 are retried.""" + ds = self._setup_ds( + platform_data=self.valid_platform_data, + 
sys_cfg={'datasource': {'Ec2': {'strict_id': False}}}, + md=None) + + conn_error = requests.exceptions.ConnectionError( + '[Errno 113] no route to host') + + mock_success = mock.MagicMock(contents=b'fakesuccess') + mock_success.ok.return_value = True + + with mock.patch('cloudinit.url_helper.readurl') as m_readurl: + m_readurl.side_effect = (conn_error, conn_error, mock_success) + with mock.patch('cloudinit.url_helper.time.sleep'): + self.assertTrue(ds.wait_for_metadata_service()) + + # Just one /latest/api/token request + self.assertEqual(3, len(m_readurl.call_args_list)) + for readurl_call in m_readurl.call_args_list: + self.assertIn('latest/api/token', readurl_call[0][0]) + + def test_aws_token_403_fails_without_retries(self): + """Verify that 403s fetching AWS tokens are not retried.""" + ds = self._setup_ds( + platform_data=self.valid_platform_data, + sys_cfg={'datasource': {'Ec2': {'strict_id': False}}}, + md=None) + token_url = self.data_url('latest', data_item='api/token') + httpretty.register_uri(httpretty.PUT, token_url, body={}, status=403) + self.assertFalse(ds.get_data()) + # Just one /latest/api/token request + logs = self.logs.getvalue() + failed_put_log = '"PUT /latest/api/token HTTP/1.1" 403 0' + expected_logs = [ + 'WARNING: Ec2 IMDS endpoint returned a 403 error. HTTP endpoint is' + ' disabled. Aborting.', + "WARNING: IMDS's HTTP endpoint is probably disabled", + failed_put_log + ] + for log in expected_logs: + self.assertIn(log, logs) + self.assertEqual( + 1, len([l for l in logs.splitlines() if failed_put_log in l])) + def test_aws_token_redacted(self): """Verify that aws tokens are redacted when logged.""" ds = self._setup_ds( |