summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChad Smith <chad.smith@canonical.com>2020-03-05 17:38:28 -0700
committerGitHub <noreply@github.com>2020-03-05 17:38:28 -0700
commit1f860e5ac7ebb5b809c72d8703a0b7cb3e84ccd0 (patch)
tree0ba9d804209eeecde0b87f0c68df76e6516056c8
parentfa639704f67539d9c1d8668383f755cb0213fd4a (diff)
downloadvyos-cloud-init-1f860e5ac7ebb5b809c72d8703a0b7cb3e84ccd0.tar.gz
vyos-cloud-init-1f860e5ac7ebb5b809c72d8703a0b7cb3e84ccd0.zip
ec2: Do not fallback to IMDSv1 on EC2 (#216)
The EC2 Data Source needs to handle 3 states of the Instance Metadata Service configured for a given instance: 1. HttpTokens : optional & HttpEndpoint : enabled Either IMDSv2 or IMDSv1 can be used. 2. HttpTokens : required & HttpEndpoint : enabled Calls to IMDS without a valid token (IMDSv1 or IMDSv2 with expired token) will return a 401 error. 3. HttpEndpoint : disabled The IMDS http endpoint will return a 403 error. Previous work to support IMDSv2 in cloud-init handled case 1 and case 2. This commit handles case 3 by bypassing the retry block when IMDS returns HTTP status code >= 400 on official AWS cloud platform. It shaves 2 minutes when rebooting an instance that has its IMDS http token endpoint disabled but creates some inconsistencies. An instance that doesn't set "manual_cache_clean" to "True" will have its /var/lib/cloud/instance symlink removed altogether after it has failed to find a datasource.
-rw-r--r--cloudinit/sources/DataSourceEc2.py75
-rw-r--r--tests/unittests/test_datasource/test_ec2.py48
2 files changed, 106 insertions, 17 deletions
diff --git a/cloudinit/sources/DataSourceEc2.py b/cloudinit/sources/DataSourceEc2.py
index 0f2bfef4..8f0d73bb 100644
--- a/cloudinit/sources/DataSourceEc2.py
+++ b/cloudinit/sources/DataSourceEc2.py
@@ -29,7 +29,6 @@ STRICT_ID_PATH = ("datasource", "Ec2", "strict_id")
STRICT_ID_DEFAULT = "warn"
API_TOKEN_ROUTE = 'latest/api/token'
-API_TOKEN_DISABLED = '_ec2_disable_api_token'
AWS_TOKEN_TTL_SECONDS = '21600'
AWS_TOKEN_PUT_HEADER = 'X-aws-ec2-metadata-token'
AWS_TOKEN_REQ_HEADER = AWS_TOKEN_PUT_HEADER + '-ttl-seconds'
@@ -193,6 +192,12 @@ class DataSourceEc2(sources.DataSource):
return self.metadata['instance-id']
def _maybe_fetch_api_token(self, mdurls, timeout=None, max_wait=None):
+ """ Get an API token for EC2 Instance Metadata Service.
+
+ On EC2, IMDS will always answer with an API token, unless
+ the instance owner has disabled the IMDS HTTP endpoint or
+ the network topology conflicts with the configured hop-limit.
+ """
if self.cloud_name != CloudNames.AWS:
return
@@ -205,18 +210,33 @@ class DataSourceEc2(sources.DataSource):
urls.append(cur)
url2base[cur] = url
- # use the self._status_cb to check for Read errors, which means
- # we can't reach the API token URL, so we should disable IMDSv2
+ # use self._imds_exception_cb to stop retrying on HTTP errors (code >= 400)
LOG.debug('Fetching Ec2 IMDSv2 API Token')
- url, response = uhelp.wait_for_url(
- urls=urls, max_wait=1, timeout=1, status_cb=self._status_cb,
- headers_cb=self._get_headers, request_method=request_method,
- headers_redact=AWS_TOKEN_REDACT)
+
+ response = None
+ url = None
+ url_params = self.get_url_params()
+ try:
+ url, response = uhelp.wait_for_url(
+ urls=urls, max_wait=url_params.max_wait_seconds,
+ timeout=url_params.timeout_seconds, status_cb=LOG.warning,
+ headers_cb=self._get_headers,
+ exception_cb=self._imds_exception_cb,
+ request_method=request_method,
+ headers_redact=AWS_TOKEN_REDACT)
+ except uhelp.UrlError:
+ # We use the raised exception to interrupt the retry loop.
+ # Nothing else to do here.
+ pass
if url and response:
self._api_token = response
return url2base[url]
+ # If we get here, then wait_for_url timed out, waiting for IMDS
+ # or the IMDS HTTP endpoint is disabled
+ return None
+
def wait_for_metadata_service(self):
mcfg = self.ds_cfg
@@ -240,9 +260,11 @@ class DataSourceEc2(sources.DataSource):
# try the api token path first
metadata_address = self._maybe_fetch_api_token(mdurls)
- if not metadata_address:
- if self._api_token == API_TOKEN_DISABLED:
- LOG.warning('Retrying with IMDSv1')
+ # When running on EC2, we always access IMDS with an API token.
+ # If we could not get an API token, then we assume the IMDS
+ # endpoint was disabled and we move on without a data source.
+ # Fallback to IMDSv1 if not running on EC2
+ if not metadata_address and self.cloud_name != CloudNames.AWS:
# if we can't get a token, use instance-id path
urls = []
url2base = {}
@@ -267,6 +289,8 @@ class DataSourceEc2(sources.DataSource):
if metadata_address:
self.metadata_address = metadata_address
LOG.debug("Using metadata source: '%s'", self.metadata_address)
+ elif self.cloud_name == CloudNames.AWS:
+ LOG.warning("IMDS's HTTP endpoint is probably disabled")
else:
LOG.critical("Giving up on md from %s after %s seconds",
urls, int(time.time() - start_time))
@@ -496,11 +520,29 @@ class DataSourceEc2(sources.DataSource):
self._api_token = None
return True # always retry
- def _status_cb(self, msg, exc=None):
- LOG.warning(msg)
- if 'Read timed out' in msg:
- LOG.warning('Cannot use Ec2 IMDSv2 API tokens, using IMDSv1')
- self._api_token = API_TOKEN_DISABLED
+ def _imds_exception_cb(self, msg, exception=None):
+ """Fail quickly on proper AWS if IMDSv2 rejects API token request
+
+ Guidance from Amazon is that if IMDSv2 had disabled token requests
+ by returning a 403, or cloud-init malformed requests resulting in
+ other 40X errors, we want the datasource detection to fail quickly
+ without retries as those symptoms will likely not be resolved by
+ retries.
+
+ Exceptions such as requests.ConnectionError due to IMDS being
+ temporarily unroutable or unavailable will still retry due to the
+ callsite wait_for_url.
+ """
+ if isinstance(exception, uhelp.UrlError):
+ # requests.ConnectionError will have exception.code == None
+ if exception.code and exception.code >= 400:
+ if exception.code == 403:
+ LOG.warning('Ec2 IMDS endpoint returned a 403 error. '
+ 'HTTP endpoint is disabled. Aborting.')
+ else:
+ LOG.warning('Fatal error while requesting '
+ 'Ec2 IMDSv2 API tokens')
+ raise exception
def _get_headers(self, url=''):
"""Return a dict of headers for accessing a url.
@@ -508,8 +550,7 @@ class DataSourceEc2(sources.DataSource):
If _api_token is unset on AWS, attempt to refresh the token via a PUT
and then return the updated token header.
"""
- if self.cloud_name != CloudNames.AWS or (self._api_token ==
- API_TOKEN_DISABLED):
+ if self.cloud_name != CloudNames.AWS:
return {}
# Request a 6 hour token if URL is API_TOKEN_ROUTE
request_token_header = {AWS_TOKEN_REQ_HEADER: AWS_TOKEN_TTL_SECONDS}
diff --git a/tests/unittests/test_datasource/test_ec2.py b/tests/unittests/test_datasource/test_ec2.py
index 2a96122f..78e82c7e 100644
--- a/tests/unittests/test_datasource/test_ec2.py
+++ b/tests/unittests/test_datasource/test_ec2.py
@@ -3,6 +3,7 @@
import copy
import httpretty
import json
+import requests
from unittest import mock
from cloudinit import helpers
@@ -200,6 +201,7 @@ def register_mock_metaserver(base_url, data):
class TestEc2(test_helpers.HttprettyTestCase):
with_logs = True
+ maxDiff = None
valid_platform_data = {
'uuid': 'ec212f79-87d1-2f1d-588f-d86dc0fd5412',
@@ -429,6 +431,52 @@ class TestEc2(test_helpers.HttprettyTestCase):
self.assertTrue(ds.get_data())
self.assertFalse(ds.is_classic_instance())
+ def test_aws_inaccessible_imds_service_fails_with_retries(self):
+ """Requests to an inaccessible http://169.254.169.254 are retried."""
+ ds = self._setup_ds(
+ platform_data=self.valid_platform_data,
+ sys_cfg={'datasource': {'Ec2': {'strict_id': False}}},
+ md=None)
+
+ conn_error = requests.exceptions.ConnectionError(
+ '[Errno 113] no route to host')
+
+ mock_success = mock.MagicMock(contents=b'fakesuccess')
+ mock_success.ok.return_value = True
+
+ with mock.patch('cloudinit.url_helper.readurl') as m_readurl:
+ m_readurl.side_effect = (conn_error, conn_error, mock_success)
+ with mock.patch('cloudinit.url_helper.time.sleep'):
+ self.assertTrue(ds.wait_for_metadata_service())
+
+ # Three /latest/api/token requests: two connection errors, then success
+ self.assertEqual(3, len(m_readurl.call_args_list))
+ for readurl_call in m_readurl.call_args_list:
+ self.assertIn('latest/api/token', readurl_call[0][0])
+
+ def test_aws_token_403_fails_without_retries(self):
+ """Verify that 403s fetching AWS tokens are not retried."""
+ ds = self._setup_ds(
+ platform_data=self.valid_platform_data,
+ sys_cfg={'datasource': {'Ec2': {'strict_id': False}}},
+ md=None)
+ token_url = self.data_url('latest', data_item='api/token')
+ httpretty.register_uri(httpretty.PUT, token_url, body={}, status=403)
+ self.assertFalse(ds.get_data())
+ # Just one /latest/api/token request
+ logs = self.logs.getvalue()
+ failed_put_log = '"PUT /latest/api/token HTTP/1.1" 403 0'
+ expected_logs = [
+ 'WARNING: Ec2 IMDS endpoint returned a 403 error. HTTP endpoint is'
+ ' disabled. Aborting.',
+ "WARNING: IMDS's HTTP endpoint is probably disabled",
+ failed_put_log
+ ]
+ for log in expected_logs:
+ self.assertIn(log, logs)
+ self.assertEqual(
+ 1, len([l for l in logs.splitlines() if failed_put_log in l]))
+
def test_aws_token_redacted(self):
"""Verify that aws tokens are redacted when logged."""
ds = self._setup_ds(