From 6c7e09cc8ab6955fe2c03a925d74ebdce19e7b56 Mon Sep 17 00:00:00 2001 From: Joshua Harlow Date: Fri, 10 Jan 2014 16:14:54 -0800 Subject: Bring back the ec2 utils, non-boto userdata/metadata reading This replacement uses our own userdata/metadata ec2 webservice parser that we can easily modify, it also automatically allows for reading the ec2 userdata/metadata from files and also brings in the usage of requests instead of boto's usage of urllib which did not support ssl properly. --- cloudinit/ec2_utils.py | 196 +++++++++++++++++++++++++++++++++++++------------ 1 file changed, 151 insertions(+), 45 deletions(-) diff --git a/cloudinit/ec2_utils.py b/cloudinit/ec2_utils.py index fcd511c5..605154bc 100644 --- a/cloudinit/ec2_utils.py +++ b/cloudinit/ec2_utils.py @@ -16,48 +16,154 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -import boto.utils as boto_utils - -# Versions of boto >= 2.6.0 (and possibly 2.5.2) -# try to lazily load the metadata backing, which -# doesn't work so well in cloud-init especially -# since the metadata is serialized and actions are -# performed where the metadata server may be blocked -# (thus the datasource will start failing) resulting -# in url exceptions when fields that do exist (or -# would have existed) do not exist due to the blocking -# that occurred. - -# TODO(harlowja): https://github.com/boto/boto/issues/1401 -# When boto finally moves to using requests, we should be able -# to provide it ssl details, it does not yet, so we can't provide them... - - -def _unlazy_dict(mp): - if not isinstance(mp, (dict)): - return mp - # Walk over the keys/values which - # forces boto to unlazy itself and - # has no effect on dictionaries that - # already have there items. 
- for (_k, v) in mp.items(): - _unlazy_dict(v) - return mp - - -def get_instance_userdata(api_version, metadata_address): - # Note: boto.utils.get_instance_metadata returns '' for empty string - # so the change from non-true to '' is not specifically necessary, but - # this way cloud-init will get consistent behavior even if boto changed - # in the future to return a None on "no user-data provided". - ud = boto_utils.get_instance_userdata(api_version, None, metadata_address) - if not ud: - ud = '' - return ud - - -def get_instance_metadata(api_version, metadata_address): - metadata = boto_utils.get_instance_metadata(api_version, metadata_address) - if not isinstance(metadata, (dict)): - metadata = {} - return _unlazy_dict(metadata) +from urlparse import (urlparse, urlunparse) + +import functools +import json +import urllib + +from cloudinit import log as logging +from cloudinit import util + +LOG = logging.getLogger(__name__) + + +def combine_url(base, add_on): + base_parsed = list(urlparse(base)) + path = base_parsed[2] + if path and not path.endswith("/"): + path += "/" + path += urllib.quote(str(add_on), safe="/:") + base_parsed[2] = path + return urlunparse(base_parsed) + + +# See: http://bit.ly/TyoUQs +# +# Since boto metadata reader uses the old urllib which does not +# support ssl, we need to go ahead and create our own reader which +# works the same as the boto one (for now). 
+class MetadataMaterializer(object): + def __init__(self, blob, base_url, caller): + self._blob = blob + self._md = None + self._base_url = base_url + self._caller = caller + + def _parse(self, blob): + leaves = {} + children = [] + if not blob: + return (leaves, children) + + def has_children(item): + if item.endswith("/"): + return True + else: + return False + + def get_name(item): + if item.endswith("/"): + return item.rstrip("/") + return item + + for field in blob.splitlines(): + field = field.strip() + field_name = get_name(field) + if not field or not field_name: + continue + if has_children(field): + if field_name not in children: + children.append(field_name) + else: + contents = field.split("=", 1) + resource = field_name + if len(contents) > 1: + # What a PITA... + (ident, sub_contents) = contents + checked_ident = util.safe_int(ident) + if checked_ident is not None: + resource = "%s/openssh-key" % (checked_ident) + field_name = sub_contents + leaves[field_name] = resource + return (leaves, children) + + def materialize(self): + if self._md is not None: + return self._md + self._md = self._materialize(self._blob, self._base_url) + return self._md + + def _decode_leaf_blob(self, blob): + if not blob: + return blob + stripped_blob = blob.strip() + if stripped_blob.startswith("{") and stripped_blob.endswith("}"): + # Assume and try with json + try: + return json.loads(blob) + except (ValueError, TypeError): + pass + if blob.find("\n") != -1: + return blob.splitlines() + return blob + + def _materialize(self, blob, base_url): + (leaves, children) = self._parse(blob) + child_contents = {} + for c in children: + child_url = combine_url(base_url, c) + if not child_url.endswith("/"): + child_url += "/" + child_blob = str(self._caller(child_url)) + child_contents[c] = self._materialize(child_blob, child_url) + leaf_contents = {} + for (field, resource) in leaves.items(): + leaf_url = combine_url(base_url, resource) + leaf_blob = str(self._caller(leaf_url)) + 
leaf_contents[field] = self._decode_leaf_blob(leaf_blob) + joined = {} + joined.update(child_contents) + for field in leaf_contents.keys(): + if field in joined: + LOG.warn("Duplicate key found in results from %s", base_url) + else: + joined[field] = leaf_contents[field] + return joined + + +def get_instance_userdata(api_version='latest', + metadata_address='http://169.254.169.254', + ssl_details=None, timeout=5, retries=5): + ud_url = combine_url(metadata_address, api_version) + ud_url = combine_url(ud_url, 'user-data') + try: + response = util.read_file_or_url(ud_url, + ssl_details=ssl_details, + timeout=timeout, + retries=retries) + return str(response) + except Exception: + util.logexc(LOG, "Failed fetching userdata from url %s", ud_url) + return None + + +def get_instance_metadata(api_version='latest', + metadata_address='http://169.254.169.254', + ssl_details=None, timeout=5, retries=5): + md_url = combine_url(metadata_address, api_version) + md_url = combine_url(md_url, 'meta-data') + caller = functools.partial(util.read_file_or_url, + ssl_details=ssl_details, timeout=timeout, + retries=retries) + + try: + response = caller(md_url) + materializer = MetadataMaterializer(str(response), md_url, caller) + md = materializer.materialize() + if not isinstance(md, (dict)): + md = {} + return md + except Exception: + util.logexc(LOG, "Failed fetching metadata from url %s", md_url) + return {} -- cgit v1.2.3 From 5cdaaee97ea850adc75a4e88dbd2578c84bded51 Mon Sep 17 00:00:00 2001 From: Joshua Harlow Date: Fri, 10 Jan 2014 17:17:06 -0800 Subject: Add ec2 utils tests and httpretty requirement for http mocking --- Requires | 6 +- cloudinit/ec2_utils.py | 8 +-- tests/unittests/test_ec2_util.py | 133 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 140 insertions(+), 7 deletions(-) create mode 100644 tests/unittests/test_ec2_util.py diff --git a/Requires b/Requires index f19c9691..e847506f 100644 --- a/Requires +++ b/Requires @@ -29,8 +29,8 @@ argparse # Requests 
handles ssl correctly! requests -# Boto for ec2 -boto - # For patching pieces of cloud-config together jsonpatch + +# For http testing (only needed for testing) +httpretty>=0.7.1 diff --git a/cloudinit/ec2_utils.py b/cloudinit/ec2_utils.py index 605154bc..c46adc6b 100644 --- a/cloudinit/ec2_utils.py +++ b/cloudinit/ec2_utils.py @@ -81,9 +81,9 @@ class MetadataMaterializer(object): if len(contents) > 1: # What a PITA... (ident, sub_contents) = contents - checked_ident = util.safe_int(ident) - if checked_ident is not None: - resource = "%s/openssh-key" % (checked_ident) + ident = util.safe_int(ident) + if ident is not None: + resource = "%s/openssh-key" % (ident) field_name = sub_contents leaves[field_name] = resource return (leaves, children) @@ -145,7 +145,7 @@ def get_instance_userdata(api_version='latest', return str(response) except Exception: util.logexc(LOG, "Failed fetching userdata from url %s", ud_url) - return None + return '' def get_instance_metadata(api_version='latest', diff --git a/tests/unittests/test_ec2_util.py b/tests/unittests/test_ec2_util.py new file mode 100644 index 00000000..6d25dcf9 --- /dev/null +++ b/tests/unittests/test_ec2_util.py @@ -0,0 +1,133 @@ +from tests.unittests import helpers + +from cloudinit import ec2_utils as eu + +import httpretty as hp + + +class TestEc2Util(helpers.TestCase): + VERSION = 'latest' + + @hp.activate + def test_userdata_fetch(self): + hp.register_uri(hp.GET, + 'http://169.254.169.254/%s/user-data' % (self.VERSION), + body='stuff', + status=200) + userdata = eu.get_instance_userdata(self.VERSION) + self.assertEquals('stuff', userdata) + + @hp.activate + def test_userdata_fetch_fail_not_found(self): + hp.register_uri(hp.GET, + 'http://169.254.169.254/%s/user-data' % (self.VERSION), + status=404) + userdata = eu.get_instance_userdata(self.VERSION, retries=0) + self.assertEquals('', userdata) + + + @hp.activate + def test_userdata_fetch_fail_server_dead(self): + hp.register_uri(hp.GET, + 
'http://169.254.169.254/%s/user-data' % (self.VERSION), + status=500) + userdata = eu.get_instance_userdata(self.VERSION, retries=0) + self.assertEquals('', userdata) + + @hp.activate + def test_metadata_fetch_no_keys(self): + base_url = 'http://169.254.169.254/%s/meta-data' % (self.VERSION) + hp.register_uri(hp.GET, base_url, status=200, + body="\n".join(['hostname', + 'instance-id', + 'ami-launch-index'])) + hp.register_uri(hp.GET, eu.combine_url(base_url, 'hostname'), + status=200, body='ec2.fake.host.name.com') + hp.register_uri(hp.GET, eu.combine_url(base_url, 'instance-id'), + status=200, body='123') + hp.register_uri(hp.GET, eu.combine_url(base_url, 'ami-launch-index'), + status=200, body='1') + md = eu.get_instance_metadata(self.VERSION, retries=0) + self.assertEquals(md['hostname'], 'ec2.fake.host.name.com') + self.assertEquals(md['instance-id'], '123') + self.assertEquals(md['ami-launch-index'], '1') + + @hp.activate + def test_metadata_fetch_key(self): + base_url = 'http://169.254.169.254/%s/meta-data' % (self.VERSION) + hp.register_uri(hp.GET, base_url, status=200, + body="\n".join(['hostname', + 'instance-id', + 'public-keys/'])) + hp.register_uri(hp.GET, eu.combine_url(base_url, 'hostname'), + status=200, body='ec2.fake.host.name.com') + hp.register_uri(hp.GET, eu.combine_url(base_url, 'instance-id'), + status=200, body='123') + hp.register_uri(hp.GET, eu.combine_url(base_url, 'public-keys/'), + status=200, body='0=my-public-key') + hp.register_uri(hp.GET, eu.combine_url(base_url, 'public-keys/'), + status=200, body='0=my-public-key') + hp.register_uri(hp.GET, + eu.combine_url(base_url, 'public-keys/0/openssh-key'), + status=200, body='ssh-rsa AAAA.....wZEf my-public-key') + md = eu.get_instance_metadata(self.VERSION, retries=0, timeout=0.1) + self.assertEquals(md['hostname'], 'ec2.fake.host.name.com') + self.assertEquals(md['instance-id'], '123') + self.assertEquals(1, len(md['public-keys'])) + + @hp.activate + def test_metadata_fetch_with_2_keys(self): + 
base_url = 'http://169.254.169.254/%s/meta-data' % (self.VERSION) + hp.register_uri(hp.GET, base_url, status=200, + body="\n".join(['hostname', + 'instance-id', + 'public-keys/'])) + hp.register_uri(hp.GET, eu.combine_url(base_url, 'hostname'), + status=200, body='ec2.fake.host.name.com') + hp.register_uri(hp.GET, eu.combine_url(base_url, 'instance-id'), + status=200, body='123') + hp.register_uri(hp.GET, eu.combine_url(base_url, 'public-keys/'), + status=200, + body="\n".join(['0=my-public-key', '1=my-other-key'])) + hp.register_uri(hp.GET, + eu.combine_url(base_url, 'public-keys/0/openssh-key'), + status=200, body='ssh-rsa AAAA.....wZEf my-public-key') + hp.register_uri(hp.GET, + eu.combine_url(base_url, 'public-keys/1/openssh-key'), + status=200, body='ssh-rsa AAAA.....wZEf my-other-key') + md = eu.get_instance_metadata(self.VERSION, retries=0, timeout=0.1) + self.assertEquals(md['hostname'], 'ec2.fake.host.name.com') + self.assertEquals(md['instance-id'], '123') + self.assertEquals(2, len(md['public-keys'])) + + @hp.activate + def test_metadata_fetch_bdm(self): + base_url = 'http://169.254.169.254/%s/meta-data' % (self.VERSION) + hp.register_uri(hp.GET, base_url, status=200, + body="\n".join(['hostname', + 'instance-id', + 'block-device-mapping/'])) + hp.register_uri(hp.GET, eu.combine_url(base_url, 'hostname'), + status=200, body='ec2.fake.host.name.com') + hp.register_uri(hp.GET, eu.combine_url(base_url, 'instance-id'), + status=200, body='123') + hp.register_uri(hp.GET, + eu.combine_url(base_url, 'block-device-mapping/'), + status=200, + body="\n".join(['ami', 'ephemeral0'])) + hp.register_uri(hp.GET, + eu.combine_url(base_url, 'block-device-mapping/ami'), + status=200, + body="sdb") + hp.register_uri(hp.GET, + eu.combine_url(base_url, + 'block-device-mapping/ephemeral0'), + status=200, + body="sdc") + md = eu.get_instance_metadata(self.VERSION, retries=0, timeout=0.1) + self.assertEquals(md['hostname'], 'ec2.fake.host.name.com') + 
self.assertEquals(md['instance-id'], '123') + bdm = md['block-device-mapping'] + self.assertEquals(2, len(bdm)) + self.assertEquals(bdm['ami'], 'sdb') + self.assertEquals(bdm['ephemeral0'], 'sdc') -- cgit v1.2.3 From 4b70c9a952f0d12a3379cd5c1d930c20f5154134 Mon Sep 17 00:00:00 2001 From: Joshua Harlow Date: Fri, 10 Jan 2014 17:37:10 -0800 Subject: Add a maybe_json helper function --- cloudinit/ec2_utils.py | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/cloudinit/ec2_utils.py b/cloudinit/ec2_utils.py index c46adc6b..525059f7 100644 --- a/cloudinit/ec2_utils.py +++ b/cloudinit/ec2_utils.py @@ -28,6 +28,17 @@ from cloudinit import util LOG = logging.getLogger(__name__) +def maybe_json(text): + if not text: + return False + text = text.strip() + if text.startswith("{") and text.endswith("}"): + return True + if text.startswith("[") and text.endswith("]"): + return True + return False + + def combine_url(base, add_on): base_parsed = list(urlparse(base)) path = base_parsed[2] @@ -94,16 +105,16 @@ class MetadataMaterializer(object): self._md = self._materialize(self._blob, self._base_url) return self._md - def _decode_leaf_blob(self, blob): + def _decode_leaf_blob(self, field, blob): if not blob: return blob - stripped_blob = blob.strip() - if stripped_blob.startswith("{") and stripped_blob.endswith("}"): - # Assume and try with json + if maybe_json(blob): try: + # Assume it's json, unless it fails parsing... 
return json.loads(blob) - except (ValueError, TypeError): - pass + except (ValueError, TypeError) as e: + LOG.warn("Field %s looked like json, but it was not: %s", + field, e) if blob.find("\n") != -1: return blob.splitlines() return blob @@ -121,7 +132,7 @@ class MetadataMaterializer(object): for (field, resource) in leaves.items(): leaf_url = combine_url(base_url, resource) leaf_blob = str(self._caller(leaf_url)) - leaf_contents[field] = self._decode_leaf_blob(leaf_blob) + leaf_contents[field] = self._decode_leaf_blob(field, leaf_blob) joined = {} joined.update(child_contents) for field in leaf_contents.keys(): -- cgit v1.2.3 From 6f8c1d62e406675ad8524ce4fa97eac958d42238 Mon Sep 17 00:00:00 2001 From: Joshua Harlow Date: Fri, 10 Jan 2014 17:38:11 -0800 Subject: Only check for json objects instead of also arrays --- cloudinit/ec2_utils.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/cloudinit/ec2_utils.py b/cloudinit/ec2_utils.py index 525059f7..1fd674bb 100644 --- a/cloudinit/ec2_utils.py +++ b/cloudinit/ec2_utils.py @@ -28,14 +28,12 @@ from cloudinit import util LOG = logging.getLogger(__name__) -def maybe_json(text): +def maybe_json_object(text): if not text: return False text = text.strip() if text.startswith("{") and text.endswith("}"): return True - if text.startswith("[") and text.endswith("]"): - return True return False @@ -108,7 +106,7 @@ class MetadataMaterializer(object): def _decode_leaf_blob(self, field, blob): if not blob: return blob - if maybe_json(blob): + if maybe_json_object(blob): try: # Assume it's json, unless it fails parsing... 
return json.loads(blob) -- cgit v1.2.3 From 3e5e7f563837685aeabc2fc67dd6cbb9fc619b0a Mon Sep 17 00:00:00 2001 From: Joshua Harlow Date: Fri, 10 Jan 2014 17:38:54 -0800 Subject: Updated non-json message --- cloudinit/ec2_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cloudinit/ec2_utils.py b/cloudinit/ec2_utils.py index 1fd674bb..c86623bc 100644 --- a/cloudinit/ec2_utils.py +++ b/cloudinit/ec2_utils.py @@ -111,8 +111,8 @@ class MetadataMaterializer(object): # Assume it's json, unless it fails parsing... return json.loads(blob) except (ValueError, TypeError) as e: - LOG.warn("Field %s looked like json, but it was not: %s", - field, e) + LOG.warn("Field %s looked like a json object, but it was" + " not: %s", field, e) if blob.find("\n") != -1: return blob.splitlines() return blob -- cgit v1.2.3