author | Joshua Harlow <harlowja@yahoo-inc.com> | 2012-10-18 10:34:38 -0700
---|---|---
committer | Joshua Harlow <harlowja@yahoo-inc.com> | 2012-10-18 10:34:38 -0700
commit | 914c6e86f1689ae186a0db836e7f0304d72c38b4 (patch) |
tree | 88e5d3aaa07af5acc6d01a22b6d9e47eacc11cf0 |
parent | bdaa57bc5b8a75b0891673a7bb0a60c5b02beb7c (diff) |
download | vyos-cloud-init-914c6e86f1689ae186a0db836e7f0304d72c38b4.tar.gz, vyos-cloud-init-914c6e86f1689ae186a0db836e7f0304d72c38b4.zip |
Start fixing how boto and our URL fetching cannot use SSL
correctly (they cannot do certificate validation due to the
underlying usage of urllib/httplib). Adjust cloud-init URL
fetching to use urllib3, which can. For now, also take the
(small amount of) metadata/userdata extraction code from
boto and fix it up in a new local file called 'ec2_utils'.
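The motivation, in miniature: urllib2/httplib accepted any certificate on HTTPS connections, while urllib3 can be told to verify the peer against a CA bundle. Below is a minimal sketch (not part of the commit) of the verification this change enables; the URL and CA-bundle path are illustrative assumptions, and the `cert_reqs`/`ca_certs` keywords are the same ones the new `readurl()` forwards from `ssl_details`:

```python
# Minimal sketch of certificate validation via urllib3 (urllib2
# performed no certificate checks at all). The URL and the ca_certs
# path are assumptions; the bundle location varies by distribution.
from urllib3 import connectionpool

pool = connectionpool.connection_from_url(
    'https://example.com/',
    cert_reqs='CERT_REQUIRED',  # reject invalid/untrusted certificates
    ca_certs='/etc/ssl/certs/ca-certificates.crt')  # assumed trust store
resp = pool.urlopen('GET', '/', redirect=False)
print(resp.status)  # raises urllib3's SSLError instead if the cert is bad
```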
-rw-r--r-- | Requires | 3 |
-rw-r--r-- | cloudinit/ec2_utils.py | 135 |
-rw-r--r-- | cloudinit/url_helper.py | 97 |
3 files changed, 171 insertions, 64 deletions
diff --git a/Requires b/Requires
--- a/Requires
+++ b/Requires
@@ -26,3 +26,6 @@ pyyaml
 
 # The new main entrypoint uses argparse instead of optparse
 argparse
+
+# Urllib3 handles ssl correctly!
+urllib3
diff --git a/cloudinit/ec2_utils.py b/cloudinit/ec2_utils.py
new file mode 100644
index 00000000..ef7fac7d
--- /dev/null
+++ b/cloudinit/ec2_utils.py
@@ -0,0 +1,135 @@
+# vi: ts=4 expandtab
+#
+#    Copyright (C) 2012 Yahoo! Inc.
+#
+#    Author: Joshua Harlow <harlowja@yahoo-inc.com>
+#
+#    This program is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License version 3, as
+#    published by the Free Software Foundation.
+#
+#    This program is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+import json
+import urllib
+
+from cloudinit import log as logging
+from cloudinit import url_helper as uh
+from cloudinit import util
+
+LOG = logging.getLogger(__name__)
+
+
+# For now take this and fix it...
+class LazyLoadMetadata(dict):
+    def __init__(self, url, fetch_timeout, num_retries, ssl_details):
+        self._url = url
+        self._num_retries = num_retries
+        self._ssl_details = ssl_details
+        self._fetch_timeout = fetch_timeout
+        self._leaves = {}
+        self._dicts = []
+        response = uh.readurl(url, timeout=fetch_timeout,
+                              retries=num_retries, ssl_details=ssl_details)
+        data = str(response)
+        if data:
+            fields = data.split('\n')
+            for field in fields:
+                if field.endswith('/'):
+                    key = field[0:-1]
+                    self._dicts.append(key)
+                else:
+                    p = field.find('=')
+                    if p > 0:
+                        key = field[p + 1:]
+                        resource = field[0:p] + '/openssh-key'
+                    else:
+                        key = resource = field
+                    self._leaves[key] = resource
+                self[key] = None
+
+    def _materialize(self):
+        for key in self:
+            self[key]
+
+    def __getitem__(self, key):
+        if key not in self:
+            # Allow dict to throw the KeyError
+            return super(LazyLoadMetadata, self).__getitem__(key)
+
+        # Already loaded
+        val = super(LazyLoadMetadata, self).__getitem__(key)
+        if val is not None:
+            return val
+
+        if key in self._leaves:
+            resource = self._leaves[key]
+            new_url = self._url + urllib.quote(resource, safe="/:")
+            response = uh.readurl(new_url, retries=self._num_retries,
+                                  timeout=self._fetch_timeout,
+                                  ssl_details=self._ssl_details)
+            val = str(response)
+            if val and val[0] == '{':
+                val = json.loads(val)
+            else:
+                p = val.find('\n')
+                if p > 0:
+                    val = val.split('\n')
+            self[key] = val
+        elif key in self._dicts:
+            new_url = self._url + key + '/'
+            self[key] = LazyLoadMetadata(new_url,
+                                         num_retries=self._num_retries,
+                                         fetch_timeout=self._fetch_timeout,
+                                         ssl_details=self._ssl_details)
+
+        return super(LazyLoadMetadata, self).__getitem__(key)
+
+    def get(self, key, default=None):
+        try:
+            return self[key]
+        except KeyError:
+            return default
+
+    def values(self):
+        self._materialize()
+        return super(LazyLoadMetadata, self).values()
+
+    def items(self):
+        self._materialize()
+        return super(LazyLoadMetadata, self).items()
+
+    def __str__(self):
+        self._materialize()
+        return super(LazyLoadMetadata, self).__str__()
+
+    def __repr__(self):
+        self._materialize()
+        return super(LazyLoadMetadata, self).__repr__()
+
+
+def get_instance_userdata(url, version='latest', ssl_details=None):
+    ud_url = '%s/%s/user-data' % (url, version)
+    try:
+        response = uh.readurl(ud_url, timeout=5, retries=10,
+                              ssl_details=ssl_details)
+        return str(response)
+    except Exception as e:
+        util.logexc(LOG, "Failed fetching url %s", ud_url)
+        return None
+
+
+def get_instance_metadata(url, version='latest', ssl_details=None):
+    md_url = '%s/%s/meta-data' % (url, version)
+    try:
+        return LazyLoadMetadata(md_url, fetch_timeout=5,
+                                num_retries=10, ssl_details=ssl_details)
+    except Exception as e:
+        util.logexc(LOG, "Failed fetching url %s", md_url)
+        return None
diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py
index f3e3fd7e..e3f63021 100644
--- a/cloudinit/url_helper.py
+++ b/cloudinit/url_helper.py
@@ -26,7 +26,9 @@ import errno
 import socket
 import time
 import urllib
-import urllib2
+
+from urllib3 import connectionpool
+from urllib3 import util
 
 from cloudinit import log as logging
 from cloudinit import version
@@ -68,71 +70,38 @@ class UrlResponse(object):
         return False
 
 
-def readurl(url, data=None, timeout=None,
-            retries=0, sec_between=1, headers=None):
-
+def readurl(url, data=None, timeout=None, retries=0,
+            headers=None, ssl_details=None):
     req_args = {}
-    req_args['url'] = url
-    if data is not None:
-        req_args['data'] = urllib.urlencode(data)
-
-    if not headers:
-        headers = {
-            'User-Agent': 'Cloud-Init/%s' % (version.version_string()),
+    p_url = util.parse_url(url)
+    if p_url.scheme == 'https' and ssl_details:
+        for k in ['key_file', 'cert_file', 'cert_reqs', 'ca_certs']:
+            if k in ssl_details:
+                req_args[k] = ssl_details[k]
+    with closing(connectionpool.connection_from_url(url, **req_args)) as req_p:
+        retries = max(int(retries), 0)
+        attempts = retries + 1
+        LOG.debug(("Attempting to open '%s' with %s attempts"
+                   " (%s retries, timeout=%s) to be performed"),
+                  url, attempts, retries, timeout)
+        open_args = {
+            'method': 'GET',
+            'retries': retries,
+            'redirect': False,
+            'url': p_url.request_uri,
         }
-
-    req_args['headers'] = headers
-    req = urllib2.Request(**req_args)
-
-    retries = max(retries, 0)
-    attempts = retries + 1
-
-    excepts = []
-    LOG.debug(("Attempting to open '%s' with %s attempts"
-               " (%s retries, timeout=%s) to be performed"),
-              url, attempts, retries, timeout)
-    open_args = {}
-    if timeout is not None:
-        open_args['timeout'] = int(timeout)
-    for i in range(0, attempts):
-        try:
-            with closing(urllib2.urlopen(req, **open_args)) as rh:
-                content = rh.read()
-                status = rh.getcode()
-                if status is None:
-                    # This seems to happen when files are read...
-                    status = 200
-                headers = {}
-                if rh.headers:
-                    headers = dict(rh.headers)
-                LOG.debug("Read from %s (%s, %sb) after %s attempts",
-                          url, status, len(content), (i + 1))
-                return UrlResponse(status, content, headers)
-        except urllib2.HTTPError as e:
-            excepts.append(e)
-        except urllib2.URLError as e:
-            # This can be a message string or
-            # another exception instance
-            # (socket.error for remote URLs, OSError for local URLs).
-            if (isinstance(e.reason, (OSError)) and
-                e.reason.errno == errno.ENOENT):
-                excepts.append(e.reason)
-            else:
-                excepts.append(e)
-        except Exception as e:
-            excepts.append(e)
-        if i + 1 < attempts:
-            LOG.debug("Please wait %s seconds while we wait to try again",
-                      sec_between)
-            time.sleep(sec_between)
-
-    # Didn't work out
-    LOG.debug("Failed reading from %s after %s attempts", url, attempts)
-
-    # It must of errored at least once for code
-    # to get here so re-raise the last error
-    LOG.debug("%s errors occured, re-raising the last one", len(excepts))
-    raise excepts[-1]
+        if data is not None:
+            open_args['body'] = urllib.urlencode(data)
+            open_args['method'] = 'POST'
+        if not headers:
+            headers = {
+                'User-Agent': 'Cloud-Init/%s' % (version.version_string()),
+            }
+        open_args['headers'] = headers
+        if timeout is not None:
+            open_args['timeout'] = max(int(timeout), 0)
+        r = req_p.urlopen(**open_args)
+        return UrlResponse(r.status, r.data, r.headers)
 
 
 def wait_for_url(urls, max_wait=None, timeout=None,
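For context, a short sketch of how the new helpers fit together (not part of the commit). The base URL and ssl_details values are illustrative; ssl_details only takes effect for https URLs, and its keys are exactly the ones `readurl()` forwards to urllib3:

```python
# Hypothetical driver for the new ec2_utils helpers; the ca_certs
# path and base_url are assumptions for illustration only.
from cloudinit import ec2_utils

ssl_details = {
    'cert_reqs': 'CERT_REQUIRED',
    'ca_certs': '/etc/ssl/certs/ca-certificates.crt',  # assumed path
}
base_url = 'http://169.254.169.254'  # the usual EC2 metadata endpoint

# user-data comes back as a raw string, or None if fetching failed
ud = ec2_utils.get_instance_userdata(base_url, ssl_details=ssl_details)

# meta-data comes back as a LazyLoadMetadata dict: leaf values are
# only fetched from the service when their keys are first accessed
md = ec2_utils.get_instance_metadata(base_url, ssl_details=ssl_details)
if md is not None:
    print(md.get('instance-id'))
```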