Diffstat (limited to 'cloudinit')
-rw-r--r--  cloudinit/config/cc_phone_home.py            5
-rw-r--r--  cloudinit/ec2_utils.py                     180
-rw-r--r--  cloudinit/ec2_utils.py.moved               157
-rw-r--r--  cloudinit/sources/DataSourceCloudStack.py   14
-rw-r--r--  cloudinit/sources/DataSourceEc2.py          16
-rw-r--r--  cloudinit/sources/DataSourceMAAS.py         25
-rw-r--r--  cloudinit/url_helper.py                    230
-rw-r--r--  cloudinit/user_data.py                       7
-rw-r--r--  cloudinit/util.py                           71
9 files changed, 533 insertions(+), 172 deletions(-)
diff --git a/cloudinit/config/cc_phone_home.py b/cloudinit/config/cc_phone_home.py
index 886487f8..c873c8a8 100644
--- a/cloudinit/config/cc_phone_home.py
+++ b/cloudinit/config/cc_phone_home.py
@@ -19,7 +19,6 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from cloudinit import templater
-from cloudinit import url_helper as uhelp
from cloudinit import util
from cloudinit.settings import PER_INSTANCE
@@ -112,7 +111,9 @@ def handle(name, cfg, cloud, log, args):
}
url = templater.render_string(url, url_params)
try:
- uhelp.readurl(url, data=real_submit_keys, retries=tries, sec_between=3)
+ util.read_file_or_url(url, data=real_submit_keys,
+ retries=tries, sec_between=3,
+ ssl_details=util.fetch_ssl_details(cloud.paths))
except:
util.logexc(log, ("Failed to post phone home data to"
" %s in %s tries"), url, tries)
diff --git a/cloudinit/ec2_utils.py b/cloudinit/ec2_utils.py
index 46b93f39..4812eccb 100644
--- a/cloudinit/ec2_utils.py
+++ b/cloudinit/ec2_utils.py
@@ -16,44 +16,142 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
-import boto.utils as boto_utils
-
-# Versions of boto >= 2.6.0 (and possibly 2.5.2)
-# try to lazily load the metadata backing, which
-# doesn't work so well in cloud-init especially
-# since the metadata is serialized and actions are
-# performed where the metadata server may be blocked
-# (thus the datasource will start failing) resulting
-# in url exceptions when fields that do exist (or
-# would have existed) do not exist due to the blocking
-# that occurred.
-
-
-def _unlazy_dict(mp):
- if not isinstance(mp, (dict)):
- return mp
- # Walk over the keys/values which
- # forces boto to unlazy itself and
- # has no effect on dictionaries that
- # already have there items.
- for (_k, v) in mp.items():
- _unlazy_dict(v)
- return mp
-
-
-def get_instance_userdata(api_version, metadata_address):
- # Note: boto.utils.get_instance_metadata returns '' for empty string
- # so the change from non-true to '' is not specifically necessary, but
- # this way cloud-init will get consistent behavior even if boto changed
- # in the future to return a None on "no user-data provided".
- ud = boto_utils.get_instance_userdata(api_version, None, metadata_address)
- if not ud:
- ud = ''
- return ud
-
-
-def get_instance_metadata(api_version, metadata_address):
- metadata = boto_utils.get_instance_metadata(api_version, metadata_address)
- if not isinstance(metadata, (dict)):
- metadata = {}
- return _unlazy_dict(metadata)
+from urlparse import (urlparse, urlunparse)
+
+import json
+import urllib
+
+from cloudinit import log as logging
+from cloudinit import util
+
+LOG = logging.getLogger(__name__)
+
+
+def combine_url(base, add_on):
+ base_parsed = list(urlparse(base))
+ path = base_parsed[2]
+ if path and not path.endswith("/"):
+ path += "/"
+ path += urllib.quote(str(add_on), safe="/:")
+ base_parsed[2] = path
+ return urlunparse(base_parsed)
+
+
+# See: http://bit.ly/TyoUQs
+#
+# Since the boto metadata reader uses the old urllib, which does
+# not support ssl, we need to go ahead and create our own reader
+# which works the same as the boto one (for now).
+class MetadataMaterializer(object):
+ def __init__(self, blob, base_url, ssl_details):
+ self._blob = blob
+ self._md = None
+ self._base_url = base_url
+ self._ssl_details = ssl_details
+
+ def _parse(self, blob):
+ leaves = {}
+ children = []
+ if not blob:
+ return (leaves, children)
+
+ def has_children(item):
+ if item.endswith("/"):
+ return True
+ else:
+ return False
+
+ def get_name(item):
+ if item.endswith("/"):
+ return item.rstrip("/")
+ return item
+
+ for field in blob.splitlines():
+ field = field.strip()
+ field_name = get_name(field)
+ if not field or not field_name:
+ continue
+ if has_children(field):
+ if field_name not in children:
+ children.append(field_name)
+ else:
+ contents = field.split("=", 1)
+ resource = field_name
+ if len(contents) > 1:
+ # What a PITA...
+ (ident, sub_contents) = contents
+ checked_ident = util.safe_int(ident)
+ if checked_ident is not None:
+ resource = "%s/openssh-key" % (checked_ident)
+ field_name = sub_contents
+ leaves[field_name] = resource
+ return (leaves, children)
+
+ def materialize(self):
+ if self._md is not None:
+ return self._md
+ self._md = self._materialize(self._blob, self._base_url)
+ return self._md
+
+ def _fetch_url(self, url):
+ response = util.read_file_or_url(url, ssl_details=self._ssl_details)
+ return str(response)
+
+ def _decode_leaf_blob(self, blob):
+ if not blob:
+ return blob
+ stripped_blob = blob.strip()
+ if stripped_blob.startswith("{") and stripped_blob.endswith("}"):
+            # Looks like json, so try to parse it as such
+ try:
+ return json.loads(blob)
+ except (ValueError, TypeError):
+ pass
+ if blob.find("\n") != -1:
+ return blob.splitlines()
+ return blob
+
+ def _materialize(self, blob, base_url):
+ (leaves, children) = self._parse(blob)
+ child_contents = {}
+ for c in children:
+ child_url = combine_url(base_url, c)
+ if not child_url.endswith("/"):
+ child_url += "/"
+ child_blob = self._fetch_url(child_url)
+ child_contents[c] = self._materialize(child_blob, child_url)
+ leaf_contents = {}
+ for (field, resource) in leaves.items():
+ leaf_url = combine_url(base_url, resource)
+ leaf_blob = self._fetch_url(leaf_url)
+ leaf_contents[field] = self._decode_leaf_blob(leaf_blob)
+ joined = {}
+ joined.update(child_contents)
+ for field in leaf_contents.keys():
+ if field in joined:
+ LOG.warn("Duplicate key found in results from %s", base_url)
+ else:
+ joined[field] = leaf_contents[field]
+ return joined
+
+
+def get_instance_userdata(api_version, metadata_address, ssl_details=None):
+ ud_url = combine_url(metadata_address, api_version)
+ ud_url = combine_url(ud_url, 'user-data')
+ try:
+ response = util.read_file_or_url(ud_url, ssl_details=ssl_details)
+ return str(response)
+ except Exception:
+ util.logexc(LOG, "Failed fetching userdata from url %s", ud_url)
+ return None
+
+def get_instance_metadata(api_version, metadata_address, ssl_details=None):
+ md_url = combine_url(metadata_address, api_version)
+ md_url = combine_url(md_url, 'meta-data')
+ try:
+ response = util.read_file_or_url(md_url, ssl_details=ssl_details)
+ materializer = MetadataMaterializer(str(response), md_url, ssl_details)
+ return materializer.materialize()
+ except Exception:
+ util.logexc(LOG, "Failed fetching metadata from url %s", md_url)
+ return None
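
The materializer consumes the EC2-style index format: fetching a directory url returns one entry per line, names ending in '/' are children to recurse into, everything else is a leaf, and 'public-keys' entries arrive as '0=my-key-name', which _parse() rewrites to the '0/openssh-key' resource before fetching. A sketch of the resulting top-level calls, assuming a reachable EC2-compatible metadata service:

    from cloudinit import ec2_utils

    # combine_url() joins these into http://169.254.169.254/latest/meta-data
    md = ec2_utils.get_instance_metadata('latest', 'http://169.254.169.254')
    ud = ec2_utils.get_instance_userdata('latest', 'http://169.254.169.254')
    if md is not None:
        # Leaves decode to strings (or parsed json / line lists);
        # children materialize recursively into nested dicts.
        print(md.get('ami-id'))
        print(md.get('public-keys'))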
diff --git a/cloudinit/ec2_utils.py.moved b/cloudinit/ec2_utils.py.moved
new file mode 100644
index 00000000..4812eccb
--- /dev/null
+++ b/cloudinit/ec2_utils.py.moved
@@ -0,0 +1,157 @@
+# vi: ts=4 expandtab
+#
+# Copyright (C) 2012 Yahoo! Inc.
+#
+# Author: Joshua Harlow <harlowja@yahoo-inc.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 3, as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+from urlparse import (urlparse, urlunparse)
+
+import json
+import urllib
+
+from cloudinit import log as logging
+from cloudinit import util
+
+LOG = logging.getLogger(__name__)
+
+
+def combine_url(base, add_on):
+ base_parsed = list(urlparse(base))
+ path = base_parsed[2]
+ if path and not path.endswith("/"):
+ path += "/"
+ path += urllib.quote(str(add_on), safe="/:")
+ base_parsed[2] = path
+ return urlunparse(base_parsed)
+
+
+# See: http://bit.ly/TyoUQs
+#
+# Since the boto metadata reader uses the old urllib, which does
+# not support ssl, we need to go ahead and create our own reader
+# which works the same as the boto one (for now).
+class MetadataMaterializer(object):
+ def __init__(self, blob, base_url, ssl_details):
+ self._blob = blob
+ self._md = None
+ self._base_url = base_url
+ self._ssl_details = ssl_details
+
+ def _parse(self, blob):
+ leaves = {}
+ children = []
+ if not blob:
+ return (leaves, children)
+
+ def has_children(item):
+ if item.endswith("/"):
+ return True
+ else:
+ return False
+
+ def get_name(item):
+ if item.endswith("/"):
+ return item.rstrip("/")
+ return item
+
+ for field in blob.splitlines():
+ field = field.strip()
+ field_name = get_name(field)
+ if not field or not field_name:
+ continue
+ if has_children(field):
+ if field_name not in children:
+ children.append(field_name)
+ else:
+ contents = field.split("=", 1)
+ resource = field_name
+ if len(contents) > 1:
+ # What a PITA...
+ (ident, sub_contents) = contents
+ checked_ident = util.safe_int(ident)
+ if checked_ident is not None:
+ resource = "%s/openssh-key" % (checked_ident)
+ field_name = sub_contents
+ leaves[field_name] = resource
+ return (leaves, children)
+
+ def materialize(self):
+ if self._md is not None:
+ return self._md
+ self._md = self._materialize(self._blob, self._base_url)
+ return self._md
+
+ def _fetch_url(self, url):
+ response = util.read_file_or_url(url, ssl_details=self._ssl_details)
+ return str(response)
+
+ def _decode_leaf_blob(self, blob):
+ if not blob:
+ return blob
+ stripped_blob = blob.strip()
+ if stripped_blob.startswith("{") and stripped_blob.endswith("}"):
+            # Looks like json, so try to parse it as such
+ try:
+ return json.loads(blob)
+ except (ValueError, TypeError):
+ pass
+ if blob.find("\n") != -1:
+ return blob.splitlines()
+ return blob
+
+ def _materialize(self, blob, base_url):
+ (leaves, children) = self._parse(blob)
+ child_contents = {}
+ for c in children:
+ child_url = combine_url(base_url, c)
+ if not child_url.endswith("/"):
+ child_url += "/"
+ child_blob = self._fetch_url(child_url)
+ child_contents[c] = self._materialize(child_blob, child_url)
+ leaf_contents = {}
+ for (field, resource) in leaves.items():
+ leaf_url = combine_url(base_url, resource)
+ leaf_blob = self._fetch_url(leaf_url)
+ leaf_contents[field] = self._decode_leaf_blob(leaf_blob)
+ joined = {}
+ joined.update(child_contents)
+ for field in leaf_contents.keys():
+ if field in joined:
+ LOG.warn("Duplicate key found in results from %s", base_url)
+ else:
+ joined[field] = leaf_contents[field]
+ return joined
+
+
+def get_instance_userdata(api_version, metadata_address, ssl_details=None):
+ ud_url = combine_url(metadata_address, api_version)
+ ud_url = combine_url(ud_url, 'user-data')
+ try:
+ response = util.read_file_or_url(ud_url, ssl_details=ssl_details)
+ return str(response)
+ except Exception:
+ util.logexc(LOG, "Failed fetching userdata from url %s", ud_url)
+ return None
+
+def get_instance_metadata(api_version, metadata_address, ssl_details=None):
+ md_url = combine_url(metadata_address, api_version)
+ md_url = combine_url(md_url, 'meta-data')
+ try:
+ response = util.read_file_or_url(md_url, ssl_details=ssl_details)
+ materializer = MetadataMaterializer(str(response), md_url, ssl_details)
+ return materializer.materialize()
+ except Exception:
+ util.logexc(LOG, "Failed fetching metadata from url %s", md_url)
+ return None
diff --git a/cloudinit/sources/DataSourceCloudStack.py b/cloudinit/sources/DataSourceCloudStack.py
index 275caf0d..1335b63d 100644
--- a/cloudinit/sources/DataSourceCloudStack.py
+++ b/cloudinit/sources/DataSourceCloudStack.py
@@ -25,7 +25,7 @@
import os
import time
-from cloudinit import ec2_utils as ec2
+from cloudinit import ec2_utils
from cloudinit import log as logging
from cloudinit import sources
from cloudinit import url_helper as uhelp
@@ -104,10 +104,14 @@ class DataSourceCloudStack(sources.DataSource):
if not self.wait_for_metadata_service():
return False
start_time = time.time()
- self.userdata_raw = ec2.get_instance_userdata(self.api_ver,
- self.metadata_address)
- self.metadata = ec2.get_instance_metadata(self.api_ver,
- self.metadata_address)
+ md_addr = self.metadata_address
+ ssl_details = util.fetch_ssl_details(self.paths)
+ self.userdata_raw = ec2_utils.get_instance_userdata(self.api_ver,
+ md_addr,
+ ssl_details)
+ self.metadata = ec2_utils.get_instance_metadata(self.api_ver,
+ md_addr,
+ ssl_details)
LOG.debug("Crawl of metadata service took %s seconds",
int(time.time() - start_time))
return True
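
DataSourceEc2 below receives the identical treatment, so the crawl in both datasources reduces to the same shape: resolve ssl_details once from the datasource's paths and pass it to both fetchers. Schematically:

    from cloudinit import ec2_utils
    from cloudinit import util

    def crawl_metadata(api_ver, metadata_address, paths):
        # One ssl_details lookup, shared by the userdata and metadata fetch.
        ssl_details = util.fetch_ssl_details(paths)
        userdata_raw = ec2_utils.get_instance_userdata(api_ver,
                                                       metadata_address,
                                                       ssl_details)
        metadata = ec2_utils.get_instance_metadata(api_ver,
                                                   metadata_address,
                                                   ssl_details)
        return (userdata_raw, metadata)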
diff --git a/cloudinit/sources/DataSourceEc2.py b/cloudinit/sources/DataSourceEc2.py
index 2db53446..0a58820b 100644
--- a/cloudinit/sources/DataSourceEc2.py
+++ b/cloudinit/sources/DataSourceEc2.py
@@ -23,7 +23,7 @@
import os
import time
-from cloudinit import ec2_utils as ec2
+from cloudinit import ec2_utils
from cloudinit import log as logging
from cloudinit import sources
from cloudinit import url_helper as uhelp
@@ -64,10 +64,14 @@ class DataSourceEc2(sources.DataSource):
if not self.wait_for_metadata_service():
return False
start_time = time.time()
- self.userdata_raw = ec2.get_instance_userdata(self.api_ver,
- self.metadata_address)
- self.metadata = ec2.get_instance_metadata(self.api_ver,
- self.metadata_address)
+ md_addr = self.metadata_address
+ ssl_details = util.fetch_ssl_details(self.paths)
+ self.userdata_raw = ec2_utils.get_instance_userdata(self.api_ver,
+ md_addr,
+ ssl_details)
+ self.metadata = ec2_utils.get_instance_metadata(self.api_ver,
+ md_addr,
+ ssl_details)
LOG.debug("Crawl of metadata service took %s seconds",
int(time.time() - start_time))
return True
@@ -136,7 +140,7 @@ class DataSourceEc2(sources.DataSource):
start_time = time.time()
url = uhelp.wait_for_url(urls=urls, max_wait=max_wait,
- timeout=timeout, status_cb=LOG.warn)
+ timeout=timeout, status_cb=LOG.warn)
if url:
LOG.debug("Using metadata source: '%s'", url2base[url])
diff --git a/cloudinit/sources/DataSourceMAAS.py b/cloudinit/sources/DataSourceMAAS.py
index b55d8a21..6e1133b2 100644
--- a/cloudinit/sources/DataSourceMAAS.py
+++ b/cloudinit/sources/DataSourceMAAS.py
@@ -25,9 +25,11 @@ import os
import time
import urllib2
+import requests
+
from cloudinit import log as logging
from cloudinit import sources
-from cloudinit import url_helper as uhelp
+from cloudinit import url_helper
from cloudinit import util
LOG = logging.getLogger(__name__)
@@ -79,7 +81,8 @@ class DataSourceMAAS(sources.DataSource):
self.base_url = url
(userdata, metadata) = read_maas_seed_url(self.base_url,
- self.md_headers)
+ self.md_headers,
+ paths=self.paths)
self.userdata_raw = userdata
self.metadata = metadata
return True
@@ -139,7 +142,7 @@ class DataSourceMAAS(sources.DataSource):
LOG.debug("Using metadata source: '%s'", url)
else:
LOG.critical("Giving up on md from %s after %i seconds",
- urls, int(time.time() - starttime))
+ urls, int(time.time() - starttime))
return bool(url)
@@ -188,11 +191,11 @@ def read_maas_seed_dir(seed_d):
def read_maas_seed_url(seed_url, header_cb=None, timeout=None,
- version=MD_VERSION):
+ version=MD_VERSION, paths=None):
"""
Read the maas datasource at seed_url.
- header_cb is a method that should return a headers dictionary that will
- be given to urllib2.Request()
+ - header_cb is a method that should return a headers dictionary for
+ a given url
Expected format of seed_url is the following files:
* <seed_url>/<version>/meta-data/instance-id
@@ -220,13 +223,14 @@ def read_maas_seed_url(seed_url, header_cb=None, timeout=None,
else:
headers = {}
try:
- resp = uhelp.readurl(url, headers=headers, timeout=timeout)
- if resp.ok():
+ resp = util.read_file_or_url(url, headers=headers, timeout=timeout,
+ ssl_details=util.fetch_ssl_details(paths))
+ if resp.ok:
md[name] = str(resp)
else:
LOG.warn(("Fetching from %s resulted in"
" an invalid http code %s"), url, resp.code)
- except urllib2.HTTPError as e:
+ except url_helper.UrlError as e:
if e.code != 404:
raise
return check_seed_contents(md, seed_url)
@@ -369,7 +373,8 @@ if __name__ == "__main__":
if args.subcmd == "check-seed":
if args.url.startswith("http"):
(userdata, metadata) = read_maas_seed_url(args.url,
- header_cb=my_headers, version=args.apiver)
+ header_cb=my_headers,
+ version=args.apiver)
else:
(userdata, metadata) = read_maas_seed_url(args.url)
print "=== userdata ==="
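
With this change header_cb receives the url being fetched and returns the headers to send with it (the MAAS datasource uses this for OAuth signing), and a 404 now surfaces as url_helper.UrlError, whose code attribute lets optional seed files be skipped. A sketch with a hypothetical seed url and token:

    from cloudinit.sources.DataSourceMAAS import read_maas_seed_url

    def my_headers(url):
        # Called once per fetched url; the real datasource returns
        # OAuth-signed headers here (this token is hypothetical).
        return {'Authorization': 'OAuth oauth_token="hypothetical"'}

    (userdata, metadata) = read_maas_seed_url('http://maas.example.com/seed',
                                              header_cb=my_headers,
                                              paths=None)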
diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py
index f3e3fd7e..0839e63b 100644
--- a/cloudinit/url_helper.py
+++ b/cloudinit/url_helper.py
@@ -20,119 +20,162 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
-from contextlib import closing
-
-import errno
-import socket
import time
-import urllib
-import urllib2
+
+import requests
+from requests import exceptions
+
+from urlparse import (urlparse, urlunparse)
from cloudinit import log as logging
from cloudinit import version
LOG = logging.getLogger(__name__)
+# Check if requests has ssl support (added in requests >= 0.8.8)
+SSL_ENABLED = False
+CONFIG_ENABLED = False # This was added in 0.7
+try:
+ import pkg_resources
+ from distutils.version import LooseVersion
+ _REQ = pkg_resources.get_distribution('requests')
+ _REQ_VER = LooseVersion(_REQ.version)
+ if _REQ_VER >= LooseVersion('0.8.8'):
+ SSL_ENABLED = True
+ if _REQ_VER >= LooseVersion('0.7.0'):
+ CONFIG_ENABLED = True
+except:
+ pass
+
+
+def _cleanurl(url):
+ parsed_url = list(urlparse(url, scheme='http'))
+ if not parsed_url[1] and parsed_url[2]:
+ # Swap these since this seems to be a common
+ # occurrence when given urls like 'www.google.com'
+ parsed_url[1] = parsed_url[2]
+ parsed_url[2] = ''
+ return urlunparse(parsed_url)
+
class UrlResponse(object):
- def __init__(self, status_code, contents=None, headers=None):
- self._status_code = status_code
- self._contents = contents
- self._headers = headers
+ def __init__(self, response):
+ self._response = response
@property
- def code(self):
- return self._status_code
+ def contents(self):
+ return self._response.content
@property
- def contents(self):
- return self._contents
+ def url(self):
+ return self._response.url
@property
- def headers(self):
- return self._headers
+ def ok(self):
+ return self._response.ok
- def __str__(self):
- if not self.contents:
- return ''
- else:
- return str(self.contents)
-
- def ok(self, redirects_ok=False):
- upper = 300
- if redirects_ok:
- upper = 400
- if self.code >= 200 and self.code < upper:
- return True
- else:
- return False
+ @property
+ def headers(self):
+ return self._response.headers
+ @property
+ def code(self):
+ return self._response.status_code
-def readurl(url, data=None, timeout=None,
- retries=0, sec_between=1, headers=None):
+ def __str__(self):
+ return self.contents
- req_args = {}
- req_args['url'] = url
- if data is not None:
- req_args['data'] = urllib.urlencode(data)
+class UrlError(IOError):
+ def __init__(self, cause):
+ IOError.__init__(self, str(cause))
+ self.cause = cause
+ if isinstance(cause, exceptions.HTTPError) and cause.response:
+ self.code = cause.response.status_code
+ else:
+ self.code = None
+
+
+def readurl(url, data=None, timeout=None, retries=0, sec_between=1,
+ headers=None, ssl_details=None, check_status=True,
+ allow_redirects=False):
+ url = _cleanurl(url)
+ req_args = {
+ 'url': url,
+ }
+ if urlparse(url).scheme == 'https' and ssl_details:
+ if not SSL_ENABLED:
+            LOG.warn("SSL is not enabled, certificate verification cannot occur!")
+ else:
+ if 'ca_certs' in ssl_details and ssl_details['ca_certs']:
+ req_args['verify'] = ssl_details['ca_certs']
+ else:
+ req_args['verify'] = True
+ if 'cert_file' in ssl_details and 'key_file' in ssl_details:
+ req_args['cert'] = [ssl_details['cert_file'],
+ ssl_details['key_file']]
+ elif 'cert_file' in ssl_details:
+ req_args['cert'] = str(ssl_details['cert_file'])
+
+ req_args['allow_redirects'] = allow_redirects
+ req_args['method'] = 'GET'
+ if timeout is not None:
+ req_args['timeout'] = max(float(timeout), 0)
+ if data:
+ req_args['method'] = 'POST'
+    # The 'config' kwarg does not exist in older library versions,
+    # so when it is unavailable we have to perform the retries
+    # manually ourselves
+ if CONFIG_ENABLED:
+ req_config = {
+ 'store_cookies': False,
+ }
+ # Don't use the retry support built-in
+ # since it doesn't allow for 'sleep_times'
+ # in between tries....
+ # if retries:
+ # req_config['max_retries'] = max(int(retries), 0)
+ req_args['config'] = req_config
+ manual_tries = 1
+ if retries:
+ manual_tries = max(int(retries) + 1, 1)
if not headers:
headers = {
'User-Agent': 'Cloud-Init/%s' % (version.version_string()),
}
-
req_args['headers'] = headers
- req = urllib2.Request(**req_args)
-
- retries = max(retries, 0)
- attempts = retries + 1
-
- excepts = []
- LOG.debug(("Attempting to open '%s' with %s attempts"
- " (%s retries, timeout=%s) to be performed"),
- url, attempts, retries, timeout)
- open_args = {}
- if timeout is not None:
- open_args['timeout'] = int(timeout)
- for i in range(0, attempts):
+ LOG.debug("Attempting to open '%s' with %s configuration", url, req_args)
+ if data:
+ # Do this after the log (it might be large)
+ req_args['data'] = data
+ if sec_between is None:
+ sec_between = -1
+ excps = []
+ # Handle retrying ourselves since the built-in support
+ # doesn't handle sleeping between tries...
+ for i in range(0, manual_tries):
try:
- with closing(urllib2.urlopen(req, **open_args)) as rh:
- content = rh.read()
- status = rh.getcode()
- if status is None:
- # This seems to happen when files are read...
- status = 200
- headers = {}
- if rh.headers:
- headers = dict(rh.headers)
- LOG.debug("Read from %s (%s, %sb) after %s attempts",
- url, status, len(content), (i + 1))
- return UrlResponse(status, content, headers)
- except urllib2.HTTPError as e:
- excepts.append(e)
- except urllib2.URLError as e:
- # This can be a message string or
- # another exception instance
- # (socket.error for remote URLs, OSError for local URLs).
- if (isinstance(e.reason, (OSError)) and
- e.reason.errno == errno.ENOENT):
- excepts.append(e.reason)
- else:
- excepts.append(e)
- except Exception as e:
- excepts.append(e)
- if i + 1 < attempts:
- LOG.debug("Please wait %s seconds while we wait to try again",
- sec_between)
- time.sleep(sec_between)
-
- # Didn't work out
- LOG.debug("Failed reading from %s after %s attempts", url, attempts)
-
- # It must of errored at least once for code
- # to get here so re-raise the last error
- LOG.debug("%s errors occured, re-raising the last one", len(excepts))
- raise excepts[-1]
+ r = requests.request(**req_args)
+ if check_status:
+ r.raise_for_status()
+ contents = r.content
+ status = r.status_code
+ headers = r.headers
+ LOG.debug("Read from %s (%s, %sb) after %s attempts", url,
+ status, len(contents), (i + 1))
+            # requests doesn't let us swap in our own response
+            # subclass, so wrap the response in a backward-compatible
+            # adapter instead
+ return UrlResponse(r)
+ except exceptions.RequestException as e:
+ excps.append(UrlError(e))
+ if i + 1 < manual_tries and sec_between > 0:
+                LOG.debug("Waiting %s seconds before trying again",
+                          sec_between)
+ time.sleep(sec_between)
+ if excps:
+ raise excps[-1]
+    return None  # Should raise before this...
def wait_for_url(urls, max_wait=None, timeout=None,
@@ -143,7 +186,7 @@ def wait_for_url(urls, max_wait=None, timeout=None,
max_wait: roughly the maximum time to wait before giving up
The max time is *actually* len(urls)*timeout as each url will
be tried once and given the timeout provided.
- timeout: the timeout provided to urllib2.urlopen
+ timeout: the timeout provided to urlopen
status_cb: call method with string message when a url is not available
headers_cb: call method with single argument of url to get headers
for request.
@@ -196,7 +239,8 @@ def wait_for_url(urls, max_wait=None, timeout=None,
else:
headers = {}
- resp = readurl(url, headers=headers, timeout=timeout)
+ resp = readurl(url, headers=headers, timeout=timeout,
+ check_status=False)
if not resp.contents:
reason = "empty response [%s]" % (resp.code)
e = ValueError(reason)
@@ -205,12 +249,8 @@ def wait_for_url(urls, max_wait=None, timeout=None,
e = ValueError(reason)
else:
return url
- except urllib2.HTTPError as e:
- reason = "http error [%s]" % e.code
- except urllib2.URLError as e:
- reason = "url error [%s]" % e.reason
- except socket.timeout as e:
- reason = "socket timeout [%s]" % e
+ except UrlError as e:
+ reason = "request error [%s]" % e
except Exception as e:
reason = "unexpected error [%s]" % e
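
The rewritten readurl() above is now the single transport choke point: it feature-detects the installed requests version, maps ssl_details onto requests' verify/cert arguments for https urls, and loops itself so it can sleep sec_between seconds between attempts, wrapping any requests failure in UrlError. A usage sketch (the host and cert paths are illustrative):

    from cloudinit import url_helper

    ssl_details = {
        'ca_certs': '/var/lib/cloud/data/ssl/ca-certs.pem',  # illustrative
        'cert_file': '/var/lib/cloud/data/ssl/cert.pem',
        'key_file': '/var/lib/cloud/data/ssl/key.pem',
    }
    try:
        resp = url_helper.readurl('https://example.com/meta-data/',
                                  timeout=5, retries=3, sec_between=2,
                                  ssl_details=ssl_details)
        print("%s: %sb" % (resp.code, len(resp.contents)))
    except url_helper.UrlError as e:
        # e.code carries the http status when the cause was an
        # HTTPError, otherwise it is None.
        print("fetch failed: %s (code=%s)" % (e, e.code))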
diff --git a/cloudinit/user_data.py b/cloudinit/user_data.py
index 58827e3d..eaf448a7 100644
--- a/cloudinit/user_data.py
+++ b/cloudinit/user_data.py
@@ -29,7 +29,6 @@ from email.mime.text import MIMEText
from cloudinit import handlers
from cloudinit import log as logging
-from cloudinit import url_helper
from cloudinit import util
LOG = logging.getLogger(__name__)
@@ -173,10 +172,10 @@ class UserDataProcessor(object):
if include_once_on and os.path.isfile(include_once_fn):
content = util.load_file(include_once_fn)
else:
- resp = url_helper.readurl(include_url)
- if include_once_on and resp.ok():
+ resp = util.read_file_or_url(include_url)
+ if include_once_on and resp.ok:
util.write_file(include_once_fn, str(resp), mode=0600)
- if resp.ok():
+ if resp.ok:
content = str(resp)
else:
LOG.warn(("Fetching from %s resulted in"
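
The include-once path above amounts to a small caching protocol: fetch the url at most once per instance, persist the body with 0600 permissions, and serve later boots from the cached copy. Condensed into one function (the name is illustrative):

    import os

    from cloudinit import util

    def load_include(include_url, include_once_fn, include_once_on):
        # Serve a previously cached copy instead of re-fetching.
        if include_once_on and os.path.isfile(include_once_fn):
            return util.load_file(include_once_fn)
        resp = util.read_file_or_url(include_url)
        if not resp.ok:
            return None
        if include_once_on:
            util.write_file(include_once_fn, str(resp), mode=0600)
        return str(resp)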
diff --git a/cloudinit/util.py b/cloudinit/util.py
index ffe844b2..42b3ab01 100644
--- a/cloudinit/util.py
+++ b/cloudinit/util.py
@@ -51,7 +51,7 @@ import yaml
from cloudinit import importer
from cloudinit import log as logging
from cloudinit import safeyaml
-from cloudinit import url_helper as uhelp
+from cloudinit import url_helper
from cloudinit import version
from cloudinit.settings import (CFG_BUILTIN)
@@ -70,6 +70,18 @@ FN_ALLOWED = ('_-.()' + string.digits + string.ascii_letters)
CONTAINER_TESTS = ['running-in-container', 'lxc-is-container']
+class FileResponse(object):
+ def __init__(self, path, contents):
+ self.code = 200
+ self.headers = {}
+ self.contents = contents
+ self.ok = True
+ self.url = path
+
+ def __str__(self):
+ return self.contents
+
+
class ProcessExecutionError(IOError):
MESSAGE_TMPL = ('%(description)s\n'
@@ -624,12 +636,53 @@ def read_optional_seed(fill, base="", ext="", timeout=5):
raise
-def read_file_or_url(url, timeout=5, retries=10, file_retries=0):
+def fetch_ssl_details(paths=None):
+ ssl_details = {}
+ # Lookup in these locations for ssl key/cert files
+ ssl_cert_paths = [
+ '/var/lib/cloud/data/ssl',
+ '/var/lib/cloud/instance/data/ssl',
+ ]
+ if paths:
+ ssl_cert_paths.extend([
+ os.path.join(paths.get_ipath_cur('data'), 'ssl'),
+ os.path.join(paths.get_cpath('data'), 'ssl'),
+ ])
+ ssl_cert_paths = uniq_merge(ssl_cert_paths)
+ ssl_cert_paths = [d for d in ssl_cert_paths if d and os.path.isdir(d)]
+ cert_file = None
+ for d in ssl_cert_paths:
+ if os.path.isfile(os.path.join(d, 'cert.pem')):
+ cert_file = os.path.join(d, 'cert.pem')
+ break
+ key_file = None
+ for d in ssl_cert_paths:
+ if os.path.isfile(os.path.join(d, 'key.pem')):
+ key_file = os.path.join(d, 'key.pem')
+ break
+ if cert_file and key_file:
+ ssl_details['cert_file'] = cert_file
+ ssl_details['key_file'] = key_file
+ elif cert_file:
+ ssl_details['cert_file'] = cert_file
+ return ssl_details
+
+
+def read_file_or_url(url, timeout=5, retries=10,
+ headers=None, data=None, sec_between=1, ssl_details=None):
if url.startswith("/"):
url = "file://%s" % url
- if url.startswith("file://"):
- retries = file_retries
- return uhelp.readurl(url, timeout=timeout, retries=retries)
+ if url.lower().startswith("file://"):
+ file_path = url[len("file://"):]
+ return FileResponse(file_path, contents=load_file(file_path))
+ else:
+ return url_helper.readurl(url,
+ timeout=timeout,
+ retries=retries,
+ headers=headers,
+ data=data,
+ sec_between=sec_between,
+ ssl_details=ssl_details)
def load_yaml(blob, default=None, allowed=(dict,)):
@@ -671,13 +724,13 @@ def read_seeded(base="", ext="", timeout=5, retries=10, file_retries=0):
md_resp = read_file_or_url(md_url, timeout, retries, file_retries)
md = None
- if md_resp.ok():
+ if md_resp.ok:
md_str = str(md_resp)
md = load_yaml(md_str, default={})
ud_resp = read_file_or_url(ud_url, timeout, retries, file_retries)
ud = None
- if ud_resp.ok():
+ if ud_resp.ok:
ud_str = str(ud_resp)
ud = ud_str
@@ -846,8 +899,8 @@ def get_cmdline_url(names=('cloud-config-url', 'url'),
if not url:
return (None, None, None)
- resp = uhelp.readurl(url)
- if resp.contents.startswith(starts) and resp.ok():
+ resp = read_file_or_url(url)
+ if resp.contents.startswith(starts) and resp.ok:
return (key, url, str(resp))
return (key, url, None)
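
With FileResponse in place, read_file_or_url gives callers one response surface regardless of scheme: bare '/path' and 'file://' urls are read in-process, everything else goes through url_helper.readurl, and .ok, .code, .contents, .url and str() behave the same either way. A sketch of that uniformity (the urls are illustrative):

    from cloudinit import util

    for src in ('/etc/hostname',                  # served by FileResponse
                'http://example.com/user-data'):  # served by url_helper.readurl
        resp = util.read_file_or_url(src)
        if resp.ok:
            print("%s (%s): %sb" % (resp.url, resp.code, len(resp.contents)))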