1 files changed, 151 insertions, 96 deletions
diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py
index f3e3fd7e..bfc5cfdd 100644
--- a/cloudinit/url_helper.py
+++ b/cloudinit/url_helper.py
@@ -20,43 +20,55 @@
 #    You should have received a copy of the GNU General Public License
 #    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-from contextlib import closing
-
-import errno
-import socket
 import time
-import urllib
-import urllib2
+
+import requests
+from requests import exceptions
+
+from urlparse import (urlparse, urlunparse)
 
 from cloudinit import log as logging
 from cloudinit import version
 
 LOG = logging.getLogger(__name__)
 
+# Probe the installed requests version: ssl support arrived in 0.8.8
+SSL_ENABLED = False
+CONFIG_ENABLED = False  # 'config' kwarg exists in 0.7 <= requests < 1.0
+try:
+    import pkg_resources
+    from distutils.version import LooseVersion
+    _REQ = pkg_resources.get_distribution('requests')
+    _REQ_VER = LooseVersion(_REQ.version)
+    if _REQ_VER >= LooseVersion('0.8.8'):
+        SSL_ENABLED = True
+    if _REQ_VER >= LooseVersion('0.7.0') and _REQ_VER < LooseVersion('1.0.0'):
+        CONFIG_ENABLED = True
+except Exception:
+    pass  # best effort: an undetectable version leaves both flags False
+
+
+def _cleanurl(url):
+    """Default the scheme to http and move a netloc-less path into netloc."""
+    parsed_url = list(urlparse(url, scheme='http'))
+    if not parsed_url[1] and parsed_url[2]:
+        # 'www.google.com' style input parses as a bare path: use it as netloc
+        parsed_url[1] = parsed_url[2]
+        parsed_url[2] = ''
+    return urlunparse(parsed_url)
 
-class UrlResponse(object):
-    def __init__(self, status_code, contents=None, headers=None):
-        self._status_code = status_code
-        self._contents = contents
-        self._headers = headers
 
-    @property
-    def code(self):
-        return self._status_code
+class UrlResponse(object):
+    def __init__(self, response):
+        self._response = response
 
     @property
     def contents(self):
-        return self._contents
+        return self._response.content
 
     @property
-    def headers(self):
-        return self._headers
-
-    def __str__(self):
-        if not self.contents:
-            return ''
-        else:
-            return str(self.contents)
+    def url(self):
+        return self._response.url
 
     def ok(self, redirects_ok=False):
         upper = 300
@@ -67,72 +79,111 @@ class UrlResponse(object):
         else:
             return False
 
+    @property
+    def headers(self):
+        return self._response.headers
 
-def readurl(url, data=None, timeout=None,
-            retries=0, sec_between=1, headers=None):
-
-    req_args = {}
-    req_args['url'] = url
-    if data is not None:
-        req_args['data'] = urllib.urlencode(data)
+    @property
+    def code(self):
+        return self._response.status_code
 
+    def __str__(self):
+        return self.contents
+
+
+class UrlError(IOError):
+    """IOError that records the cause plus any HTTP code and headers."""
+    def __init__(self, cause, code=None, headers=None):
+        IOError.__init__(self, str(cause))
+        self.cause = cause
+        self.code = code
+        # Default to an empty dict so that headers is never None.
+        self.headers = {} if headers is None else headers
+
+
+def readurl(url, data=None, timeout=None, retries=0, sec_between=1,
+            headers=None, ssl_details=None, check_status=True,
+            allow_redirects=False):
+    """Fetch url with requests and wrap the result in a UrlResponse.
+
+    A POST is sent when data is given, otherwise a GET.  Up to
+    retries + 1 attempts are made, sleeping sec_between seconds in
+    between when that is greater than zero.  With check_status set,
+    a status rejected by raise_for_status() counts as a failure.  If
+    every attempt fails the UrlError for the last failure is raised.
+    ssl_details (https only) may hold 'ca_certs', 'cert_file', 'key_file'.
+    """
+    url = _cleanurl(url)
+    req_args = {'url': url}
+    if urlparse(url).scheme == 'https' and ssl_details:
+        if not SSL_ENABLED:
+            LOG.warn("SSL is not enabled, cert. verification can not occur!")
+        else:
+            if 'ca_certs' in ssl_details and ssl_details['ca_certs']:
+                req_args['verify'] = ssl_details['ca_certs']
+            else:
+                req_args['verify'] = True
+            if 'cert_file' in ssl_details and 'key_file' in ssl_details:
+                req_args['cert'] = [ssl_details['cert_file'],
+                                    ssl_details['key_file']]
+            elif 'cert_file' in ssl_details:
+                req_args['cert'] = str(ssl_details['cert_file'])
+    req_args['allow_redirects'] = allow_redirects
+    req_args['method'] = 'GET'
+    if timeout is not None:
+        req_args['timeout'] = max(float(timeout), 0)
+    if data:
+        req_args['method'] = 'POST'
+    # The 'config' keyword only exists in some requests releases (see
+    # CONFIG_ENABLED), so it is only passed along when supported.
+    if CONFIG_ENABLED:
+        # The built-in 'max_retries' support is deliberately not used
+        # since it can not sleep in between tries; retrying is done
+        # manually by the loop below instead.
+        req_args['config'] = {'store_cookies': False}
+    manual_tries = 1
+    if retries:
+        manual_tries = max(int(retries) + 1, 1)
     if not headers:
         headers = {
             'User-Agent': 'Cloud-Init/%s' % (version.version_string()),
         }
-
     req_args['headers'] = headers
-    req = urllib2.Request(**req_args)
-
-    retries = max(retries, 0)
-    attempts = retries + 1
-
-    excepts = []
-    LOG.debug(("Attempting to open '%s' with %s attempts"
-               " (%s retries, timeout=%s) to be performed"),
-              url, attempts, retries, timeout)
-    open_args = {}
-    if timeout is not None:
-        open_args['timeout'] = int(timeout)
-    for i in range(0, attempts):
+    LOG.debug("Attempting to open '%s' with %s configuration", url, req_args)
+    if data:
+        # Do this after the log (it might be large)
+        req_args['data'] = data
+    if sec_between is None:
+        sec_between = -1
+    excps = []
+    # Handle retrying ourselves since the built-in support
+    # doesn't handle sleeping between tries...
+    for i in range(0, manual_tries):
         try:
-            with closing(urllib2.urlopen(req, **open_args)) as rh:
-                content = rh.read()
-                status = rh.getcode()
-                if status is None:
-                    # This seems to happen when files are read...
-                    status = 200
-                headers = {}
-                if rh.headers:
-                    headers = dict(rh.headers)
-                LOG.debug("Read from %s (%s, %sb) after %s attempts",
-                          url, status, len(content), (i + 1))
-                return UrlResponse(status, content, headers)
-        except urllib2.HTTPError as e:
-            excepts.append(e)
-        except urllib2.URLError as e:
-            # This can be a message string or
-            # another exception instance
-            # (socket.error for remote URLs, OSError for local URLs).
-            if (isinstance(e.reason, (OSError)) and
-                e.reason.errno == errno.ENOENT):
-                excepts.append(e.reason)
+            r = requests.request(**req_args)
+            if check_status:
+                r.raise_for_status()
+            LOG.debug("Read from %s (%s, %sb) after %s attempts", url,
+                      r.status_code, len(r.content), (i + 1))
+            # requests gives no way to substitute our own response
+            # class, so wrap the result to keep the older
+            # code/contents/headers interface available to callers.
+            return UrlResponse(r)
+        except exceptions.RequestException as e:
+            if (isinstance(e, (exceptions.HTTPError))
+                and hasattr(e, 'response') # This appeared in v 0.10.8
+                and e.response is not None):  # error responses are falsy
+                excps.append(UrlError(e, code=e.response.status_code,
+                                      headers=e.response.headers))
             else:
-                excepts.append(e)
-        except Exception as e:
-            excepts.append(e)
-        if i + 1 < attempts:
-            LOG.debug("Please wait %s seconds while we wait to try again",
-                     sec_between)
-            time.sleep(sec_between)
-
-    # Didn't work out
-    LOG.debug("Failed reading from %s after %s attempts", url, attempts)
-
-    # It must of errored at least once for code
-    # to get here so re-raise the last error
-    LOG.debug("%s errors occured, re-raising the last one", len(excepts))
-    raise excepts[-1]
+                excps.append(UrlError(e))
+            if i + 1 < manual_tries and sec_between > 0:
+                LOG.debug("Please wait %s seconds while we wait to try again",
+                          sec_between)
+                time.sleep(sec_between)
+    if excps:
+        raise excps[-1]
+    return None # Should throw before this...
 
 
 def wait_for_url(urls, max_wait=None, timeout=None,
@@ -143,7 +194,7 @@ def wait_for_url(urls, max_wait=None, timeout=None,
     max_wait:  roughly the maximum time to wait before giving up
                The max time is *actually* len(urls)*timeout as each url will
                be tried once and given the timeout provided.
-    timeout:   the timeout provided to urllib2.urlopen
+    timeout:   the timeout provided to readurl
     status_cb: call method with string message when a url is not available
     headers_cb: call method with single argument of url to get headers
                 for request.
@@ -190,36 +241,40 @@ def wait_for_url(urls, max_wait=None, timeout=None,
                     timeout = int((start_time + max_wait) - now)
 
             reason = ""
+            e = None
             try:
                 if headers_cb is not None:
                     headers = headers_cb(url)
                 else:
                     headers = {}
 
-                resp = readurl(url, headers=headers, timeout=timeout)
-                if not resp.contents:
-                    reason = "empty response [%s]" % (resp.code)
-                    e = ValueError(reason)
-                elif not resp.ok():
-                    reason = "bad status code [%s]" % (resp.code)
-                    e = ValueError(reason)
+                response = readurl(url, headers=headers, timeout=timeout,
+                                   check_status=False)
+                if not response.contents:
+                    reason = "empty response [%s]" % (response.code)
+                    e = UrlError(ValueError(reason),
+                                 code=response.code, headers=response.headers)
+                elif not response.ok():
+                    reason = "bad status code [%s]" % (response.code)
+                    e = UrlError(ValueError(reason),
+                                 code=response.code, headers=response.headers)
                 else:
                     return url
-            except urllib2.HTTPError as e:
-                reason = "http error [%s]" % e.code
-            except urllib2.URLError as e:
-                reason = "url error [%s]" % e.reason
-            except socket.timeout as e:
-                reason = "socket timeout [%s]" % e
+            except UrlError as e:
+                reason = "request error [%s]" % e
             except Exception as e:
                 reason = "unexpected error [%s]" % e
 
             time_taken = int(time.time() - start_time)
             status_msg = "Calling '%s' failed [%s/%ss]: %s" % (url,
-                                                             time_taken,
-                                                             max_wait, reason)
+                                                               time_taken,
+                                                               max_wait,
+                                                               reason)
             status_cb(status_msg)
             if exception_cb:
+                # This can be used to alter the headers that will be sent
+                # in the future, for example this is what the MAAS datasource
+                # does.
                 exception_cb(msg=status_msg, exception=e)
 
         if timeup(max_wait, start_time):