diff options
Diffstat (limited to 'cloudinit/url_helper.py')
-rw-r--r-- | cloudinit/url_helper.py | 247 |
1 files changed, 151 insertions, 96 deletions
diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index f3e3fd7e..bfc5cfdd 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -20,43 +20,55 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. -from contextlib import closing - -import errno -import socket import time -import urllib -import urllib2 + +import requests +from requests import exceptions + +from urlparse import (urlparse, urlunparse) from cloudinit import log as logging from cloudinit import version LOG = logging.getLogger(__name__) +# Check if requests has ssl support (added in requests >= 0.8.8) +SSL_ENABLED = False +CONFIG_ENABLED = False # This was added in 0.7 (but taken out in >=1.0) +try: + import pkg_resources + from distutils.version import LooseVersion + _REQ = pkg_resources.get_distribution('requests') + _REQ_VER = LooseVersion(_REQ.version) + if _REQ_VER >= LooseVersion('0.8.8'): + SSL_ENABLED = True + if _REQ_VER >= LooseVersion('0.7.0') and _REQ_VER < LooseVersion('1.0.0'): + CONFIG_ENABLED = True +except: + pass + + +def _cleanurl(url): + parsed_url = list(urlparse(url, scheme='http')) + if not parsed_url[1] and parsed_url[2]: + # Swap these since this seems to be a common + # occurrence when given urls like 'www.google.com' + parsed_url[1] = parsed_url[2] + parsed_url[2] = '' + return urlunparse(parsed_url) -class UrlResponse(object): - def __init__(self, status_code, contents=None, headers=None): - self._status_code = status_code - self._contents = contents - self._headers = headers - @property - def code(self): - return self._status_code +class UrlResponse(object): + def __init__(self, response): + self._response = response @property def contents(self): - return self._contents + return self._response.content @property - def headers(self): - return self._headers - - def __str__(self): - if not self.contents: - return '' - else: - return str(self.contents) + def url(self): + return self._response.url def ok(self, redirects_ok=False): upper = 300 @@ -67,72 +79,111 @@ class UrlResponse(object): else: return False + @property + def headers(self): + return self._response.headers -def readurl(url, data=None, timeout=None, - retries=0, sec_between=1, headers=None): - - req_args = {} - req_args['url'] = url - if data is not None: - req_args['data'] = urllib.urlencode(data) + @property + def code(self): + return self._response.status_code + def __str__(self): + return self.contents + + +class UrlError(IOError): + def __init__(self, cause, code=None, headers=None): + IOError.__init__(self, str(cause)) + self.cause = cause + self.code = code + self.headers = headers + if self.headers is None: + self.headers = {} + + +def readurl(url, data=None, timeout=None, retries=0, sec_between=1, + headers=None, ssl_details=None, check_status=True, + allow_redirects=False): + url = _cleanurl(url) + req_args = { + 'url': url, + } + if urlparse(url).scheme == 'https' and ssl_details: + if not SSL_ENABLED: + LOG.warn("SSL is not enabled, cert. verification can not occur!") + else: + if 'ca_certs' in ssl_details and ssl_details['ca_certs']: + req_args['verify'] = ssl_details['ca_certs'] + else: + req_args['verify'] = True + if 'cert_file' in ssl_details and 'key_file' in ssl_details: + req_args['cert'] = [ssl_details['cert_file'], + ssl_details['key_file']] + elif 'cert_file' in ssl_details: + req_args['cert'] = str(ssl_details['cert_file']) + + req_args['allow_redirects'] = allow_redirects + req_args['method'] = 'GET' + if timeout is not None: + req_args['timeout'] = max(float(timeout), 0) + if data: + req_args['method'] = 'POST' + # It doesn't seem like config + # was added in older library versions (or newer ones either), thus we + # need to manually do the retries if it wasn't... + if CONFIG_ENABLED: + req_config = { + 'store_cookies': False, + } + # Don't use the retry support built-in + # since it doesn't allow for 'sleep_times' + # in between tries.... + # if retries: + # req_config['max_retries'] = max(int(retries), 0) + req_args['config'] = req_config + manual_tries = 1 + if retries: + manual_tries = max(int(retries) + 1, 1) if not headers: headers = { 'User-Agent': 'Cloud-Init/%s' % (version.version_string()), } - req_args['headers'] = headers - req = urllib2.Request(**req_args) - - retries = max(retries, 0) - attempts = retries + 1 - - excepts = [] - LOG.debug(("Attempting to open '%s' with %s attempts" - " (%s retries, timeout=%s) to be performed"), - url, attempts, retries, timeout) - open_args = {} - if timeout is not None: - open_args['timeout'] = int(timeout) - for i in range(0, attempts): + LOG.debug("Attempting to open '%s' with %s configuration", url, req_args) + if data: + # Do this after the log (it might be large) + req_args['data'] = data + if sec_between is None: + sec_between = -1 + excps = [] + # Handle retrying ourselves since the built-in support + # doesn't handle sleeping between tries... + for i in range(0, manual_tries): try: - with closing(urllib2.urlopen(req, **open_args)) as rh: - content = rh.read() - status = rh.getcode() - if status is None: - # This seems to happen when files are read... - status = 200 - headers = {} - if rh.headers: - headers = dict(rh.headers) - LOG.debug("Read from %s (%s, %sb) after %s attempts", - url, status, len(content), (i + 1)) - return UrlResponse(status, content, headers) - except urllib2.HTTPError as e: - excepts.append(e) - except urllib2.URLError as e: - # This can be a message string or - # another exception instance - # (socket.error for remote URLs, OSError for local URLs). - if (isinstance(e.reason, (OSError)) and - e.reason.errno == errno.ENOENT): - excepts.append(e.reason) + r = requests.request(**req_args) + if check_status: + r.raise_for_status() + LOG.debug("Read from %s (%s, %sb) after %s attempts", url, + r.status_code, len(r.content), (i + 1)) + # Doesn't seem like we can make it use a different + # subclass for responses, so add our own backward-compat + # attrs + return UrlResponse(r) + except exceptions.RequestException as e: + if (isinstance(e, (exceptions.HTTPError)) + and hasattr(e, 'response') # This appeared in v 0.10.8 + and e.response): + excps.append(UrlError(e, code=e.response.status_code, + headers=e.response.headers)) else: - excepts.append(e) - except Exception as e: - excepts.append(e) - if i + 1 < attempts: - LOG.debug("Please wait %s seconds while we wait to try again", - sec_between) - time.sleep(sec_between) - - # Didn't work out - LOG.debug("Failed reading from %s after %s attempts", url, attempts) - - # It must of errored at least once for code - # to get here so re-raise the last error - LOG.debug("%s errors occured, re-raising the last one", len(excepts)) - raise excepts[-1] + excps.append(UrlError(e)) + if i + 1 < manual_tries and sec_between > 0: + LOG.debug("Please wait %s seconds while we wait to try again", + sec_between) + time.sleep(sec_between) + if excps: + raise excps[-1] + return None # Should throw before this... def wait_for_url(urls, max_wait=None, timeout=None, @@ -143,7 +194,7 @@ def wait_for_url(urls, max_wait=None, timeout=None, max_wait: roughly the maximum time to wait before giving up The max time is *actually* len(urls)*timeout as each url will be tried once and given the timeout provided. - timeout: the timeout provided to urllib2.urlopen + timeout: the timeout provided to urlopen status_cb: call method with string message when a url is not available headers_cb: call method with single argument of url to get headers for request. @@ -190,36 +241,40 @@ def wait_for_url(urls, max_wait=None, timeout=None, timeout = int((start_time + max_wait) - now) reason = "" + e = None try: if headers_cb is not None: headers = headers_cb(url) else: headers = {} - resp = readurl(url, headers=headers, timeout=timeout) - if not resp.contents: - reason = "empty response [%s]" % (resp.code) - e = ValueError(reason) - elif not resp.ok(): - reason = "bad status code [%s]" % (resp.code) - e = ValueError(reason) + response = readurl(url, headers=headers, timeout=timeout, + check_status=False) + if not response.contents: + reason = "empty response [%s]" % (response.code) + e = UrlError(ValueError(reason), + code=response.code, headers=response.headers) + elif not response.ok(): + reason = "bad status code [%s]" % (response.code) + e = UrlError(ValueError(reason), + code=response.code, headers=response.headers) else: return url - except urllib2.HTTPError as e: - reason = "http error [%s]" % e.code - except urllib2.URLError as e: - reason = "url error [%s]" % e.reason - except socket.timeout as e: - reason = "socket timeout [%s]" % e + except UrlError as e: + reason = "request error [%s]" % e except Exception as e: reason = "unexpected error [%s]" % e time_taken = int(time.time() - start_time) status_msg = "Calling '%s' failed [%s/%ss]: %s" % (url, - time_taken, - max_wait, reason) + time_taken, + max_wait, + reason) status_cb(status_msg) if exception_cb: + # This can be used to alter the headers that will be sent + # in the future, for example this is what the MAAS datasource + # does. exception_cb(msg=status_msg, exception=e) if timeup(max_wait, start_time): |