From 914c6e86f1689ae186a0db836e7f0304d72c38b4 Mon Sep 17 00:00:00 2001 From: Joshua Harlow Date: Thu, 18 Oct 2012 10:34:38 -0700 Subject: Start fixing how boto and our url fetching can not use ssl correctly (they can not do validation due to the underlying usage of urllib/httplib). Adjust to use urllib3 which can in cloud-init url fetching. For now also take the metadata/userdata extraction code from boto (small amount) and fix it in a new local file called 'ec2_utils'. --- cloudinit/url_helper.py | 97 +++++++++++++++++-------------------------------- 1 file changed, 33 insertions(+), 64 deletions(-) (limited to 'cloudinit/url_helper.py') diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index f3e3fd7e..e3f63021 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -26,7 +26,9 @@ import errno import socket import time import urllib -import urllib2 + +from urllib3 import connectionpool +from urllib3 import util from cloudinit import log as logging from cloudinit import version @@ -68,71 +70,38 @@ class UrlResponse(object): return False -def readurl(url, data=None, timeout=None, - retries=0, sec_between=1, headers=None): - +def readurl(url, data=None, timeout=None, retries=0, + headers=None, ssl_details=None): req_args = {} - req_args['url'] = url - if data is not None: - req_args['data'] = urllib.urlencode(data) - - if not headers: - headers = { - 'User-Agent': 'Cloud-Init/%s' % (version.version_string()), + p_url = util.parse_url(url) + if p_url.scheme == 'https' and ssl_details: + for k in ['key_file', 'cert_file', 'cert_reqs', 'ca_certs']: + if k in ssl_details: + req_args[k] = ssl_details[k] + with closing(connectionpool.connection_from_url(url, **req_args)) as req_p: + retries = max(int(retries), 0) + attempts = retries + 1 + LOG.debug(("Attempting to open '%s' with %s attempts" + " (%s retries, timeout=%s) to be performed"), + url, attempts, retries, timeout) + open_args = { + 'method': 'GET', + 'retries': retries, + 'redirect': 
False, + 'url': p_url.request_uri, } - - req_args['headers'] = headers - req = urllib2.Request(**req_args) - - retries = max(retries, 0) - attempts = retries + 1 - - excepts = [] - LOG.debug(("Attempting to open '%s' with %s attempts" - " (%s retries, timeout=%s) to be performed"), - url, attempts, retries, timeout) - open_args = {} - if timeout is not None: - open_args['timeout'] = int(timeout) - for i in range(0, attempts): - try: - with closing(urllib2.urlopen(req, **open_args)) as rh: - content = rh.read() - status = rh.getcode() - if status is None: - # This seems to happen when files are read... - status = 200 - headers = {} - if rh.headers: - headers = dict(rh.headers) - LOG.debug("Read from %s (%s, %sb) after %s attempts", - url, status, len(content), (i + 1)) - return UrlResponse(status, content, headers) - except urllib2.HTTPError as e: - excepts.append(e) - except urllib2.URLError as e: - # This can be a message string or - # another exception instance - # (socket.error for remote URLs, OSError for local URLs). 
- if (isinstance(e.reason, (OSError)) and - e.reason.errno == errno.ENOENT): - excepts.append(e.reason) - else: - excepts.append(e) - except Exception as e: - excepts.append(e) - if i + 1 < attempts: - LOG.debug("Please wait %s seconds while we wait to try again", - sec_between) - time.sleep(sec_between) - - # Didn't work out - LOG.debug("Failed reading from %s after %s attempts", url, attempts) - - # It must of errored at least once for code - # to get here so re-raise the last error - LOG.debug("%s errors occured, re-raising the last one", len(excepts)) - raise excepts[-1] + if data is not None: + open_args['body'] = urllib.urlencode(data) + open_args['method'] = 'POST' + if not headers: + headers = { + 'User-Agent': 'Cloud-Init/%s' % (version.version_string()), + } + open_args['headers'] = headers + if timeout is not None: + open_args['timeout'] = max(int(timeout), 0) + r = req_p.urlopen(**open_args) + return UrlResponse(r.status, r.data, r.headers) def wait_for_url(urls, max_wait=None, timeout=None, -- cgit v1.2.3 From 7c9bbbc9b49425e3ba8e0517908477c58ea51d4b Mon Sep 17 00:00:00 2001 From: Joshua Harlow Date: Fri, 19 Oct 2012 14:06:21 -0700 Subject: Remove the need for boto just for fetching the userdata and metadata. Add in this crawling functionality to the ec2_utils module that will fully crawl (not lazily) the ec2 metadata and parse it in the same manner as boto. 1. Make the ec2 datasource + cloudstack now call into these. 2. 
Fix phone_home due to urllib3 change (TBD) --- Requires | 5 - cloudinit/config/cc_phone_home.py | 2 +- cloudinit/ec2_utils.py | 210 +++++++++++++++++------------- cloudinit/sources/DataSourceCloudStack.py | 9 +- cloudinit/sources/DataSourceEc2.py | 9 +- cloudinit/url_helper.py | 16 +-- 6 files changed, 131 insertions(+), 120 deletions(-) (limited to 'cloudinit/url_helper.py') diff --git a/Requires b/Requires index b23dd4e9..13a5d997 100644 --- a/Requires +++ b/Requires @@ -10,11 +10,6 @@ PrettyTable # datasource is removed, this is no longer needed oauth -# This is used to fetch the ec2 metadata into a easily -# parseable format, instead of having to have cloud-init perform -# those same fetchs and decodes and signing (...) that ec2 requires. -boto - # This is only needed for places where we need to support configs in a manner # that the built-in config parser is not sufficent (ie # when we need to preserve comments, or do not have a top-level diff --git a/cloudinit/config/cc_phone_home.py b/cloudinit/config/cc_phone_home.py index ae1349eb..a268d66f 100644 --- a/cloudinit/config/cc_phone_home.py +++ b/cloudinit/config/cc_phone_home.py @@ -112,7 +112,7 @@ def handle(name, cfg, cloud, log, args): } url = templater.render_string(url, url_params) try: - uhelp.readurl(url, data=real_submit_keys, retries=tries, sec_between=3) + uhelp.readurl(url, data=real_submit_keys, retries=tries) except: util.logexc(log, ("Failed to post phone home data to" " %s in %s tries"), url, tries) diff --git a/cloudinit/ec2_utils.py b/cloudinit/ec2_utils.py index ef7fac7d..b9d7a2f7 100644 --- a/cloudinit/ec2_utils.py +++ b/cloudinit/ec2_utils.py @@ -16,6 +16,8 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . +from urlparse import (urlparse, urlunparse) + import json import urllib @@ -26,110 +28,132 @@ from cloudinit import util LOG = logging.getLogger(__name__) -# For now take this and fix it... 
-class LazyLoadMetadata(dict): - def __init__(self, url, fetch_timeout, num_retries, ssl_details): - self._url = url - self._num_retries = num_retries - self._ssl_details = ssl_details - self._fetch_timeout = fetch_timeout - self._leaves = {} - self._dicts = [] - response = uh.readurl(url, timeout=fetch_timeout, - retries=num_retries, ssl_details=ssl_details) - data = str(response) - if data: - fields = data.split('\n') - for field in fields: - if field.endswith('/'): - key = field[0:-1] - self._dicts.append(key) - else: - p = field.find('=') - if p > 0: - key = field[p + 1:] - resource = field[0:p] + '/openssh-key' - else: - key = resource = field - self._leaves[key] = resource - self[key] = None - - def _materialize(self): - for key in self: - self[key] - - def __getitem__(self, key): - if key not in self: - # Allow dict to throw the KeyError - return super(LazyLoadMetadata, self).__getitem__(key) - - # Already loaded - val = super(LazyLoadMetadata, self).__getitem__(key) - if val is not None: - return val - - if key in self._leaves: - resource = self._leaves[key] - new_url = self._url + urllib.quote(resource, safe="/:") - response = uh.readurl(new_url, retries=self._num_retries, - timeout=self._fetch_timeout, - ssl_details=self._ssl_details) - val = str(response) - if val and val[0] == '{': - val = json.loads(val) +def combine_url(base, add_on): + base_parsed = list(urlparse(base)) + path = base_parsed[2] + if path and not path.endswith("/"): + path += "/" + path += urllib.quote(str(add_on), safe="/:") + base_parsed[2] = path + return urlunparse(base_parsed) + + +# See: http://bit.ly/TyoUQs +class MetadataMaterializer(object): + def __init__(self, blob, base_url, **fetch_settings): + self._blob = blob + self._md = None + self._base_url = base_url + self._fetch_settings = fetch_settings + + def _parse(self, blob): + leaves = {} + children = [] + if not blob: + return (leaves, children) + + def has_children(item): + if item.endswith("/"): + return True + else: + 
return False + + def get_name(item): + if item.endswith("/"): + return item.rstrip("/") + return item + + for field in blob.splitlines(): + field = field.strip() + field_name = get_name(field) + if not field or not field_name: + continue + if has_children(field): + if field_name not in children: + children.append(field_name) + else: + contents = field.split("=", 1) + resource = field_name + if len(contents) > 1: + # What a PITA... + (ident, sub_contents) = contents + checked_ident = util.safe_int(ident) + if checked_ident is not None: + resource = "%s/openssh-key" % (checked_ident) + field_name = sub_contents + leaves[field_name] = resource + return (leaves, children) + + def materialize(self): + if self._md is not None: + return self._md + self._md = self._materialize(self._blob, self._base_url) + return self._md + + def _fetch_url(self, url, **opts): + response = uh.readurl(url, **opts) + return str(response) + + def _decode_leaf_blob(self, blob): + if not blob: + return blob + stripped_blob = blob.strip() + if stripped_blob.startswith("{") and stripped_blob.endswith("}"): + # Assume and try with json + try: + return json.loads(blob) + except (ValueError, TypeError): + pass + if blob.find("\n") != -1: + return blob.splitlines() + return blob + + def _materialize(self, blob, base_url): + (leaves, children) = self._parse(blob) + child_contents = {} + for c in children: + child_url = combine_url(base_url, c) + if not child_url.endswith("/"): + child_url += "/" + child_blob = self._fetch_url(child_url, **self._fetch_settings) + child_contents[c] = self._materialize(child_blob, child_url) + leaf_contents = {} + for (field, resource) in leaves.items(): + leaf_url = combine_url(base_url, resource) + leaf_blob = self._fetch_url(leaf_url, **self._fetch_settings) + leaf_contents[field] = self._decode_leaf_blob(leaf_blob) + joined = {} + joined.update(child_contents) + for field in leaf_contents.keys(): + if field in joined: + LOG.warn("Duplicate key found in results from 
%s", base_url) else: - p = val.find('\n') - if p > 0: - val = val.split('\n') - self[key] = val - elif key in self._dicts: - new_url = self._url + key + '/' - self[key] = LazyLoadMetadata(new_url, - num_retries=self._num_retries, - fetch_timeout=self._fetch_timeout, - ssl_details=self._ssl_details) - - return super(LazyLoadMetadata, self).__getitem__(key) - - def get(self, key, default=None): - try: - return self[key] - except KeyError: - return default - - def values(self): - self._materialize() - return super(LazyLoadMetadata, self).values() - - def items(self): - self._materialize() - return super(LazyLoadMetadata, self).items() - - def __str__(self): - self._materialize() - return super(LazyLoadMetadata, self).__str__() - - def __repr__(self): - self._materialize() - return super(LazyLoadMetadata, self).__repr__() + joined[field] = leaf_contents[field] + return joined def get_instance_userdata(url, version='latest', ssl_details=None): - ud_url = '%s/%s/user-data' % (url, version) + ud_url = combine_url(url, version) + ud_url = combine_url(ud_url, 'user-data') try: response = uh.readurl(ud_url, timeout=5, retries=10, ssl_details=ssl_details) return str(response) - except Exception as e: - util.logexc(LOG, "Failed fetching url %s", ud_url) + except Exception: + util.logexc(LOG, "Failed fetching userdata from url %s", ud_url) return None def get_instance_metadata(url, version='latest', ssl_details=None): - md_url = '%s/%s/meta-data' % (url, version) + md_url = combine_url(url, version) + md_url = combine_url(md_url, 'meta-data') try: - return LazyLoadMetadata(md_url, timeout=5, - retries=10, ssl_details=ssl_details) - except Exception as e: - util.logexc(LOG, "Failed fetching url %s", md_url) + response = uh.readurl(md_url, timeout=5, + retries=10, ssl_details=ssl_details) + materializer = MetadataMaterializer(str(response), md_url, + timeout=5, retries=10, + ssl_details=ssl_details) + return materializer.materialize() + except Exception: + util.logexc(LOG, 
"Failed fetching metadata from url %s", md_url) return None diff --git a/cloudinit/sources/DataSourceCloudStack.py b/cloudinit/sources/DataSourceCloudStack.py index f7ffa7cb..2654df53 100644 --- a/cloudinit/sources/DataSourceCloudStack.py +++ b/cloudinit/sources/DataSourceCloudStack.py @@ -26,8 +26,7 @@ from struct import pack import os import time -import boto.utils as boto_utils - +from cloudinit import ec2_utils from cloudinit import log as logging from cloudinit import sources from cloudinit import url_helper as uhelp @@ -116,10 +115,8 @@ class DataSourceCloudStack(sources.DataSource): if not self.wait_for_metadata_service(): return False start_time = time.time() - self.userdata_raw = boto_utils.get_instance_userdata(self.api_ver, - None, self.metadata_address) - self.metadata = boto_utils.get_instance_metadata(self.api_ver, - self.metadata_address) + self.userdata_raw = ec2_utils.get_instance_userdata(self.metadata_address, self.api_ver) + self.metadata = ec2_utils.get_instance_metadata(self.metadata_address, self.api_ver) LOG.debug("Crawl of metadata service took %s seconds", int(time.time() - start_time)) return True diff --git a/cloudinit/sources/DataSourceEc2.py b/cloudinit/sources/DataSourceEc2.py index 3686fa10..204963e7 100644 --- a/cloudinit/sources/DataSourceEc2.py +++ b/cloudinit/sources/DataSourceEc2.py @@ -23,8 +23,7 @@ import os import time -import boto.utils as boto_utils - +from cloudinit import ec2_utils from cloudinit import log as logging from cloudinit import sources from cloudinit import url_helper as uhelp @@ -65,10 +64,8 @@ class DataSourceEc2(sources.DataSource): if not self.wait_for_metadata_service(): return False start_time = time.time() - self.userdata_raw = boto_utils.get_instance_userdata(self.api_ver, - None, self.metadata_address) - self.metadata = boto_utils.get_instance_metadata(self.api_ver, - self.metadata_address) + self.userdata_raw = ec2_utils.get_instance_userdata(self.metadata_address, self.api_ver) + self.metadata = 
ec2_utils.get_instance_metadata(self.metadata_address, self.api_ver) LOG.debug("Crawl of metadata service took %s seconds", int(time.time() - start_time)) return True diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index e3f63021..2c9d5eef 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -22,11 +22,10 @@ from contextlib import closing -import errno -import socket import time import urllib +from urllib3 import exceptions from urllib3 import connectionpool from urllib3 import util @@ -91,7 +90,10 @@ def readurl(url, data=None, timeout=None, retries=0, 'url': p_url.request_uri, } if data is not None: - open_args['body'] = urllib.urlencode(data) + if isinstance(data, (str, basestring)): + open_args['body'] = data + else: + open_args['body'] = urllib.urlencode(data) open_args['method'] = 'POST' if not headers: headers = { @@ -112,7 +114,7 @@ def wait_for_url(urls, max_wait=None, timeout=None, max_wait: roughly the maximum time to wait before giving up The max time is *actually* len(urls)*timeout as each url will be tried once and given the timeout provided. - timeout: the timeout provided to urllib2.urlopen + timeout: the timeout provided to urlopen status_cb: call method with string message when a url is not available headers_cb: call method with single argument of url to get headers for request. 
@@ -174,12 +176,8 @@ def wait_for_url(urls, max_wait=None, timeout=None, e = ValueError(reason) else: return url - except urllib2.HTTPError as e: + except exceptions.HTTPError as e: reason = "http error [%s]" % e.code - except urllib2.URLError as e: - reason = "url error [%s]" % e.reason - except socket.timeout as e: - reason = "socket timeout [%s]" % e except Exception as e: reason = "unexpected error [%s]" % e -- cgit v1.2.3 From 0fc887d97626132e9024490b271888bed162c867 Mon Sep 17 00:00:00 2001 From: Joshua Harlow Date: Sun, 21 Oct 2012 19:37:00 -0700 Subject: Use the python requests lib instead of urllib3 since it provides the same support (and it uses that library). Urllib3 doesn't seem to exist as its own package (requests hides it) so solve that by using requests and handling its oddness ourself. --- cloudinit/url_helper.py | 118 +++++++++++++++++++++++++++++++----------------- 1 file changed, 76 insertions(+), 42 deletions(-) (limited to 'cloudinit/url_helper.py') diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index 2c9d5eef..c5921147 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -20,20 +20,33 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
-from contextlib import closing - import time -import urllib -from urllib3 import exceptions -from urllib3 import connectionpool -from urllib3 import util +import requests +from requests import exceptions + +from urlparse import urlparse from cloudinit import log as logging from cloudinit import version LOG = logging.getLogger(__name__) +# Check if requests has ssl support (added in requests >= 0.8.8) +SSL_ENABLED = False +CONFIG_ENABLED = False # This was added in 0.7 +try: + import pkg_resources + from distutils.version import LooseVersion + _REQ = pkg_resources.get_distribution('requests') + _REQ_VER = LooseVersion(_REQ.version) + if _REQ_VER >= LooseVersion('0.8.8'): + SSL_ENABLED = True + if _REQ_VER >= LooseVersion('0.7.0'): + CONFIG_ENABLED = True +except: + pass + class UrlResponse(object): def __init__(self, status_code, contents=None, headers=None): @@ -70,40 +83,60 @@ class UrlResponse(object): def readurl(url, data=None, timeout=None, retries=0, - headers=None, ssl_details=None): - req_args = {} - p_url = util.parse_url(url) - if p_url.scheme == 'https' and ssl_details: - for k in ['key_file', 'cert_file', 'cert_reqs', 'ca_certs']: - if k in ssl_details: - req_args[k] = ssl_details[k] - with closing(connectionpool.connection_from_url(url, **req_args)) as req_p: - retries = max(int(retries), 0) - attempts = retries + 1 - LOG.debug(("Attempting to open '%s' with %s attempts" - " (%s retries, timeout=%s) to be performed"), - url, attempts, retries, timeout) - open_args = { - 'method': 'GET', - 'retries': retries, - 'redirect': False, - 'url': p_url.request_uri, - } - if data is not None: - if isinstance(data, (str, basestring)): - open_args['body'] = data + headers=None, ssl_details=None, check_status=True): + req_args = { + 'url': url, + } + if urlparse(url).scheme == 'https' and ssl_details: + if not SSL_ENABLED: + LOG.warn("SSL is not enabled, cert. 
verification can not occur!") + else: + if 'ca_certs' in ssl_details and ssl_details['ca_certs']: + req_args['verify'] = ssl_details['ca_certs'] else: - open_args['body'] = urllib.urlencode(data) - open_args['method'] = 'POST' - if not headers: - headers = { - 'User-Agent': 'Cloud-Init/%s' % (version.version_string()), - } - open_args['headers'] = headers - if timeout is not None: - open_args['timeout'] = max(int(timeout), 0) - r = req_p.urlopen(**open_args) - return UrlResponse(r.status, r.data, r.headers) + req_args['verify'] = True + if 'cert_file' in ssl_details and 'key_file' in ssl_details: + req_args['cert'] = [ssl_details['cert_file'], + ssl_details['key_file']] + req_args['allow_redirects'] = False + req_args['method'] = 'GET' + if timeout is not None: + req_args['timeout'] = max(float(timeout), 0) + if data: + req_args['method'] = 'POST' + # It doesn't seem like config + # was added in older library versions, thus we + # need to manually do the retries if it wasn't + manual_tries = 1 + if CONFIG_ENABLED: + req_config = {} + req_config['store_cookies'] = False + if retries: + req_config['max_retries'] = max(int(retries), 0) + req_args['config'] = req_config + else: + if retries: + manual_tries = max(int(retries) + 1, 1) + if not headers: + headers = { + 'User-Agent': 'Cloud-Init/%s' % (version.version_string()), + } + req_args['headers'] = headers + LOG.debug("Attempting to open '%s' with %s configuration", url, req_args) + if data: + # Do this after the log (it might be large) + req_args['data'] = data + last_excp = [] + for _i in range(0, manual_tries): + try: + r = requests.request(**req_args) + except exceptions.RequestException as e: + last_excp = [e] + if last_excp: + raise last_excp[-1] + if check_status: + r.raise_for_status() + return UrlResponse(r.status_code, r.content, r.headers) def wait_for_url(urls, max_wait=None, timeout=None, @@ -167,7 +200,8 @@ def wait_for_url(urls, max_wait=None, timeout=None, else: headers = {} - resp = readurl(url, 
headers=headers, timeout=timeout) + resp = readurl(url, headers=headers, timeout=timeout, + check_status=False) if not resp.contents: reason = "empty response [%s]" % (resp.code) e = ValueError(reason) @@ -176,8 +210,8 @@ def wait_for_url(urls, max_wait=None, timeout=None, e = ValueError(reason) else: return url - except exceptions.HTTPError as e: - reason = "http error [%s]" % e.code + except exceptions.RequestException as e: + reason = "request error [%s]" % e except Exception as e: reason = "unexpected error [%s]" % e -- cgit v1.2.3 From 8036ff686ae7e1a884e6a78022dd86a8eab514ec Mon Sep 17 00:00:00 2001 From: Joshua Harlow Date: Sun, 21 Oct 2012 20:44:23 -0700 Subject: Fix the checking and capturing of exceptions. --- cloudinit/url_helper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'cloudinit/url_helper.py') diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index c5921147..93df2510 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -130,12 +130,12 @@ def readurl(url, data=None, timeout=None, retries=0, for _i in range(0, manual_tries): try: r = requests.request(**req_args) + if check_status: + r.raise_for_status() except exceptions.RequestException as e: last_excp = [e] if last_excp: raise last_excp[-1] - if check_status: - r.raise_for_status() return UrlResponse(r.status_code, r.content, r.headers) -- cgit v1.2.3 From 6a6a2f7c337e18d84d7031ba60d5adf6a93256fc Mon Sep 17 00:00:00 2001 From: Joshua Harlow Date: Mon, 22 Oct 2012 13:24:25 -0700 Subject: More cleanups for using the requests module. 1. Handle our own retries (so that we can sleep in between) 2. Cleanup the url so that partially invalid (no scheme) urls will continue working. 3. Allow redirects option now a param. 
--- cloudinit/config/cc_phone_home.py | 2 +- cloudinit/url_helper.py | 103 ++++++++++++++++++++------------------ 2 files changed, 54 insertions(+), 51 deletions(-) (limited to 'cloudinit/url_helper.py') diff --git a/cloudinit/config/cc_phone_home.py b/cloudinit/config/cc_phone_home.py index a268d66f..ae1349eb 100644 --- a/cloudinit/config/cc_phone_home.py +++ b/cloudinit/config/cc_phone_home.py @@ -112,7 +112,7 @@ def handle(name, cfg, cloud, log, args): } url = templater.render_string(url, url_params) try: - uhelp.readurl(url, data=real_submit_keys, retries=tries) + uhelp.readurl(url, data=real_submit_keys, retries=tries, sec_between=3) except: util.logexc(log, ("Failed to post phone home data to" " %s in %s tries"), url, tries) diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index 93df2510..e8330e24 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -25,7 +25,7 @@ import time import requests from requests import exceptions -from urlparse import urlparse +from urlparse import (urlparse, urlunparse) from cloudinit import log as logging from cloudinit import version @@ -48,42 +48,20 @@ except: pass -class UrlResponse(object): - def __init__(self, status_code, contents=None, headers=None): - self._status_code = status_code - self._contents = contents - self._headers = headers +def _cleanurl(url): + parsed_url = list(urlparse(url, scheme='http')) + if not parsed_url[1] and parsed_url[2]: + # Swap these since this seems to be a common + # occurrence when given urls like 'www.google.com' + parsed_url[1] = parsed_url[2] + parsed_url[2] = '' + return urlunparse(parsed_url) - @property - def code(self): - return self._status_code - @property - def contents(self): - return self._contents - - @property - def headers(self): - return self._headers - - def __str__(self): - if not self.contents: - return '' - else: - return str(self.contents) - - def ok(self, redirects_ok=False): - upper = 300 - if redirects_ok: - upper = 400 - if self.code >= 
200 and self.code < upper: - return True - else: - return False - - -def readurl(url, data=None, timeout=None, retries=0, - headers=None, ssl_details=None, check_status=True): +def readurl(url, data=None, timeout=None, retries=0, sec_between=1, + headers=None, ssl_details=None, check_status=True, + allow_redirects=False): + url = _cleanurl(url) req_args = { 'url': url, } @@ -98,7 +76,8 @@ def readurl(url, data=None, timeout=None, retries=0, if 'cert_file' in ssl_details and 'key_file' in ssl_details: req_args['cert'] = [ssl_details['cert_file'], ssl_details['key_file']] - req_args['allow_redirects'] = False + + req_args['allow_redirects'] = allow_redirects req_args['method'] = 'GET' if timeout is not None: req_args['timeout'] = max(float(timeout), 0) @@ -107,16 +86,19 @@ def readurl(url, data=None, timeout=None, retries=0, # It doesn't seem like config # was added in older library versions, thus we # need to manually do the retries if it wasn't - manual_tries = 1 if CONFIG_ENABLED: - req_config = {} - req_config['store_cookies'] = False - if retries: - req_config['max_retries'] = max(int(retries), 0) + req_config = { + 'store_cookies': False, + } + # Don't use the retry support built-in + # since it doesn't allow for 'sleep_times' + # in between tries.... + # if retries: + # req_config['max_retries'] = max(int(retries), 0) req_args['config'] = req_config - else: - if retries: - manual_tries = max(int(retries) + 1, 1) + manual_tries = 1 + if retries: + manual_tries = max(int(retries) + 1, 1) if not headers: headers = { 'User-Agent': 'Cloud-Init/%s' % (version.version_string()), @@ -126,17 +108,38 @@ def readurl(url, data=None, timeout=None, retries=0, if data: # Do this after the log (it might be large) req_args['data'] = data - last_excp = [] - for _i in range(0, manual_tries): + if sec_between is None: + sec_between = -1 + excps = [] + # Handle retrying ourselves since the built-in support + # doesn't handle sleeping between tries... 
+ for i in range(0, manual_tries): try: r = requests.request(**req_args) if check_status: r.raise_for_status() + contents = r.content + status = r.status_code + headers = r.headers + LOG.debug("Read from %s (%s, %sb) after %s attempts", url, + status, len(contents), (i + 1)) + # Doesn't seem like we can make it use a different + # subclass for responses, so add our own backward-compat + # attrs + if not hasattr(r, 'code'): + setattr(r, 'code', status) + if not hasattr(r, 'contents'): + setattr(r, 'contents', contents) + return r except exceptions.RequestException as e: - last_excp = [e] - if last_excp: - raise last_excp[-1] - return UrlResponse(r.status_code, r.content, r.headers) + excps.append(e) + if i + 1 < manual_tries and sec_between > 0: + LOG.debug("Please wait %s seconds while we wait to try again", + sec_between) + time.sleep(sec_between) + if excps: + raise excps[-1] + return None # Should throw before this... def wait_for_url(urls, max_wait=None, timeout=None, -- cgit v1.2.3 From 6d40d5b2e3da9577d4a2686444d47125e62817fe Mon Sep 17 00:00:00 2001 From: harlowja Date: Tue, 19 Feb 2013 22:51:49 -0800 Subject: Continue working on integrating requests. --- cloudinit/config/cc_phone_home.py | 4 +-- cloudinit/ec2_utils.py | 24 +++++-------- cloudinit/sources/DataSourceEc2.py | 2 +- cloudinit/sources/DataSourceMAAS.py | 16 +++++---- cloudinit/url_helper.py | 50 ++++++++++++++++++++++---- cloudinit/user_data.py | 7 ++-- cloudinit/util.py | 71 ++++++++++++++++++++++++++++++++----- 7 files changed, 129 insertions(+), 45 deletions(-) (limited to 'cloudinit/url_helper.py') diff --git a/cloudinit/config/cc_phone_home.py b/cloudinit/config/cc_phone_home.py index ae1349eb..90834080 100644 --- a/cloudinit/config/cc_phone_home.py +++ b/cloudinit/config/cc_phone_home.py @@ -19,7 +19,6 @@ # along with this program. If not, see . 
from cloudinit import templater -from cloudinit import url_helper as uhelp from cloudinit import util from cloudinit.settings import PER_INSTANCE @@ -112,7 +111,8 @@ def handle(name, cfg, cloud, log, args): } url = templater.render_string(url, url_params) try: - uhelp.readurl(url, data=real_submit_keys, retries=tries, sec_between=3) + util.read_file_or_url(url, data=real_submit_keys, + retries=tries, sec_between=3) except: util.logexc(log, ("Failed to post phone home data to" " %s in %s tries"), url, tries) diff --git a/cloudinit/ec2_utils.py b/cloudinit/ec2_utils.py index b9d7a2f7..c422eea9 100644 --- a/cloudinit/ec2_utils.py +++ b/cloudinit/ec2_utils.py @@ -22,7 +22,6 @@ import json import urllib from cloudinit import log as logging -from cloudinit import url_helper as uh from cloudinit import util LOG = logging.getLogger(__name__) @@ -40,11 +39,10 @@ def combine_url(base, add_on): # See: http://bit.ly/TyoUQs class MetadataMaterializer(object): - def __init__(self, blob, base_url, **fetch_settings): + def __init__(self, blob, base_url): self._blob = blob self._md = None self._base_url = base_url - self._fetch_settings = fetch_settings def _parse(self, blob): leaves = {} @@ -90,8 +88,8 @@ class MetadataMaterializer(object): self._md = self._materialize(self._blob, self._base_url) return self._md - def _fetch_url(self, url, **opts): - response = uh.readurl(url, **opts) + def _fetch_url(self, url): + response = util.read_file_or_url(url) return str(response) def _decode_leaf_blob(self, blob): @@ -115,12 +113,12 @@ class MetadataMaterializer(object): child_url = combine_url(base_url, c) if not child_url.endswith("/"): child_url += "/" - child_blob = self._fetch_url(child_url, **self._fetch_settings) + child_blob = self._fetch_url(child_url) child_contents[c] = self._materialize(child_blob, child_url) leaf_contents = {} for (field, resource) in leaves.items(): leaf_url = combine_url(base_url, resource) - leaf_blob = self._fetch_url(leaf_url, **self._fetch_settings) + 
leaf_blob = self._fetch_url(leaf_url) leaf_contents[field] = self._decode_leaf_blob(leaf_blob) joined = {} joined.update(child_contents) @@ -136,23 +134,19 @@ def get_instance_userdata(url, version='latest', ssl_details=None): ud_url = combine_url(url, version) ud_url = combine_url(ud_url, 'user-data') try: - response = uh.readurl(ud_url, timeout=5, - retries=10, ssl_details=ssl_details) + response = util.read_file_or_url(ud_url) return str(response) except Exception: util.logexc(LOG, "Failed fetching userdata from url %s", ud_url) return None -def get_instance_metadata(url, version='latest', ssl_details=None): +def get_instance_metadata(url, version='latest'): md_url = combine_url(url, version) md_url = combine_url(md_url, 'meta-data') try: - response = uh.readurl(md_url, timeout=5, - retries=10, ssl_details=ssl_details) - materializer = MetadataMaterializer(str(response), md_url, - timeout=5, retries=10, - ssl_details=ssl_details) + response = util.read_file_or_url(md_url) + materializer = MetadataMaterializer(str(response), md_url) return materializer.materialize() except Exception: util.logexc(LOG, "Failed fetching metadata from url %s", md_url) diff --git a/cloudinit/sources/DataSourceEc2.py b/cloudinit/sources/DataSourceEc2.py index 204963e7..47f677d4 100644 --- a/cloudinit/sources/DataSourceEc2.py +++ b/cloudinit/sources/DataSourceEc2.py @@ -137,7 +137,7 @@ class DataSourceEc2(sources.DataSource): start_time = time.time() url = uhelp.wait_for_url(urls=urls, max_wait=max_wait, - timeout=timeout, status_cb=LOG.warn) + timeout=timeout, status_cb=LOG.warn) if url: LOG.debug("Using metadata source: '%s'", url2base[url]) diff --git a/cloudinit/sources/DataSourceMAAS.py b/cloudinit/sources/DataSourceMAAS.py index e187aec9..2de31305 100644 --- a/cloudinit/sources/DataSourceMAAS.py +++ b/cloudinit/sources/DataSourceMAAS.py @@ -25,9 +25,11 @@ import os import time import urllib2 +import requests + from cloudinit import log as logging from cloudinit import sources 
-from cloudinit import url_helper as uhelp +from cloudinit import url_helper from cloudinit import util LOG = logging.getLogger(__name__) @@ -191,8 +193,8 @@ def read_maas_seed_url(seed_url, header_cb=None, timeout=None, version=MD_VERSION): """ Read the maas datasource at seed_url. - header_cb is a method that should return a headers dictionary that will - be given to urllib2.Request() + - header_cb is a method that should return a headers dictionary for + a given url Expected format of seed_url is are the following files: * //meta-data/instance-id @@ -220,13 +222,13 @@ def read_maas_seed_url(seed_url, header_cb=None, timeout=None, else: headers = {} try: - resp = uhelp.readurl(url, headers=headers, timeout=timeout) - if resp.ok(): + resp = util.read_file_or_url(url, headers=headers, timeout=timeout) + if resp.ok: md[name] = str(resp) else: LOG.warn(("Fetching from %s resulted in" - " an invalid http code %s"), url, resp.code) - except urllib2.HTTPError as e: + " an invalid http code %s"), url, resp.status_code) + except url_helper.UrlError as e: if e.code != 404: raise return check_seed_contents(md, seed_url) diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index e8330e24..0839e63b 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -58,6 +58,44 @@ def _cleanurl(url): return urlunparse(parsed_url) +class UrlResponse(object): + def __init__(self, response): + self._response = response + + @property + def contents(self): + return self._response.content + + @property + def url(self): + return self._response.url + + @property + def ok(self): + return self._response.ok + + @property + def headers(self): + return self._response.headers + + @property + def code(self): + return self._response.status_code + + def __str__(self): + return self.contents + + +class UrlError(IOError): + def __init__(self, cause): + IOError.__init__(self, str(cause)) + self.cause = cause + if isinstance(cause, exceptions.HTTPError) and cause.response: + self.code 
= cause.response.status_code + else: + self.code = None + + def readurl(url, data=None, timeout=None, retries=0, sec_between=1, headers=None, ssl_details=None, check_status=True, allow_redirects=False): @@ -76,6 +114,8 @@ def readurl(url, data=None, timeout=None, retries=0, sec_between=1, if 'cert_file' in ssl_details and 'key_file' in ssl_details: req_args['cert'] = [ssl_details['cert_file'], ssl_details['key_file']] + elif 'cert_file' in ssl_details: + req_args['cert'] = str(ssl_details['cert_file']) req_args['allow_redirects'] = allow_redirects req_args['method'] = 'GET' @@ -126,13 +166,9 @@ def readurl(url, data=None, timeout=None, retries=0, sec_between=1, # Doesn't seem like we can make it use a different # subclass for responses, so add our own backward-compat # attrs - if not hasattr(r, 'code'): - setattr(r, 'code', status) - if not hasattr(r, 'contents'): - setattr(r, 'contents', contents) - return r + return UrlResponse(r) except exceptions.RequestException as e: - excps.append(e) + excps.append(UrlError(e)) if i + 1 < manual_tries and sec_between > 0: LOG.debug("Please wait %s seconds while we wait to try again", sec_between) @@ -213,7 +249,7 @@ def wait_for_url(urls, max_wait=None, timeout=None, e = ValueError(reason) else: return url - except exceptions.RequestException as e: + except UrlError as e: reason = "request error [%s]" % e except Exception as e: reason = "unexpected error [%s]" % e diff --git a/cloudinit/user_data.py b/cloudinit/user_data.py index 803ffc3a..4a640f1e 100644 --- a/cloudinit/user_data.py +++ b/cloudinit/user_data.py @@ -29,7 +29,6 @@ from email.mime.text import MIMEText from cloudinit import handlers from cloudinit import log as logging -from cloudinit import url_helper from cloudinit import util LOG = logging.getLogger(__name__) @@ -173,10 +172,10 @@ class UserDataProcessor(object): if include_once_on and os.path.isfile(include_once_fn): content = util.load_file(include_once_fn) else: - resp = url_helper.readurl(include_url) - 
if include_once_on and resp.ok(): + resp = util.read_file_or_url(include_url) + if include_once_on and resp.ok: util.write_file(include_once_fn, str(resp), mode=0600) - if resp.ok(): + if resp.ok: content = str(resp) else: LOG.warn(("Fetching from %s resulted in" diff --git a/cloudinit/util.py b/cloudinit/util.py index f5a7ac12..da2cdeda 100644 --- a/cloudinit/util.py +++ b/cloudinit/util.py @@ -51,7 +51,7 @@ import yaml from cloudinit import importer from cloudinit import log as logging from cloudinit import safeyaml -from cloudinit import url_helper as uhelp +from cloudinit import url_helper from cloudinit.settings import (CFG_BUILTIN) @@ -69,6 +69,18 @@ FN_ALLOWED = ('_-.()' + string.digits + string.ascii_letters) CONTAINER_TESTS = ['running-in-container', 'lxc-is-container'] +class FileResponse(object): + def __init__(self, path, contents): + self.code = 200 + self.headers = {} + self.contents = contents + self.ok = True + self.url = path + + def __str__(self): + return self.contents + + class ProcessExecutionError(IOError): MESSAGE_TMPL = ('%(description)s\n' @@ -628,12 +640,53 @@ def read_optional_seed(fill, base="", ext="", timeout=5): raise -def read_file_or_url(url, timeout=5, retries=10, file_retries=0): +def fetch_ssl_details(paths=None): + ssl_details = {} + # Lookup in these locations for ssl key/cert files + ssl_cert_paths = [ + '/var/lib/cloud/data/ssl', + '/var/lib/cloud/instance/data/ssl', + ] + if paths: + ssl_cert_paths.extend([ + os.path.join(paths.get_ipath_cur('data'), 'ssl'), + os.path.join(paths.get_cpath('data'), 'ssl'), + ]) + ssl_cert_paths = uniq_merge(ssl_cert_paths) + ssl_cert_paths = [d for d in ssl_cert_paths if d and os.path.isdir(d)] + cert_file = None + for d in ssl_cert_paths: + if os.path.isfile(os.path.join(d, 'cert.pem')): + cert_file = os.path.join(d, 'cert.pem') + break + key_file = None + for d in ssl_cert_paths: + if os.path.isfile(os.path.join(d, 'key.pem')): + key_file = os.path.join(d, 'key.pem') + break + if cert_file 
and key_file: + ssl_details['cert_file'] = cert_file + ssl_details['key_file'] = key_file + elif cert_file: + ssl_details['cert_file'] = cert_file + return ssl_details + + +def read_file_or_url(url, timeout=5, retries=10, + headers=None, data=None, sec_between=1, paths=None): if url.startswith("/"): url = "file://%s" % url - if url.startswith("file://"): - retries = file_retries - return uhelp.readurl(url, timeout=timeout, retries=retries) + if url.lower().startswith("file://"): + file_path = url[len("file://"):] + return FileResponse(file_path, contents=load_file(file_path)) + else: + return url_helper.readurl(url, + timeout=timeout, + retries=retries, + headers=headers, + data=data, + sec_between=sec_between, + ssl_details=fetch_ssl_details(paths)) def load_yaml(blob, default=None, allowed=(dict,)): @@ -675,13 +728,13 @@ def read_seeded(base="", ext="", timeout=5, retries=10, file_retries=0): md_resp = read_file_or_url(md_url, timeout, retries, file_retries) md = None - if md_resp.ok(): + if md_resp.ok: md_str = str(md_resp) md = load_yaml(md_str, default={}) ud_resp = read_file_or_url(ud_url, timeout, retries, file_retries) ud = None - if ud_resp.ok(): + if ud_resp.ok: ud_str = str(ud_resp) ud = ud_str @@ -850,8 +903,8 @@ def get_cmdline_url(names=('cloud-config-url', 'url'), if not url: return (None, None, None) - resp = uhelp.readurl(url) - if resp.contents.startswith(starts) and resp.ok(): + resp = read_file_or_url(url) + if resp.contents.startswith(starts) and resp.ok: return (key, url, str(resp)) return (key, url, None) -- cgit v1.2.3 From eacfc7ffbec3e6a0348ed484da895e2d2fc5ba10 Mon Sep 17 00:00:00 2001 From: harlowja Date: Sat, 23 Feb 2013 21:23:24 -0800 Subject: Get tests working and further adjustments. 
--- cloudinit/sources/DataSourceMAAS.py | 9 +++++--- cloudinit/url_helper.py | 21 +++++++++-------- cloudinit/user_data.py | 8 ++++--- cloudinit/util.py | 34 ++++++++++++++++++++-------- tests/unittests/test__init__.py | 10 ++++---- tests/unittests/test_datasource/test_maas.py | 11 +++++---- 6 files changed, 60 insertions(+), 33 deletions(-) (limited to 'cloudinit/url_helper.py') diff --git a/cloudinit/sources/DataSourceMAAS.py b/cloudinit/sources/DataSourceMAAS.py index 6e1133b2..0c526305 100644 --- a/cloudinit/sources/DataSourceMAAS.py +++ b/cloudinit/sources/DataSourceMAAS.py @@ -223,9 +223,12 @@ def read_maas_seed_url(seed_url, header_cb=None, timeout=None, else: headers = {} try: - resp = util.read_file_or_url(url, headers=headers, timeout=timeout, - ssl_details=util.fetch_ssl_details(paths)) - if resp.ok: + ssl_details = util.fetch_ssl_details(paths) + resp = util.read_file_or_url(url, + headers=headers, + timeout=timeout, + ssl_details=ssl_details) + if resp.ok(): md[name] = str(resp) else: LOG.warn(("Fetching from %s resulted in" diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index 0839e63b..300e70c2 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -70,9 +70,14 @@ class UrlResponse(object): def url(self): return self._response.url - @property - def ok(self): - return self._response.ok + def ok(self, redirects_ok=False): + upper = 300 + if redirects_ok: + upper = 400 + if self.code >= 200 and self.code < upper: + return True + else: + return False @property def headers(self): @@ -158,11 +163,8 @@ def readurl(url, data=None, timeout=None, retries=0, sec_between=1, r = requests.request(**req_args) if check_status: r.raise_for_status() - contents = r.content - status = r.status_code - headers = r.headers LOG.debug("Read from %s (%s, %sb) after %s attempts", url, - status, len(contents), (i + 1)) + r.status_code, len(r.content), (i + 1)) # Doesn't seem like we can make it use a different # subclass for responses, so add our 
own backward-compat # attrs @@ -256,8 +258,9 @@ def wait_for_url(urls, max_wait=None, timeout=None, time_taken = int(time.time() - start_time) status_msg = "Calling '%s' failed [%s/%ss]: %s" % (url, - time_taken, - max_wait, reason) + time_taken, + max_wait, + reason) status_cb(status_msg) if exception_cb: exception_cb(msg=status_msg, exception=e) diff --git a/cloudinit/user_data.py b/cloudinit/user_data.py index eaf448a7..df069ff8 100644 --- a/cloudinit/user_data.py +++ b/cloudinit/user_data.py @@ -59,6 +59,7 @@ EXAMINE_FOR_LAUNCH_INDEX = ["text/cloud-config"] class UserDataProcessor(object): def __init__(self, paths): self.paths = paths + self.ssl_details = util.fetch_ssl_details(paths) def process(self, blob): accumulating_msg = MIMEMultipart() @@ -172,10 +173,11 @@ class UserDataProcessor(object): if include_once_on and os.path.isfile(include_once_fn): content = util.load_file(include_once_fn) else: - resp = util.read_file_or_url(include_url) - if include_once_on and resp.ok: + resp = util.read_file_or_url(include_url, + ssl_details=self.ssl_details) + if include_once_on and resp.ok(): util.write_file(include_once_fn, str(resp), mode=0600) - if resp.ok: + if resp.ok(): content = str(resp) else: LOG.warn(("Fetching from %s resulted in" diff --git a/cloudinit/util.py b/cloudinit/util.py index 42b3ab01..dc3c5639 100644 --- a/cloudinit/util.py +++ b/cloudinit/util.py @@ -70,18 +70,31 @@ FN_ALLOWED = ('_-.()' + string.digits + string.ascii_letters) CONTAINER_TESTS = ['running-in-container', 'lxc-is-container'] -class FileResponse(object): - def __init__(self, path, contents): - self.code = 200 +# Made to have same accessors as UrlResponse so that the +# read_file_or_url can return this or that object and the +# 'user' of those objects will not need to know the difference. 
+class StringResponse(object): + def __init__(self, contents, code=200): + self.code = code self.headers = {} self.contents = contents - self.ok = True - self.url = path + self.url = None + + def ok(self, *args, **kwargs): + if self.code != 200: + return False + return True def __str__(self): return self.contents +class FileResponse(StringResponse): + def __init__(self, path, contents, code=200): + StringResponse.__init__(self, contents, code=code) + self.url = path + + class ProcessExecutionError(IOError): MESSAGE_TMPL = ('%(description)s\n' @@ -630,7 +643,7 @@ def read_optional_seed(fill, base="", ext="", timeout=5): fill['user-data'] = ud fill['meta-data'] = md return True - except OSError as e: + except IOError as e: if e.errno == errno.ENOENT: return False raise @@ -670,9 +683,12 @@ def fetch_ssl_details(paths=None): def read_file_or_url(url, timeout=5, retries=10, headers=None, data=None, sec_between=1, ssl_details=None): + url = url.lstrip() if url.startswith("/"): url = "file://%s" % url if url.lower().startswith("file://"): + if data: + LOG.warn("Unable to post data to file resource %s", url) file_path = url[len("file://"):] return FileResponse(file_path, contents=load_file(file_path)) else: @@ -724,13 +740,13 @@ def read_seeded(base="", ext="", timeout=5, retries=10, file_retries=0): md_resp = read_file_or_url(md_url, timeout, retries, file_retries) md = None - if md_resp.ok: + if md_resp.ok(): md_str = str(md_resp) md = load_yaml(md_str, default={}) ud_resp = read_file_or_url(ud_url, timeout, retries, file_retries) ud = None - if ud_resp.ok: + if ud_resp.ok(): ud_str = str(ud_resp) ud = ud_str @@ -900,7 +916,7 @@ def get_cmdline_url(names=('cloud-config-url', 'url'), return (None, None, None) resp = read_file_or_url(url) - if resp.contents.startswith(starts) and resp.ok: + if resp.contents.startswith(starts) and resp.ok(): return (key, url, str(resp)) return (key, url, None) diff --git a/tests/unittests/test__init__.py b/tests/unittests/test__init__.py 
index ac082076..d707afa9 100644 --- a/tests/unittests/test__init__.py +++ b/tests/unittests/test__init__.py @@ -191,8 +191,8 @@ class TestCmdlineUrl(MockerTestCase): mock_readurl = self.mocker.replace(url_helper.readurl, passthrough=False) - mock_readurl(url) - self.mocker.result(url_helper.UrlResponse(200, payload)) + mock_readurl(url, ARGS, KWARGS) + self.mocker.result(util.StringResponse(payload)) self.mocker.replay() self.assertEqual((key, url, None), @@ -207,8 +207,8 @@ class TestCmdlineUrl(MockerTestCase): mock_readurl = self.mocker.replace(url_helper.readurl, passthrough=False) - mock_readurl(url) - self.mocker.result(url_helper.UrlResponse(200, payload)) + mock_readurl(url, ARGS, KWARGS) + self.mocker.result(util.StringResponse(payload)) self.mocker.replay() self.assertEqual((key, url, payload), @@ -221,7 +221,7 @@ class TestCmdlineUrl(MockerTestCase): cmdline = "ro %s=%s bar=1" % (key, url) self.mocker.replace(url_helper.readurl, passthrough=False) - self.mocker.result(url_helper.UrlResponse(400)) + self.mocker.result(util.StringResponse("")) self.mocker.replay() self.assertEqual((None, None, None), diff --git a/tests/unittests/test_datasource/test_maas.py b/tests/unittests/test_datasource/test_maas.py index b56fea82..47f8caa4 100644 --- a/tests/unittests/test_datasource/test_maas.py +++ b/tests/unittests/test_datasource/test_maas.py @@ -3,12 +3,13 @@ import os from cloudinit.sources import DataSourceMAAS from cloudinit import url_helper +from cloudinit import util from tests.unittests.helpers import populate_dir -from mocker import MockerTestCase +import mocker -class TestMAASDataSource(MockerTestCase): +class TestMAASDataSource(mocker.MockerTestCase): def setUp(self): super(TestMAASDataSource, self).setUp() @@ -115,9 +116,11 @@ class TestMAASDataSource(MockerTestCase): for key in valid_order: url = "%s/%s/%s" % (my_seed, my_ver, key) - mock_request(url, headers=my_headers, timeout=None) + mock_request(url, headers=my_headers, timeout=mocker.ANY, + 
data=mocker.ANY, sec_between=mocker.ANY, + ssl_details=mocker.ANY, retries=mocker.ANY) resp = valid.get(key) - self.mocker.result(url_helper.UrlResponse(200, resp)) + self.mocker.result(util.StringResponse(resp)) self.mocker.replay() (userdata, metadata) = DataSourceMAAS.read_maas_seed_url(my_seed, -- cgit v1.2.3 From f22388a04076247490c02f605b176c3c45ecd425 Mon Sep 17 00:00:00 2001 From: harlowja Date: Sat, 23 Feb 2013 23:22:57 -0800 Subject: Fix the maas callback mechanism now that requests is used. --- cloudinit/sources/DataSourceMAAS.py | 25 ++++++++++++------------ cloudinit/url_helper.py | 38 +++++++++++++++++++++++-------------- 2 files changed, 36 insertions(+), 27 deletions(-) (limited to 'cloudinit/url_helper.py') diff --git a/cloudinit/sources/DataSourceMAAS.py b/cloudinit/sources/DataSourceMAAS.py index 0c526305..f3759b4b 100644 --- a/cloudinit/sources/DataSourceMAAS.py +++ b/cloudinit/sources/DataSourceMAAS.py @@ -25,8 +25,6 @@ import os import time import urllib2 -import requests - from cloudinit import log as logging from cloudinit import sources from cloudinit import url_helper @@ -81,7 +79,7 @@ class DataSourceMAAS(sources.DataSource): self.base_url = url (userdata, metadata) = read_maas_seed_url(self.base_url, - self.md_headers, + self._md_headers, paths=self.paths) self.userdata_raw = userdata self.metadata = metadata @@ -90,7 +88,7 @@ class DataSourceMAAS(sources.DataSource): util.logexc(LOG, "Failed fetching metadata from url %s", url) return False - def md_headers(self, url): + def _md_headers(self, url): mcfg = self.ds_cfg # If we are missing token_key, token_secret or consumer_key @@ -134,9 +132,10 @@ class DataSourceMAAS(sources.DataSource): starttime = time.time() check_url = "%s/%s/meta-data/instance-id" % (url, MD_VERSION) urls = [check_url] - url = uhelp.wait_for_url(urls=urls, max_wait=max_wait, - timeout=timeout, exception_cb=self._except_cb, - headers_cb=self.md_headers) + url = url_helper.wait_for_url(urls=urls, max_wait=max_wait, + 
timeout=timeout, + exception_cb=self._except_cb, + headers_cb=self._md_headers) if url: LOG.debug("Using metadata source: '%s'", url) @@ -147,23 +146,23 @@ class DataSourceMAAS(sources.DataSource): return bool(url) def _except_cb(self, msg, exception): - if not (isinstance(exception, urllib2.HTTPError) and + if not (isinstance(exception, url_helper.UrlError) and (exception.code == 403 or exception.code == 401)): return + if 'date' not in exception.headers: - LOG.warn("date field not in %d headers" % exception.code) + LOG.warn("Missing header 'date' in %s response", exception.code) return date = exception.headers['date'] - try: ret_time = time.mktime(parsedate(date)) - except: - LOG.warn("failed to convert datetime '%s'") + except Exception as e: + LOG.warn("Failed to convert datetime '%s': %s", date, e) return self.oauth_clockskew = int(ret_time - time.time()) - LOG.warn("set oauth clockskew to %d" % self.oauth_clockskew) + LOG.warn("Setting oauth clockskew to %d", self.oauth_clockskew) return diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index 300e70c2..6f06761a 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -92,13 +92,13 @@ class UrlResponse(object): class UrlError(IOError): - def __init__(self, cause): + def __init__(self, cause, code=None, headers=None): IOError.__init__(self, str(cause)) self.cause = cause - if isinstance(cause, exceptions.HTTPError) and cause.response: - self.code = cause.response.status_code - else: - self.code = None + self.code = code + self.headers = headers + if self.headers is None: + self.headers = {} def readurl(url, data=None, timeout=None, retries=0, sec_between=1, @@ -170,7 +170,11 @@ def readurl(url, data=None, timeout=None, retries=0, sec_between=1, # attrs return UrlResponse(r) except exceptions.RequestException as e: - excps.append(UrlError(e)) + if isinstance(e, (exceptions.HTTPError)) and e.response: + excps.append(UrlError(e, code=e.response.status_code, + headers=e.response.headers)) 
+ else: + excps.append(UrlError(e)) if i + 1 < manual_tries and sec_between > 0: LOG.debug("Please wait %s seconds while we wait to try again", sec_between) @@ -235,20 +239,23 @@ def wait_for_url(urls, max_wait=None, timeout=None, timeout = int((start_time + max_wait) - now) reason = "" + e = None try: if headers_cb is not None: headers = headers_cb(url) else: headers = {} - resp = readurl(url, headers=headers, timeout=timeout, - check_status=False) - if not resp.contents: - reason = "empty response [%s]" % (resp.code) - e = ValueError(reason) - elif not resp.ok(): - reason = "bad status code [%s]" % (resp.code) - e = ValueError(reason) + response = readurl(url, headers=headers, timeout=timeout, + check_status=False) + if not response.contents: + reason = "empty response [%s]" % (response.code) + e = UrlError(ValueError(reason), + code=response.code, headers=response.headers) + elif not response.ok(): + reason = "bad status code [%s]" % (response.code) + e = UrlError(ValueError(reason), + code=response.code, headers=response.headers) else: return url except UrlError as e: @@ -263,6 +270,9 @@ def wait_for_url(urls, max_wait=None, timeout=None, reason) status_cb(status_msg) if exception_cb: + # This can be used to alter the headers that will be sent + # in the future, for example this is what the MAAS datasource + # does. exception_cb(msg=status_msg, exception=e) if timeup(max_wait, start_time): -- cgit v1.2.3 From 6ded151bd19d27cd03e22dbf2e98914b12504c78 Mon Sep 17 00:00:00 2001 From: Joshua Harlow Date: Wed, 13 Mar 2013 22:28:39 -0700 Subject: Update to handle requests >= 1.0 which doesn't use the config dict. 
--- cloudinit/url_helper.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'cloudinit/url_helper.py') diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index 6f06761a..08e5f01b 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -34,7 +34,7 @@ LOG = logging.getLogger(__name__) # Check if requests has ssl support (added in requests >= 0.8.8) SSL_ENABLED = False -CONFIG_ENABLED = False # This was added in 0.7 +CONFIG_ENABLED = False # This was added in 0.7 (but taken out in >=1.0) try: import pkg_resources from distutils.version import LooseVersion @@ -42,7 +42,7 @@ try: _REQ_VER = LooseVersion(_REQ.version) if _REQ_VER >= LooseVersion('0.8.8'): SSL_ENABLED = True - if _REQ_VER >= LooseVersion('0.7.0'): + if _REQ_VER >= LooseVersion('0.7.0') and _REQ_VER < LooseVersion('1.0.0'): CONFIG_ENABLED = True except: pass @@ -129,8 +129,8 @@ def readurl(url, data=None, timeout=None, retries=0, sec_between=1, if data: req_args['method'] = 'POST' # It doesn't seem like config - # was added in older library versions, thus we - # need to manually do the retries if it wasn't + # was added in older library versions (or newer ones either), thus we + # need to manually do the retries if it wasn't... if CONFIG_ENABLED: req_config = { 'store_cookies': False, -- cgit v1.2.3 From 204e79b93c882e17df63b24f7f682c0dbefb482d Mon Sep 17 00:00:00 2001 From: Joshua Harlow Date: Wed, 13 Mar 2013 22:33:18 -0700 Subject: Fix how the http error doesn't always have the response attached in earlier versions of requests (pre 0.10.8). 
--- cloudinit/url_helper.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'cloudinit/url_helper.py') diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index 08e5f01b..bfc5cfdd 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -170,7 +170,9 @@ def readurl(url, data=None, timeout=None, retries=0, sec_between=1, # attrs return UrlResponse(r) except exceptions.RequestException as e: - if isinstance(e, (exceptions.HTTPError)) and e.response: + if (isinstance(e, (exceptions.HTTPError)) + and hasattr(e, 'response') # This appeared in v 0.10.8 + and e.response): excps.append(UrlError(e, code=e.response.status_code, headers=e.response.headers)) else: -- cgit v1.2.3 From 8fbe938228909e153afb88f189b269df60501510 Mon Sep 17 00:00:00 2001 From: Scott Moser Date: Tue, 19 Mar 2013 09:32:04 -0400 Subject: appease pylint and pep8 * cloudinit/distros/parsers/resolv_conf.py added some pylint overrides with 'plXXXXX' syntax. example: # pl51222 pylint: disable=E0102 The pl51222 there means: http://www.logilab.org/ticket/51222 This specific issue is present in 12.04 pylint, but not 13.04. * pylint doesn't like the requests special handling we have. which makes sense as it is only checking versus one specific version. * general pep8 and pylint cleanups. 
--- cloudinit/distros/parsers/resolv_conf.py | 4 ++-- cloudinit/ec2_utils.py | 1 + cloudinit/url_helper.py | 22 ++++++++++++---------- cloudinit/util.py | 4 ++-- 4 files changed, 17 insertions(+), 14 deletions(-) (limited to 'cloudinit/url_helper.py') diff --git a/cloudinit/distros/parsers/resolv_conf.py b/cloudinit/distros/parsers/resolv_conf.py index 5733c25a..1be9d46b 100644 --- a/cloudinit/distros/parsers/resolv_conf.py +++ b/cloudinit/distros/parsers/resolv_conf.py @@ -137,8 +137,8 @@ class ResolvConf(object): self._contents.append(('option', ['search', s_list, ''])) return flat_sds - @local_domain.setter - def local_domain(self, domain): + @local_domain.setter # pl51222 pylint: disable=E1101 + def local_domain(self, domain): # pl51222 pylint: disable=E0102 self.parse() self._remove_option('domain') self._contents.append(('option', ['domain', str(domain), ''])) diff --git a/cloudinit/ec2_utils.py b/cloudinit/ec2_utils.py index 4812eccb..6b2754aa 100644 --- a/cloudinit/ec2_utils.py +++ b/cloudinit/ec2_utils.py @@ -145,6 +145,7 @@ def get_instance_userdata(api_version, metadata_address, ssl_details=None): util.logexc(LOG, "Failed fetching userdata from url %s", ud_url) return None + def get_instance_metadata(api_version, metadata_address, ssl_details=None): md_url = combine_url(metadata_address, api_version) md_url = combine_url(md_url, 'meta-data') diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index bfc5cfdd..de73cc84 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -34,12 +34,12 @@ LOG = logging.getLogger(__name__) # Check if requests has ssl support (added in requests >= 0.8.8) SSL_ENABLED = False -CONFIG_ENABLED = False # This was added in 0.7 (but taken out in >=1.0) +CONFIG_ENABLED = False # This was added in 0.7 (but taken out in >=1.0) try: - import pkg_resources from distutils.version import LooseVersion + import pkg_resources _REQ = pkg_resources.get_distribution('requests') - _REQ_VER = LooseVersion(_REQ.version) 
+ _REQ_VER = LooseVersion(_REQ.version) # pylint: disable=E1103 if _REQ_VER >= LooseVersion('0.8.8'): SSL_ENABLED = True if _REQ_VER >= LooseVersion('0.7.0') and _REQ_VER < LooseVersion('1.0.0'): @@ -49,7 +49,7 @@ except: def _cleanurl(url): - parsed_url = list(urlparse(url, scheme='http')) + parsed_url = list(urlparse(url, scheme='http')) # pylint: disable=E1123 if not parsed_url[1] and parsed_url[2]: # Swap these since this seems to be a common # occurrence when given urls like 'www.google.com' @@ -108,7 +108,8 @@ def readurl(url, data=None, timeout=None, retries=0, sec_between=1, req_args = { 'url': url, } - if urlparse(url).scheme == 'https' and ssl_details: + scheme = urlparse(url).scheme # pylint: disable=E1101 + if scheme == 'https' and ssl_details: if not SSL_ENABLED: LOG.warn("SSL is not enabled, cert. verification can not occur!") else: @@ -121,7 +122,7 @@ def readurl(url, data=None, timeout=None, retries=0, sec_between=1, ssl_details['key_file']] elif 'cert_file' in ssl_details: req_args['cert'] = str(ssl_details['cert_file']) - + req_args['allow_redirects'] = allow_redirects req_args['method'] = 'GET' if timeout is not None: @@ -162,16 +163,17 @@ def readurl(url, data=None, timeout=None, retries=0, sec_between=1, try: r = requests.request(**req_args) if check_status: - r.raise_for_status() + r.raise_for_status() # pylint: disable=E1103 LOG.debug("Read from %s (%s, %sb) after %s attempts", url, - r.status_code, len(r.content), (i + 1)) + r.status_code, len(r.content), # pylint: disable=E1103 + (i + 1)) # Doesn't seem like we can make it use a different # subclass for responses, so add our own backward-compat # attrs return UrlResponse(r) except exceptions.RequestException as e: if (isinstance(e, (exceptions.HTTPError)) - and hasattr(e, 'response') # This appeared in v 0.10.8 + and hasattr(e, 'response') # This appeared in v 0.10.8 and e.response): excps.append(UrlError(e, code=e.response.status_code, headers=e.response.headers)) @@ -183,7 +185,7 @@ def 
readurl(url, data=None, timeout=None, retries=0, sec_between=1, time.sleep(sec_between) if excps: raise excps[-1] - return None # Should throw before this... + return None # Should throw before this... def wait_for_url(urls, max_wait=None, timeout=None, diff --git a/cloudinit/util.py b/cloudinit/util.py index 52b528ea..36e9b83b 100644 --- a/cloudinit/util.py +++ b/cloudinit/util.py @@ -51,8 +51,8 @@ from cloudinit import importer from cloudinit import log as logging from cloudinit import mergers from cloudinit import safeyaml -from cloudinit import url_helper from cloudinit import type_utils +from cloudinit import url_helper from cloudinit import version from cloudinit.settings import (CFG_BUILTIN) @@ -81,7 +81,7 @@ class StringResponse(object): self.contents = contents self.url = None - def ok(self, *args, **kwargs): + def ok(self, *args, **kwargs): # pylint: disable=W0613 if self.code != 200: return False return True -- cgit v1.2.3 From fc77e3f4bc9b0736246abd05bfca8dda04cff0eb Mon Sep 17 00:00:00 2001 From: Scott Moser Date: Tue, 19 Mar 2013 09:51:02 -0400 Subject: do not bother retrying on ssl errors if the error is an ssl error, it's extremely unlikely that it would be fixed by waiting a few seconds and trying again. 
--- cloudinit/url_helper.py | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'cloudinit/url_helper.py') diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index de73cc84..ac6f25db 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -179,6 +179,10 @@ def readurl(url, data=None, timeout=None, retries=0, sec_between=1, headers=e.response.headers)) else: excps.append(UrlError(e)) + if SSL_ENABLED and isinstance(e, exceptions.SSLError): + # ssl exceptions are not going to get fixed by waiting a + # few seconds + break if i + 1 < manual_tries and sec_between > 0: LOG.debug("Please wait %s seconds while we wait to try again", sec_between) -- cgit v1.2.3 From 92b23e3d27623440b3b37ccb9d865b235a99f5f1 Mon Sep 17 00:00:00 2001 From: Scott Moser Date: Tue, 19 Mar 2013 10:05:51 -0400 Subject: set 'allow_redirects' to True by default the previous implementation of url_helper.readurl() would default to allow_redirects being true. So, for backwards compat, we should keep that behavior. 
--- cloudinit/url_helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'cloudinit/url_helper.py') diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index ac6f25db..6b4516e0 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -103,7 +103,7 @@ class UrlError(IOError): def readurl(url, data=None, timeout=None, retries=0, sec_between=1, headers=None, ssl_details=None, check_status=True, - allow_redirects=False): + allow_redirects=True): url = _cleanurl(url) req_args = { 'url': url, -- cgit v1.2.3 From 1b7e36a966ce1a0964e93eefa98c9efcbc4c323d Mon Sep 17 00:00:00 2001 From: Scott Moser Date: Thu, 25 Apr 2013 11:58:38 -0400 Subject: re-work maas datasource and headers call backs Couple of things here: * do not re-try on user-data (404 means 'not here') * re-generate headers on retry requests LP: #1172742 --- cloudinit/sources/DataSourceMAAS.py | 16 +++++++++++----- cloudinit/url_helper.py | 18 +++++++++++++----- cloudinit/util.py | 4 +++- 3 files changed, 27 insertions(+), 11 deletions(-) (limited to 'cloudinit/url_helper.py') diff --git a/cloudinit/sources/DataSourceMAAS.py b/cloudinit/sources/DataSourceMAAS.py index de822924..dfe90bc6 100644 --- a/cloudinit/sources/DataSourceMAAS.py +++ b/cloudinit/sources/DataSourceMAAS.py @@ -218,14 +218,20 @@ def read_maas_seed_url(seed_url, header_cb=None, timeout=None, md = {} for name in file_order: url = files.get(name) - if header_cb: - headers = header_cb(url) + if not header_cb: + def _cb(url): + return {} + header_cb = _cb + + if name == 'user-data': + retries = 0 else: - headers = {} + retries = None + try: ssl_details = util.fetch_ssl_details(paths) - resp = util.read_file_or_url(url, - headers=headers, + resp = util.read_file_or_url(url, retries=retries, + headers_cb=header_cb, timeout=timeout, ssl_details=ssl_details) if resp.ok(): diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index 6b4516e0..24ce6871 100644 --- a/cloudinit/url_helper.py +++ 
b/cloudinit/url_helper.py @@ -102,8 +102,8 @@ class UrlError(IOError): def readurl(url, data=None, timeout=None, retries=0, sec_between=1, - headers=None, ssl_details=None, check_status=True, - allow_redirects=True): + headers=None, headers_cb=None, ssl_details=None, + check_status=True, allow_redirects=True): url = _cleanurl(url) req_args = { 'url': url, @@ -149,8 +149,11 @@ def readurl(url, data=None, timeout=None, retries=0, sec_between=1, headers = { 'User-Agent': 'Cloud-Init/%s' % (version.version_string()), } - req_args['headers'] = headers - LOG.debug("Attempting to open '%s' with %s configuration", url, req_args) + if not headers_cb: + def _cb(url): + return headers + headers_cb = _cb + if data: # Do this after the log (it might be large) req_args['data'] = data @@ -161,6 +164,11 @@ def readurl(url, data=None, timeout=None, retries=0, sec_between=1, # doesn't handle sleeping between tries... for i in range(0, manual_tries): try: + req_args['headers'] = headers_cb(url) + LOG.debug("[%s/%s] open '%s' with %s configuration", i, + manual_tries, url, + {k: req_args[k] for k in req_args if k != 'data'}) + r = requests.request(**req_args) if check_status: r.raise_for_status() # pylint: disable=E1103 @@ -174,7 +182,7 @@ def readurl(url, data=None, timeout=None, retries=0, sec_between=1, except exceptions.RequestException as e: if (isinstance(e, (exceptions.HTTPError)) and hasattr(e, 'response') # This appeared in v 0.10.8 - and e.response): + and hasattr(e.response, 'status_code')): excps.append(UrlError(e, code=e.response.status_code, headers=e.response.headers)) else: diff --git a/cloudinit/util.py b/cloudinit/util.py index 50de55fe..053fa95d 100644 --- a/cloudinit/util.py +++ b/cloudinit/util.py @@ -671,7 +671,8 @@ def fetch_ssl_details(paths=None): def read_file_or_url(url, timeout=5, retries=10, - headers=None, data=None, sec_between=1, ssl_details=None): + headers=None, data=None, sec_between=1, ssl_details=None, + headers_cb=None): url = url.lstrip() if 
url.startswith("/"): url = "file://%s" % url @@ -685,6 +686,7 @@ def read_file_or_url(url, timeout=5, retries=10, timeout=timeout, retries=retries, headers=headers, + headers_cb=headers_cb, data=data, sec_between=sec_between, ssl_details=ssl_details) -- cgit v1.2.3 From e4677e5ef69ff523459d97405dcf90fe6818555e Mon Sep 17 00:00:00 2001 From: Joshua Harlow Date: Fri, 3 May 2013 15:11:32 -0700 Subject: Fix a feature that doesn't yet exist on python 2.6 --- cloudinit/url_helper.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'cloudinit/url_helper.py') diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index 24ce6871..19a30409 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -165,9 +165,14 @@ def readurl(url, data=None, timeout=None, retries=0, sec_between=1, for i in range(0, manual_tries): try: req_args['headers'] = headers_cb(url) + filtered_req_args = {} + for (k, v) in req_args.items(): + if k == 'data': + continue + filtered_req_args[k] = v + LOG.debug("[%s/%s] open '%s' with %s configuration", i, - manual_tries, url, - {k: req_args[k] for k in req_args if k != 'data'}) + manual_tries, url, filtered_req_args) r = requests.request(**req_args) if check_status: -- cgit v1.2.3