summaryrefslogtreecommitdiff
path: root/cloudinit/url_helper.py
diff options
context:
space:
mode:
Diffstat (limited to 'cloudinit/url_helper.py')
-rw-r--r--cloudinit/url_helper.py226
1 files changed, 226 insertions, 0 deletions
diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py
new file mode 100644
index 00000000..dbf72392
--- /dev/null
+++ b/cloudinit/url_helper.py
@@ -0,0 +1,226 @@
+# vi: ts=4 expandtab
+#
+# Copyright (C) 2012 Canonical Ltd.
+# Copyright (C) 2012 Hewlett-Packard Development Company, L.P.
+# Copyright (C) 2012 Yahoo! Inc.
+#
+# Author: Scott Moser <scott.moser@canonical.com>
+# Author: Juerg Haefliger <juerg.haefliger@hp.com>
+# Author: Joshua Harlow <harlowja@yahoo-inc.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 3, as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+from contextlib import closing
+
+import errno
+import socket
+import time
+import urllib
+import urllib2
+
+from cloudinit import log as logging
+from cloudinit import version
+
+LOG = logging.getLogger(__name__)
+
+
+class UrlResponse(object):
+ def __init__(self, status_code, contents=None, headers=None):
+ self._status_code = status_code
+ self._contents = contents
+ self._headers = headers
+
+ @property
+ def code(self):
+ return self._status_code
+
+ @property
+ def contents(self):
+ return self._contents
+
+ @property
+ def headers(self):
+ return self._headers
+
+ def __str__(self):
+ if not self.contents:
+ return ''
+ else:
+ return str(self.contents)
+
+ def ok(self, redirects_ok=False):
+ upper = 300
+ if redirects_ok:
+ upper = 400
+ if self.code >= 200 and self.code < upper:
+ return True
+ else:
+ return False
+
+
+def readurl(url, data=None, timeout=None,
+ retries=0, sec_between=1, headers=None):
+
+ req_args = {}
+ req_args['url'] = url
+ if data is not None:
+ req_args['data'] = urllib.urlencode(data)
+
+ if not headers:
+ headers = {
+ 'User-Agent': 'Cloud-Init/%s' % (version.version_string()),
+ }
+
+ req_args['headers'] = headers
+ req = urllib2.Request(**req_args)
+
+ retries = max(retries, 0)
+ attempts = retries + 1
+
+ excepts = []
+ LOG.debug(("Attempting to open '%s' with %s attempts"
+ " (%s retries, timeout=%s) to be performed"),
+ url, attempts, retries, timeout)
+ open_args = {}
+ if timeout is not None:
+ open_args['timeout'] = int(timeout)
+ for i in range(0, attempts):
+ try:
+ with closing(urllib2.urlopen(req, **open_args)) as rh:
+ content = rh.read()
+ status = rh.getcode()
+ if status is None:
+ # This seems to happen when files are read...
+ status = 200
+ headers = {}
+ if rh.headers:
+ headers = dict(rh.headers)
+ LOG.debug("Read from %s (%s, %sb) after %s attempts",
+ url, status, len(content), (i + 1))
+ return UrlResponse(status, content, headers)
+ except urllib2.HTTPError as e:
+ excepts.append(e)
+ except urllib2.URLError as e:
+ # This can be a message string or
+ # another exception instance
+ # (socket.error for remote URLs, OSError for local URLs).
+ if (isinstance(e.reason, (OSError)) and
+ e.reason.errno == errno.ENOENT):
+ excepts.append(e.reason)
+ else:
+ excepts.append(e)
+ except Exception as e:
+ excepts.append(e)
+ if i + 1 < attempts:
+ LOG.debug("Please wait %s seconds while we wait to try again",
+ sec_between)
+ time.sleep(sec_between)
+
+ # Didn't work out
+ LOG.warn("Failed reading from %s after %s attempts", url, attempts)
+
+ # It must of errored at least once for code
+ # to get here so re-raise the last error
+ LOG.debug("%s errors occured, re-raising the last one", len(excepts))
+ raise excepts[-1]
+
+
+def wait_for_url(urls, max_wait=None, timeout=None,
+ status_cb=None, headers_cb=None, sleep_time=1):
+ """
+ urls: a list of urls to try
+ max_wait: roughly the maximum time to wait before giving up
+ The max time is *actually* len(urls)*timeout as each url will
+ be tried once and given the timeout provided.
+ timeout: the timeout provided to urllib2.urlopen
+ status_cb: call method with string message when a url is not available
+ headers_cb: call method with single argument of url to get headers
+ for request.
+
+ the idea of this routine is to wait for the EC2 metdata service to
+ come up. On both Eucalyptus and EC2 we have seen the case where
+ the instance hit the MD before the MD service was up. EC2 seems
+ to have permenantely fixed this, though.
+
+ In openstack, the metadata service might be painfully slow, and
+ unable to avoid hitting a timeout of even up to 10 seconds or more
+ (LP: #894279) for a simple GET.
+
+ Offset those needs with the need to not hang forever (and block boot)
+ on a system where cloud-init is configured to look for EC2 Metadata
+ service but is not going to find one. It is possible that the instance
+ data host (169.254.169.254) may be firewalled off Entirely for a sytem,
+ meaning that the connection will block forever unless a timeout is set.
+ """
+ start_time = time.time()
+
+ def log_status_cb(msg):
+ LOG.debug(msg)
+
+ if status_cb is None:
+ status_cb = log_status_cb
+
+ def timeup(max_wait, start_time):
+ return ((max_wait <= 0 or max_wait is None) or
+ (time.time() - start_time > max_wait))
+
+ loop_n = 0
+ while True:
+ sleep_time = int(loop_n / 5) + 1
+ for url in urls:
+ now = time.time()
+ if loop_n != 0:
+ if timeup(max_wait, start_time):
+ break
+ if timeout and (now + timeout > (start_time + max_wait)):
+ # shorten timeout to not run way over max_time
+ timeout = int((start_time + max_wait) - now)
+
+ reason = ""
+ try:
+ if headers_cb is not None:
+ headers = headers_cb(url)
+ else:
+ headers = {}
+
+ resp = readurl(url, headers=headers, timeout=timeout)
+ if not resp.contents:
+ reason = "empty response [%s]" % (resp.code)
+ elif not resp.ok():
+ reason = "bad status code [%s]" % (resp.code)
+ else:
+ return url
+ except urllib2.HTTPError as e:
+ reason = "http error [%s]" % e.code
+ except urllib2.URLError as e:
+ reason = "url error [%s]" % e.reason
+ except socket.timeout as e:
+ reason = "socket timeout [%s]" % e
+ except Exception as e:
+ reason = "unexpected error [%s]" % e
+
+ time_taken = int(time.time() - start_time)
+ status_msg = "Calling '%s' failed [%s/%ss]: %s" % (url,
+ time_taken,
+ max_wait, reason)
+ status_cb(status_msg)
+
+ if timeup(max_wait, start_time):
+ break
+
+ loop_n = loop_n + 1
+ LOG.debug("Please wait %s seconds while we wait to try again",
+ sleep_time)
+ time.sleep(sleep_time)
+
+ return False