diff options
Diffstat (limited to 'cloudinit')
-rw-r--r-- | cloudinit/DataSourceEc2.py | 166 |
1 files changed, 112 insertions, 54 deletions
diff --git a/cloudinit/DataSourceEc2.py b/cloudinit/DataSourceEc2.py index ea56960b..aee10ffa 100644 --- a/cloudinit/DataSourceEc2.py +++ b/cloudinit/DataSourceEc2.py @@ -48,8 +48,10 @@ class DataSourceEc2(DataSource.DataSource): try: if not self.wait_for_metadata_service(): return False + start = time.time() self.userdata_raw = boto_utils.get_instance_userdata(self.api_ver, None, self.metadata_address) self.metadata = boto_utils.get_instance_metadata(self.api_ver, self.metadata_address) + log.debug("crawl of metadata service took %ds" % (time.time()-start)) return True except Exception as e: print e @@ -81,37 +83,31 @@ class DataSourceEc2(DataSource.DataSource): except: return fallback - - def wait_for_metadata_service(self, sleeps = None): + def wait_for_metadata_service(self): mcfg = self.ds_cfg - if sleeps is None: - sleeps = 30 - try: - sleeps = int(mcfg.get("retries",sleeps)) - except Exception as e: - util.logexc(log) - log.warn("Failed to get number of sleeps, using %s" % sleeps) - if sleeps == 0: return False + if not hasattr(mcfg, "get"): + mcfg = {} - timeout=3 + max_wait = 120 try: - timeout = int(mcfg.get("timeout",timeout)) + max_wait = int(mcfg.get("max_wait",max_wait)) except Exception as e: util.logexc(log) - log.warn("Failed to get timeout, using %s" % timeout) + log.warn("Failed to get max wait. using %s" % max_wait) - sleeptime = 1 + if max_wait == 0: + return False - def_mdurls = ["http://169.254.169.254", "http://instance-data:8773"] + timeout = 50 try: - mdurls = mcfg.get("metadata_urls", def_mdurls) + timeout = int(mcfg.get("timeout",timeout)) except Exception as e: - mdurls = def_mdurls util.logexc(log) - log.warn("Failed to get metadata URLs, using defaults") + log.warn("Failed to get timeout, using %s" % timeout) - starttime = time.time() + def_mdurls = ["http://169.254.169.254", "http://instance-data:8773"] + mdurls = mcfg.get("metadata_urls", def_mdurls) # Remove addresses from the list that wont resolve. filtered = [x for x in mdurls if util.is_resolvable_url(x)] @@ -126,41 +122,25 @@ class DataSourceEc2(DataSource.DataSource): log.warn("Empty metadata url list! using default list") mdurls = def_mdurls - log.debug("Searching the following metadata urls: %s" % mdurls) - - for x in range(sleeps): - for url in mdurls: - iurl="%s/%s/meta-data/instance-id" % (url, self.api_ver) - - # given 100 sleeps, this ends up total sleep time of 1050 sec - sleeptime=int(x/5)+1 - - reason = "" - try: - req = urllib2.Request(iurl) - resp = urllib2.urlopen(req, timeout=timeout) - if resp.read() != "": - self.metadata_address = url - log.debug("Using metadata source: '%s'" % url) - return True - reason = "empty data [%s]" % resp.getcode() - except urllib2.HTTPError as e: - reason = "http error [%s]" % e.code - except urllib2.URLError as e: - reason = "url error [%s]" % e.reason - except socket.timeout as e: - reason = "socket timeout [%s]" % e - - #not needed? Addresses being checked are displayed above - #if x == 0: - # log.warn("waiting for metadata service at %s" % url) - - log.warn("'%s' failed: %s" % (url, reason)) - time.sleep(sleeptime) - - log.critical("giving up on md after %i seconds\n" % - int(time.time()-starttime)) - return False + urls = [ ] + url2base = { False: False } + for url in mdurls: + cur = "%s/%s/meta-data/instance-id" % (url, self.api_ver) + urls.append(cur) + url2base[cur] = url + + starttime = time.time() + url = wait_for_metadata_service(urls=urls, max_wait=max_wait, + timeout=timeout, status_cb=log.warn) + + if url: + log.debug("Using metadata source: '%s'" % url2base[url]) + else: + log.critical("giving up on md after %i seconds\n" % + int(time.time()-starttime)) + + self.metadata_address = url2base[url] + return (bool(url)) def device_name_to_device(self, name): # consult metadata service, that has @@ -221,6 +201,84 @@ class DataSourceEc2(DataSource.DataSource): return True return False + +def wait_for_metadata_service(urls, max_wait=None, timeout=None, status_cb=None): + """ + urls: a list of urls to try + max_wait: roughly the maximum time to wait before giving up + The max time is *actually* len(urls)*timeout as each url will + be tried once and given the timeout provided. + timeout: the timeout provided to urllib2.urlopen + status_cb: call method with string message when a url is not available + + the idea of this routine is to wait for the EC2 metdata service to + come up. On both Eucalyptus and EC2 we have seen the case where + the instance hit the MD before the MD service was up. EC2 seems + to have permenantely fixed this, though. + + In openstack, the metadata service might be painfully slow, and + unable to avoid hitting a timeout of even up to 10 seconds or more + (LP: #894279) for a simple GET. + + Offset those needs with the need to not hang forever (and block boot) + on a system where cloud-init is configured to look for EC2 Metadata + service but is not going to find one. It is possible that the instance + data host (169.254.169.254) may be firewalled off Entirely for a sytem, + meaning that the connection will block forever unless a timeout is set. + """ + starttime = time.time() + + sleeptime = 1 + timeout_orig = timeout + + if status_cb == None: + def status_cb(msg): return + + def timeup(max_wait, starttime): + return((max_wait <= 0 or max_wait == None) or + (time.time()-starttime > max_wait)) + + loop_n = 0 + while True: + sleeptime=int(loop_n/5)+1 + for url in urls: + now = time.time() + if loop_n != 0: + if timeup(max_wait, starttime): + break + if timeout and (now + timeout > (starttime + max_wait)): + # shorten timeout to not run way over max_time + timeout = int((starttime + max_wait) - now) + + reason = "" + try: + req = urllib2.Request(url) + resp = urllib2.urlopen(req, timeout=timeout) + if resp.read() != "": + return url + reason = "empty data [%s]" % resp.getcode() + except urllib2.HTTPError as e: + reason = "http error [%s]" % e.code + except urllib2.URLError as e: + reason = "url error [%s]" % e.reason + except socket.timeout as e: + reason = "socket timeout [%s]" % e + except Exception as e: + reason = "unexpected error [%s]" % e + + if log: + status_cb("'%s' failed [%s/%ss]: %s" % + (url, int(time.time()-starttime), max_wait, reason)) + + if timeup(max_wait, starttime): + break + + loop_n = loop_n + 1 + time.sleep(sleeptime) + + return False + + datasources = [ ( DataSourceEc2, ( DataSource.DEP_FILESYSTEM , DataSource.DEP_NETWORK ) ), ] |