From 3d10b8d080a874be022f9e25063ba77f0293c5e8 Mon Sep 17 00:00:00 2001 From: Scott Moser Date: Thu, 25 Jul 2013 14:37:10 -0400 Subject: azure: support bouncing interfaces to publish new hostname See the added doc/sources/azure/README.rst for why this is necessary. Essentially, we now are doing the following in the get_data() method of azure datasource to publish this NewHostname: hostname NewHostName ifdown eth0; ifup eth0 LP: #1202758 --- cloudinit/sources/DataSourceAzure.py | 114 +++++++++++++++++++++++++++-------- 1 file changed, 88 insertions(+), 26 deletions(-) (limited to 'cloudinit/sources/DataSourceAzure.py') diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index 0a5caebe..30b06fef 100644 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -31,9 +31,20 @@ LOG = logging.getLogger(__name__) DS_NAME = 'Azure' DEFAULT_METADATA = {"instance-id": "iid-AZURE-NODE"} AGENT_START = ['service', 'walinuxagent', 'start'] -BUILTIN_DS_CONFIG = {'datasource': {DS_NAME: { - 'agent_command': AGENT_START, - 'data_dir': "/var/lib/waagent"}}} +BOUNCE_COMMAND = ("i=$interface; x=0; ifdown $i || x=$?; " + "ifup $i || x=$?; exit $x") +BUILTIN_DS_CONFIG = { + 'agent_command': AGENT_START, + 'data_dir': "/var/lib/waagent", + 'set_hostname': True, + 'hostname_bounce': { + 'interface': 'eth0', + 'policy': True, + 'command': BOUNCE_COMMAND, + 'hostname_command': 'hostname', + } +} +DS_CFG_PATH = ['datasource', DS_NAME] class DataSourceAzureNet(sources.DataSource): @@ -42,19 +53,19 @@ class DataSourceAzureNet(sources.DataSource): self.seed_dir = os.path.join(paths.seed_dir, 'azure') self.cfg = {} self.seed = None + self.ds_cfg = util.mergemanydict([ + util.get_cfg_by_path(sys_cfg, DS_CFG_PATH), + BUILTIN_DS_CONFIG]) def __str__(self): root = sources.DataSource.__str__(self) return "%s [seed=%s]" % (root, self.seed) def get_data(self): - ddir_cfgpath = ['datasource', DS_NAME, 'data_dir'] # azure removes/ejects the cdrom containing the ovf-env.xml # file on reboot. So, in order to successfully reboot we # need to look in the datadir and consider that valid - ddir = util.get_cfg_by_path(self.sys_cfg, ddir_cfgpath) - if ddir is None: - ddir = util.get_cfg_by_path(BUILTIN_DS_CONFIG, ddir_cfgpath) + ddir = self.ds_cfg['data_dir'] candidates = [self.seed_dir] candidates.extend(list_possible_azure_ds_devs()) @@ -91,36 +102,40 @@ class DataSourceAzureNet(sources.DataSource): return False if found == ddir: - LOG.debug("using cached datasource in %s", ddir) - - fields = [('cmd', ['datasource', DS_NAME, 'agent_command']), - ('datadir', ddir_cfgpath)] - mycfg = {} - for cfg in (self.cfg, self.sys_cfg, BUILTIN_DS_CONFIG): - for name, path in fields: - if name in mycfg: - continue - value = util.get_cfg_by_path(cfg, keyp=path) - if value is not None: - mycfg[name] = value + LOG.debug("using files cached in %s", ddir) + + # now update ds_cfg to reflect contents pass in config + usercfg = util.get_cfg_by_path(self.cfg, DS_CFG_PATH, {}) + self.ds_cfg = util.mergemanydict([usercfg, self.ds_cfg]) + mycfg = self.ds_cfg # walinux agent writes files world readable, but expects # the directory to be protected. - write_files(mycfg['datadir'], files, dirmode=0700) + write_files(mycfg['data_dir'], files, dirmode=0700) + + # handle the hostname 'publishing' + try: + handle_set_hostname(mycfg.get('set_hostname'), + self.metadata.get('local-hostname'), + mycfg['hostname_bounce']) + except Exception as e: + LOG.warn("Failed publishing hostname: %s" % e) + util.logexc(LOG, "handling set_hostname failed") try: - invoke_agent(mycfg['cmd']) + invoke_agent(mycfg['agent_command']) except util.ProcessExecutionError: # claim the datasource even if the command failed - util.logexc(LOG, "agent command '%s' failed.", mycfg['cmd']) + util.logexc(LOG, "agent command '%s' failed.", + mycfg['agent_command']) - shcfgxml = os.path.join(mycfg['datadir'], "SharedConfig.xml") + shcfgxml = os.path.join(mycfg['data_dir'], "SharedConfig.xml") wait_for = [shcfgxml] fp_files = [] for pk in self.cfg.get('_pubkeys', []): bname = pk['fingerprint'] + ".crt" - fp_files += [os.path.join(mycfg['datadir'], bname)] + fp_files += [os.path.join(mycfg['data_dir'], bname)] start = time.time() missing = wait_for_files(wait_for + fp_files) @@ -148,6 +163,43 @@ class DataSourceAzureNet(sources.DataSource): return self.cfg +def handle_set_hostname(enabled, hostname, cfg): + if not util.is_true(enabled): + return + + if not hostname: + LOG.warn("set_hostname was true but no local-hostname") + return + + apply_hostname_bounce(hostname=hostname, policy=cfg['policy'], + interface=cfg['interface'], + command=cfg['command'], + hostname_command=cfg['hostname_command']) + + +def apply_hostname_bounce(hostname, policy, interface, command, + hostname_command="hostname"): + # set the hostname to 'hostname' if it is not already set to that. + # then, if policy is not off, bounce the interface using command + prev_hostname = util.subp(hostname_command, capture=True)[0].strip() + + util.subp([hostname_command, hostname]) + + if util.is_false(policy): + return + + if prev_hostname == hostname and policy != "force": + return + + env = os.environ.copy() + env['interface'] = interface + + if command == "builtin": + command = BOUNCE_COMMAND + + util.subp(command, shell=(not isinstance(command, list)), capture=True) + + def crtfile_to_pubkey(fname): pipeline = ('openssl x509 -noout -pubkey < "$0" |' 'ssh-keygen -i -m PKCS8 -f /dev/stdin') @@ -319,15 +371,21 @@ def read_azure_ovf(contents): name = child.localName.lower() simple = False + value = "" if (len(child.childNodes) == 1 and child.childNodes[0].nodeType == dom.TEXT_NODE): simple = True value = child.childNodes[0].wholeText + attrs = {k: v for k, v in child.attributes.items()} + # we accept either UserData or CustomData. If both are present # then behavior is undefined. if (name == "userdata" or name == "customdata"): - ud = base64.b64decode(''.join(value.split())) + if attrs.get('encoding') in (None, "base64"): + ud = base64.b64decode(''.join(value.split())) + else: + ud = value elif name == "username": username = value elif name == "userpassword": @@ -335,7 +393,11 @@ def read_azure_ovf(contents): elif name == "hostname": md['local-hostname'] = value elif name == "dscfg": - cfg['datasource'] = {DS_NAME: util.load_yaml(value, default={})} + if attrs.get('encoding') in (None, "base64"): + dscfg = base64.b64decode(''.join(value.split())) + else: + dscfg = value + cfg['datasource'] = {DS_NAME: util.load_yaml(dscfg, default={})} elif name == "ssh": cfg['_pubkeys'] = load_azure_ovf_pubkeys(child) elif name == "disablesshpasswordauthentication": -- cgit v1.2.3 From 4d9668dac5fed8f713f3b4300fdb574f399c14ee Mon Sep 17 00:00:00 2001 From: Scott Moser Date: Thu, 25 Jul 2013 15:27:44 -0400 Subject: minor azure cleanup --- cloudinit/sources/DataSourceAzure.py | 2 +- tests/unittests/test_datasource/test_azure.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'cloudinit/sources/DataSourceAzure.py') diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index 30b06fef..d4863429 100644 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -54,7 +54,7 @@ class DataSourceAzureNet(sources.DataSource): self.cfg = {} self.seed = None self.ds_cfg = util.mergemanydict([ - util.get_cfg_by_path(sys_cfg, DS_CFG_PATH), + util.get_cfg_by_path(sys_cfg, DS_CFG_PATH, {}), BUILTIN_DS_CONFIG]) def __str__(self): diff --git a/tests/unittests/test_datasource/test_azure.py b/tests/unittests/test_datasource/test_azure.py index c944cb13..4cd3f213 100644 --- a/tests/unittests/test_datasource/test_azure.py +++ b/tests/unittests/test_datasource/test_azure.py @@ -170,7 +170,6 @@ class TestAzureDataSource(MockerTestCase): 'dscfg': {'text': base64.b64encode(yaml.dump(cfg)), 'encoding': 'base64'}} data = {'ovfcontent': construct_valid_ovf_env(data=odata)} - print data dsrc = self._get_ds(data) ret = dsrc.get_data() -- cgit v1.2.3 From 9da084bda820d0ab8500ea6ee13a014f46d3bfab Mon Sep 17 00:00:00 2001 From: Scott Moser Date: Fri, 26 Jul 2013 14:05:52 -0400 Subject: azure: fix bouncing of interface the environment that was set up to include 'interface' was not actually being passed on to 'subp', so when the command ran it wasn't available. --- cloudinit/sources/DataSourceAzure.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'cloudinit/sources/DataSourceAzure.py') diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index d4863429..9503b045 100644 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -31,8 +31,9 @@ LOG = logging.getLogger(__name__) DS_NAME = 'Azure' DEFAULT_METADATA = {"instance-id": "iid-AZURE-NODE"} AGENT_START = ['service', 'walinuxagent', 'start'] -BOUNCE_COMMAND = ("i=$interface; x=0; ifdown $i || x=$?; " - "ifup $i || x=$?; exit $x") +BOUNCE_COMMAND = ['sh', '-xc', + "i=$interface; x=0; ifdown $i || x=$?; ifup $i || x=$?; exit $x"] + BUILTIN_DS_CONFIG = { 'agent_command': AGENT_START, 'data_dir': "/var/lib/waagent", @@ -185,19 +186,29 @@ def apply_hostname_bounce(hostname, policy, interface, command, util.subp([hostname_command, hostname]) + msg = ("phostname=%s hostname=%s policy=%s interface=%s" % + (prev_hostname, hostname, policy, interface)) + if util.is_false(policy): + LOG.debug("pubhname: policy false, skipping [%s]", msg) return if prev_hostname == hostname and policy != "force": + LOG.debug("pubhname: no change, policy != force. skipping. [%s]", msg) return env = os.environ.copy() env['interface'] = interface + env['hostname'] = hostname + env['old_hostname'] = prev_hostname if command == "builtin": command = BOUNCE_COMMAND - util.subp(command, shell=(not isinstance(command, list)), capture=True) + LOG.debug("pubhname: publishing hostname [%s]", msg) + shell = not isinstance(command, (list, tuple)) + (output, err) = util.subp(command, shell=shell, capture=True, env=env) + LOG.debug("output: %s. err: %s", output, err) def crtfile_to_pubkey(fname): -- cgit v1.2.3 From e9f1190993fc5e66cd7bcc47016a5d7335fe8763 Mon Sep 17 00:00:00 2001 From: Scott Moser Date: Mon, 29 Jul 2013 12:02:03 -0400 Subject: DataSourceAzure: do not capture output of bounce command As shown in comments of bug 1202758 and filing of ntp bug 1206164, waiting for the output of this command causes us to wait for ntpdate to fully finish. Ideally I think we'd disable ntpdate running on this run, but that is not trivially possible. --- cloudinit/sources/DataSourceAzure.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'cloudinit/sources/DataSourceAzure.py') diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index 9503b045..2f28702e 100644 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -206,9 +206,11 @@ def apply_hostname_bounce(hostname, policy, interface, command, command = BOUNCE_COMMAND LOG.debug("pubhname: publishing hostname [%s]", msg) + start = time.time() shell = not isinstance(command, (list, tuple)) - (output, err) = util.subp(command, shell=shell, capture=True, env=env) - LOG.debug("output: %s. err: %s", output, err) + # capture=False, see comments in bug 1202758 and bug 1206164. + (output, err) = util.subp(command, shell=shell, capture=False, env=env) + LOG.debug("publishing hostname took %.3f seconds", time.time() - start) def crtfile_to_pubkey(fname): -- cgit v1.2.3 From 66490ebb92af59d148f79aae42a2eddc1ecedb7e Mon Sep 17 00:00:00 2001 From: Scott Moser Date: Tue, 30 Jul 2013 14:23:10 -0400 Subject: add util.log_time helper The reason for this is that more and more things I was wanting to be able to see how long they took. This puts that time logic into a single place. It also supports (by default) reading from /proc/uptime as the timing mechanism. While that is almost certainly slower than time.time(), it does give millisecond granularity and is not affected by 'ntpdate' having run in between the two events. --- ChangeLog | 3 +++ bin/cloud-init | 5 ++++- cloudinit/config/cc_growpart.py | 3 ++- cloudinit/config/cc_resizefs.py | 11 +++++------ cloudinit/sources/DataSourceAzure.py | 14 ++++++-------- cloudinit/util.py | 35 +++++++++++++++++++++++++++++++++++ 6 files changed, 55 insertions(+), 16 deletions(-) (limited to 'cloudinit/sources/DataSourceAzure.py') diff --git a/ChangeLog b/ChangeLog index a255d24a..68d03376 100644 --- a/ChangeLog +++ b/ChangeLog @@ -11,6 +11,9 @@ - always finalize handlers even if processing failed (LP: #1203368) - support merging into cloud-config via jsonp. (LP: #1200476) - add datasource 'SmartOS' for Joyent Cloud. Adds a dependency on serial. + - add 'log_time' helper to util for timing how long things take + which also reads from uptime. uptime is useful as clock may change during + boot due to ntp. 0.7.2: - add a debian watch file - add 'sudo' entry to ubuntu's default user (LP: #1080717) diff --git a/bin/cloud-init b/bin/cloud-init index c5a5b949..bd9ddc04 100755 --- a/bin/cloud-init +++ b/bin/cloud-init @@ -502,7 +502,10 @@ def main(): signal_handler.attach_handlers() (name, functor) = args.action - return functor(name, args) + + return util.log_time(logfunc=LOG.debug, + msg="cloud-init mode '%s'" % name, uptime=True, + func=functor, args=(name, args)) if __name__ == '__main__': diff --git a/cloudinit/config/cc_growpart.py b/cloudinit/config/cc_growpart.py index 4f8c8f80..ba6c58af 100644 --- a/cloudinit/config/cc_growpart.py +++ b/cloudinit/config/cc_growpart.py @@ -264,7 +264,8 @@ def handle(_name, cfg, _cloud, log, _args): raise e return - resized = resize_devices(resizer, devices) + resized = util.log_time(logfunc=log.debug, msg="resize_devices", + func=resize_devices, args=(resizer, devices)) for (entry, action, msg) in resized: if action == RESIZE.CHANGED: log.info("'%s' resized: %s" % (entry, msg)) diff --git a/cloudinit/config/cc_resizefs.py b/cloudinit/config/cc_resizefs.py index b4ee16b2..56040fdd 100644 --- a/cloudinit/config/cc_resizefs.py +++ b/cloudinit/config/cc_resizefs.py @@ -21,7 +21,6 @@ import errno import os import stat -import time from cloudinit.settings import PER_ALWAYS from cloudinit import util @@ -120,9 +119,12 @@ def handle(name, cfg, _cloud, log, args): if resize_root == NOBLOCK: # Fork to a child that will run # the resize command - util.fork_cb(do_resize, resize_cmd, log) + util.fork_cb( + util.log_time(logfunc=log.debug, msg="backgrounded Resizing", + func=do_resize, args=(resize_cmd, log))) else: - do_resize(resize_cmd, log) + util.log_time(logfunc=log.debug, msg="Resizing", + func=do_resize, args=(resize_cmd, log)) action = 'Resized' if resize_root == NOBLOCK: @@ -132,13 +134,10 @@ def handle(name, cfg, _cloud, log, args): def do_resize(resize_cmd, log): - start = time.time() try: util.subp(resize_cmd) except util.ProcessExecutionError: util.logexc(log, "Failed to resize filesystem (cmd=%s)", resize_cmd) raise - tot_time = time.time() - start - log.debug("Resizing took %.3f seconds", tot_time) # TODO(harlowja): Should we add a fsck check after this to make # sure we didn't corrupt anything? diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index 2f28702e..f906b8fa 100644 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -138,13 +138,11 @@ class DataSourceAzureNet(sources.DataSource): bname = pk['fingerprint'] + ".crt" fp_files += [os.path.join(mycfg['data_dir'], bname)] - start = time.time() - missing = wait_for_files(wait_for + fp_files) + missing = util.log_time(logfunc=LOG.debug, msg="waiting for files", + func=wait_for_files, + args=(wait_for + fp_files,)) if len(missing): LOG.warn("Did not find files, but going on: %s", missing) - else: - LOG.debug("waited %.3f seconds for %d files to appear", - time.time() - start, len(wait_for)) if shcfgxml in missing: LOG.warn("SharedConfig.xml missing, using static instance-id") @@ -206,11 +204,11 @@ def apply_hostname_bounce(hostname, policy, interface, command, command = BOUNCE_COMMAND LOG.debug("pubhname: publishing hostname [%s]", msg) - start = time.time() shell = not isinstance(command, (list, tuple)) # capture=False, see comments in bug 1202758 and bug 1206164. - (output, err) = util.subp(command, shell=shell, capture=False, env=env) - LOG.debug("publishing hostname took %.3f seconds", time.time() - start) + util.log_time(logfunc=LOG.debug, msg="publishing hostname", + func=util.subp, kwargs={'command': command, 'shell': shell, + 'capture': False, 'env': env}) def crtfile_to_pubkey(fname): diff --git a/cloudinit/util.py b/cloudinit/util.py index 8542fe27..b0eb56e6 100644 --- a/cloudinit/util.py +++ b/cloudinit/util.py @@ -1770,3 +1770,38 @@ def which(program): return exe_file return None + + +def log_time(logfunc, msg, func, args=None, kwargs=None, get_uptime=True): + if args is None: + args = [] + if kwargs is None: + kwargs = {} + + start = time.time() + + ustart = None + if get_uptime: + try: + ustart = float(uptime()) + except ValueError: + pass + + try: + ret = func(*args, **kwargs) + finally: + delta = time.time() - start + if ustart is not None: + try: + udelta = float(uptime()) - ustart + except ValueError: + udelta = "N/A" + + tmsg = " took %0.3f seconds" % delta + if get_uptime: + tmsg += "(%0.2f)" % udelta + try: + logfunc(msg + tmsg) + except: + pass + return ret -- cgit v1.2.3 From 0ca150b08433fbc57e10d599a46e300142c955c5 Mon Sep 17 00:00:00 2001 From: Scott Moser Date: Tue, 30 Jul 2013 14:28:09 -0400 Subject: set reading /proc/uptime to false by default. reading /proc/uptime is going to be slower, and no reason to do it on most things. Better to only do it when you suspect maybe a need for it. --- bin/cloud-init | 5 ++--- cloudinit/sources/DataSourceAzure.py | 5 +++-- cloudinit/util.py | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'cloudinit/sources/DataSourceAzure.py') diff --git a/bin/cloud-init b/bin/cloud-init index bd9ddc04..b4f9fd07 100755 --- a/bin/cloud-init +++ b/bin/cloud-init @@ -503,9 +503,8 @@ def main(): (name, functor) = args.action - return util.log_time(logfunc=LOG.debug, - msg="cloud-init mode '%s'" % name, uptime=True, - func=functor, args=(name, args)) + return util.log_time(logfunc=LOG.debug, msg="cloud-init mode '%s'" % name, + get_uptime=True, func=functor, args=(name, args)) if __name__ == '__main__': diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index f906b8fa..1a74de21 100644 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -207,8 +207,9 @@ def apply_hostname_bounce(hostname, policy, interface, command, shell = not isinstance(command, (list, tuple)) # capture=False, see comments in bug 1202758 and bug 1206164. util.log_time(logfunc=LOG.debug, msg="publishing hostname", - func=util.subp, kwargs={'command': command, 'shell': shell, - 'capture': False, 'env': env}) + get_uptime=True, func=util.subp, + kwargs={'command': command, 'shell': shell, 'capture': False, + 'env': env}) def crtfile_to_pubkey(fname): diff --git a/cloudinit/util.py b/cloudinit/util.py index b0eb56e6..4a74ba57 100644 --- a/cloudinit/util.py +++ b/cloudinit/util.py @@ -1772,7 +1772,7 @@ def which(program): return None -def log_time(logfunc, msg, func, args=None, kwargs=None, get_uptime=True): +def log_time(logfunc, msg, func, args=None, kwargs=None, get_uptime=False): if args is None: args = [] if kwargs is None: -- cgit v1.2.3