From 7f2e99f5345c227d07849da68acdf8562b44c3e1 Mon Sep 17 00:00:00 2001 From: Scott Moser Date: Wed, 25 May 2016 17:05:09 -0400 Subject: commit to push for fear of loss. == background == DataSource Mode (dsmode) is present in many datasources in cloud-init. dsmode was originally added to cloud-init to specify when this datasource should be 'realized'. cloud-init has 4 stages of boot. a.) cloud-init --local . network is guaranteed not present. b.) cloud-init (--network). network is guaranteed present. c.) cloud-config d.) cloud-init final 'init_modules' [1] are run "as early as possible". And as such, are executed in either 'a' or 'b' based on the datasource. However, executing them means that user-data has been fully consumed. User-data and vendor-data may have '#include http://...' which then rely on the network being present. boothooks are an example of the things run in init_modules. The 'dsmode' was a way for a user to indicate that init_modules should run at 'a' (dsmode=local) or 'b' (dsmode=net) directly. Things were further confused when a datasource could provide networking configuration. Then, we needed to apply the networking config at 'a' but if the user had provided boothooks that expected networking, then the init_modules would need to be executed at 'b'. The config drive datasource hacked its way through this and applies networking if *it* detects it is a new instance. == Suggested Change == The plan is to 1. incorporate 'dsmode' into DataSource superclass 2. make all existing datasources default to network 3. apply any networking configuration from a datasource on first boot only apply_networking will always rename network devices when it runs. for bug 1579130. 4. run init_modules at cloud-init (network) time frame unless datasource is 'local'. 5. Datasources can provide a 'first_boot' method that will be called when a new instance_id is found. This will allow the config drive's write_files to be applied once. Over all, this will very much simplify things. We'll no longer have 2 sources like DataSourceNoCloud and DataSourceNoCloudNet, but would just have one source with a dsmode. == Concerns == Some things have odd reliance on dsmode. For example, OpenNebula's get_hostname uses it to determine if it should do a lookup of an ip address. == Bugs to fix here == http://pad.lv/1577982 ConfigDrive: cloud-init fails to configure network from network_data.json http://pad.lv/1579130 need to support systemd.link renaming of devices in container http://pad.lv/1577844 Drop unnecessary blocking of all net udev rules --- bin/cloud-init | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'bin') diff --git a/bin/cloud-init b/bin/cloud-init index 5857af32..482b8402 100755 --- a/bin/cloud-init +++ b/bin/cloud-init @@ -236,6 +236,7 @@ def main_init(name, args): else: LOG.debug("Execution continuing, no previous run detected that" " would allow us to stop early.") + else: existing = "check" if util.get_cfg_option_bool(init.cfg, 'manual_cache_clean', False): @@ -265,17 +266,20 @@ def main_init(name, args): else: return (None, ["No instance datasource found."]) - if args.local: - if not init.ds_restored: - # if local mode and the datasource was not restored from cache - # (this is not first boot) then apply networking. - init.apply_network_config() - else: - LOG.debug("skipping networking config from restored datasource.") - # Stage 6 iid = init.instancify() LOG.debug("%s will now be targeting instance id: %s", name, iid) + + if init.is_new_instance(): + # on new instance, apply network config. if not in local mode, + # then we just bring up new networking as the OS has already + # brought up the configured networking. + init.apply_network_config(bringup=not args.local) + + if args.local and init.datasource.dsmode != sources.DSMODE_LOCAL: + return (init.datasource, []) + + # update fully realizes user-data (pulling in #include if necessary) init.update() # Stage 7 try: -- cgit v1.2.3 From 443df65a45e42c354b8eb7638528970adf036290 Mon Sep 17 00:00:00 2001 From: Scott Moser Date: Wed, 25 May 2016 20:36:40 -0400 Subject: fix bring_up --- bin/cloud-init | 4 ++-- cloudinit/stages.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'bin') diff --git a/bin/cloud-init b/bin/cloud-init index 482b8402..1d421acf 100755 --- a/bin/cloud-init +++ b/bin/cloud-init @@ -260,7 +260,7 @@ def main_init(name, args): util.logexc(LOG, ("No instance datasource found!" " Likely bad things to come!")) if not args.force: - init.apply_network_config() + init.apply_network_config(bring_up=not args.local) if args.local: return (None, []) else: @@ -274,7 +274,7 @@ def main_init(name, args): # on new instance, apply network config. if not in local mode, # then we just bring up new networking as the OS has already # brought up the configured networking. - init.apply_network_config(bringup=not args.local) + init.apply_network_config(bring_up=not args.local) if args.local and init.datasource.dsmode != sources.DSMODE_LOCAL: return (init.datasource, []) diff --git a/cloudinit/stages.py b/cloudinit/stages.py index 53ebcb45..35eddc3b 100644 --- a/cloudinit/stages.py +++ b/cloudinit/stages.py @@ -624,16 +624,16 @@ class Init(object): return (ncfg, loc) return (net.generate_fallback_config(), "fallback") - def apply_network_config(self, bringup): + def apply_network_config(self, bring_up): netcfg, src = self._find_networking_config() if netcfg is None: LOG.info("network config is disabled by %s", src) return LOG.info("Applying network configuration from %s bringup=%s: %s", - src, bringup, netcfg) + src, bring_up, netcfg) try: - return self.distro.apply_network_config(netcfg, bringup=bringup) + return self.distro.apply_network_config(netcfg, bring_up=bring_up) except NotImplementedError: LOG.warn("distro '%s' does not implement apply_network_config. " "networking may not be configured properly." % -- cgit v1.2.3 From f358f286dc4887fc40b5e2ad370dc35d46905121 Mon Sep 17 00:00:00 2001 From: Scott Moser Date: Thu, 26 May 2016 10:50:32 -0400 Subject: clarify a comment --- bin/cloud-init | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'bin') diff --git a/bin/cloud-init b/bin/cloud-init index 1d421acf..ffd1f2cc 100755 --- a/bin/cloud-init +++ b/bin/cloud-init @@ -271,9 +271,9 @@ def main_init(name, args): LOG.debug("%s will now be targeting instance id: %s", name, iid) if init.is_new_instance(): - # on new instance, apply network config. if not in local mode, - # then we just bring up new networking as the OS has already - # brought up the configured networking. + # on new instance, apply network config. + # in network mode 'bring_up' must be passed in as the OS + # has already brought up networking. init.apply_network_config(bring_up=not args.local) if args.local and init.datasource.dsmode != sources.DSMODE_LOCAL: -- cgit v1.2.3 From c21ef05925954e5781c793429316cd4eff64ee37 Mon Sep 17 00:00:00 2001 From: Scott Moser Date: Thu, 26 May 2016 15:51:09 -0400 Subject: fixes. seemingly working first boot on config drive bigger things here: * fix the checking for stop_files. the check for no-net actually checked the size of the file and the implementation was just touching it. so it never would have been found. no-net is valid only in upstart anyway. do not stop early on presense of the obj_pkl but check it. this is required since we write the obj_pkl on exit when local mode finds a datasource but found in network mode. * use 'mode' rather than checking args.local. set mode to be sources.DSMODE_NETWORK or sources.DSMODE_LOCAL for easier / more consistent checking. * log exit paths. --- bin/cloud-init | 52 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 34 insertions(+), 18 deletions(-) (limited to 'bin') diff --git a/bin/cloud-init b/bin/cloud-init index ffd1f2cc..29e9b521 100755 --- a/bin/cloud-init +++ b/bin/cloud-init @@ -211,32 +211,31 @@ def main_init(name, args): util.logexc(LOG, "Failed to initialize, likely bad things to come!") # Stage 4 path_helper = init.paths - if not args.local: + mode = sources.DSMODE_LOCAL if args.local else sources.DSMODE_NETWORK + + if mode == sources.DSMODE_NETWORK: existing = "trust" sys.stderr.write("%s\n" % (netinfo.debug_info())) LOG.debug(("Checking to see if files that we need already" " exist from a previous run that would allow us" " to stop early.")) + # no-net is written by upstart cloud-init-nonet when network failed + # to come up stop_files = [ os.path.join(path_helper.get_cpath("data"), "no-net"), - path_helper.get_ipath_cur("obj_pkl"), ] existing_files = [] for fn in stop_files: - try: - c = util.load_file(fn) - if len(c): - existing_files.append((fn, len(c))) - except Exception: - pass + if os.path.isfile(fn): + existing_files.append(fn) + if existing_files: - LOG.debug("Exiting early due to the existence of %s files", - existing_files) + LOG.debug("[%s] Exiting. stop file %s existed", + mode, existing_files) return (None, []) else: LOG.debug("Execution continuing, no previous run detected that" " would allow us to stop early.") - else: existing = "check" if util.get_cfg_option_bool(init.cfg, 'manual_cache_clean', False): @@ -249,35 +248,52 @@ def main_init(name, args): # Stage 5 try: init.fetch(existing=existing) + # if in network mode, and the datasource is local + # then work was done at that stage. + if mode == sources.DSMODE_NETWORK and init.datasource.dsmode != mode: + LOG.debug("[%s] Exiting. datasource %s in local mode", + mode, init.datasource) + return (None, []) except sources.DataSourceNotFoundException: # In the case of 'cloud-init init' without '--local' it is a bit # more likely that the user would consider it failure if nothing was # found. When using upstart it will also mentions job failure # in console log if exit code is != 0. - if args.local: + if mode == sources.DSMODE_LOCAL: LOG.debug("No local datasource found") else: util.logexc(LOG, ("No instance datasource found!" " Likely bad things to come!")) if not args.force: init.apply_network_config(bring_up=not args.local) - if args.local: + LOG.debug("[%s] Exiting without datasource in local mode", mode) + if mode == sources.DSMODE_LOCAL: return (None, []) else: return (None, ["No instance datasource found."]) + else: + LOG.debug("[%s] barreling on in force mode without datasource", + mode) # Stage 6 iid = init.instancify() - LOG.debug("%s will now be targeting instance id: %s", name, iid) + LOG.debug("[%s] %s will now be targeting instance id: %s. new=%s", + mode, name, iid, init.is_new_instance()) if init.is_new_instance(): # on new instance, apply network config. # in network mode 'bring_up' must be passed in as the OS # has already brought up networking. - init.apply_network_config(bring_up=not args.local) + init.apply_network_config(bring_up=bool(mode != sources.DSMODE_LOCAL)) - if args.local and init.datasource.dsmode != sources.DSMODE_LOCAL: - return (init.datasource, []) + if mode == sources.DSMODE_LOCAL: + if init.datasource.dsmode != mode: + LOG.debug("[%s] Exiting. datasource %s not in local mode.", + mode, init.datasource) + return (init.datasource, []) + else: + LOG.debug("[%s] %s is in local mode, will apply init modules now.", + mode, init.datasource) # update fully realizes user-data (pulling in #include if necessary) init.update() @@ -532,7 +548,7 @@ def status_wrapper(name, args, data_d=None, link_d=None): v1[mode]['errors'] = [str(e) for e in errors] except Exception as e: - util.logexc(LOG, "failed of stage %s", mode) + util.logexc(LOG, "failed stage %s", mode) print_exc("failed run of stage %s" % mode) v1[mode]['errors'] = [str(e)] -- cgit v1.2.3 From 1b8a09389654a29af7e618b803bffaed0185e9e8 Mon Sep 17 00:00:00 2001 From: Scott Moser Date: Tue, 31 May 2016 17:17:39 -0400 Subject: add renaming code for renaming interfaces currently does not work in lxc https://github.com/lxc/lxd/issues/2063 --- bin/cloud-init | 6 +-- cloudinit/distros/__init__.py | 4 ++ cloudinit/net/__init__.py | 86 +++++++++++++++++++++++++++++++++++++++++++ cloudinit/stages.py | 10 +++++ 4 files changed, 101 insertions(+), 5 deletions(-) (limited to 'bin') diff --git a/bin/cloud-init b/bin/cloud-init index 29e9b521..21c3a684 100755 --- a/bin/cloud-init +++ b/bin/cloud-init @@ -280,11 +280,7 @@ def main_init(name, args): LOG.debug("[%s] %s will now be targeting instance id: %s. new=%s", mode, name, iid, init.is_new_instance()) - if init.is_new_instance(): - # on new instance, apply network config. - # in network mode 'bring_up' must be passed in as the OS - # has already brought up networking. - init.apply_network_config(bring_up=bool(mode != sources.DSMODE_LOCAL)) + init.apply_network_config(bring_up=bool(mode != sources.DSMODE_LOCAL)) if mode == sources.DSMODE_LOCAL: if init.datasource.dsmode != mode: diff --git a/cloudinit/distros/__init__.py b/cloudinit/distros/__init__.py index 3bfbc484..5c29c804 100644 --- a/cloudinit/distros/__init__.py +++ b/cloudinit/distros/__init__.py @@ -31,6 +31,7 @@ import stat from cloudinit import importer from cloudinit import log as logging +from cloudinit import net from cloudinit import ssh_util from cloudinit import type_utils from cloudinit import util @@ -145,6 +146,9 @@ class Distro(object): return self._bring_up_interfaces(dev_names) return False + def apply_network_config_names(self, netconfig): + net.apply_network_config_names(netconfig) + @abc.abstractmethod def apply_locale(self, locale, out_fn=None): raise NotImplementedError() diff --git a/cloudinit/net/__init__.py b/cloudinit/net/__init__.py index 40d330b5..ec1b3835 100644 --- a/cloudinit/net/__init__.py +++ b/cloudinit/net/__init__.py @@ -813,4 +813,90 @@ def _ifaces_to_net_config_data(ifaces): 'config': [devs[d] for d in sorted(devs)]} +def apply_network_config_names(netcfg, strict_present=True, strict_busy=True): + """read the network config and rename devices accordingly. + if strict_present is false, then do not raise exception if no devices + match. if strict_busy is false, then do not raise exception if the + device cannot be renamed because it is currently configured.""" + renames = [] + for ent in netcfg.get('config', {}): + if ent.get('type') != 'physical': + continue + mac = ent.get('mac_address') + name = ent.get('name') + if not mac: + continue + renames.append([mac, name]) + + return rename_interfaces(renames) + + +def rename_interfaces(renames, strict_present=True, strict_busy=True): + cur_bymac = {get_interface_mac(n): n for n in get_devicelist()} + expected = {mac: name for mac, name in renames} + cur_byname = {v: k for k, v in cur_bymac.items()} + + tmpname_fmt = "cirename%d" + tmpi = -1 + + moves = [] + changes = [] + errors = [] + for mac, new_name in expected.items(): + cur_name = cur_bymac.get(mac) + if cur_name == new_name: + # nothing to do + continue + + if not cur_name: + if strict_present: + errors.append( + "[nic not present] Cannot rename mac=%s to %s" + ", not available." % (mac, new_name)) + elif is_up(cur_name): + if strict_busy: + errors.append("[busy] Error renaming mac=%s from %s to %s." % + (mac, cur_name, new_name)) + elif new_name in cur_byname: + if is_up(new_name): + if strict_busy: + errors.append( + "[busy-target] Error renaming mac=%s from %s to %s." % + (mac, cur_name, new_name)) + else: + tmp_name = None + while tmp_name is None or tmp_name in cur_byname: + tmpi += 1 + tmp_name = tmpname_fmt % tmpi + moves.append((mac, cur_name, tmp_name)) + changes.append((mac, tmp_name, new_name)) + else: + changes.append((mac, cur_name, new_name)) + + def rename_dev(cur, new): + cmd = ["ip", "link", "set", cur, "name", new] + util.subp(cmd) + + for mac, cur, new in moves + changes: + try: + rename_dev(cur, new) + except util.ProcessExecutionError as e: + errors.append( + "[unknown] Error renaming mac=%s from %s to %s. (%s)" % + (mac, cur, new, e)) + + if len(errors): + raise Exception('\n'.join(errors)) + + +def get_interface_mac(ifname): + """Returns the string value of an interface's MAC Address""" + return read_sys_net(ifname, "address", enoent=False) + + +def get_ifname_mac_pairs(): + """Build a list of tuples (ifname, mac)""" + return [(ifname, get_interface_mac(ifname)) for ifname in get_devicelist()] + + # vi: ts=4 expandtab syntax=python diff --git a/cloudinit/stages.py b/cloudinit/stages.py index f164d6f6..211d2286 100644 --- a/cloudinit/stages.py +++ b/cloudinit/stages.py @@ -632,6 +632,16 @@ class Init(object): LOG.info("network config is disabled by %s", src) return + try: + LOG.debug("applying net config names for %s" % netcfg) + self.distro.apply_network_config_names(netcfg) + except Exception as e: + LOG.warn("Failed to rename devices: %s", e) + + if not self.is_new_instance(): + LOG.debug("not a new instance. network config is not applied.") + return + LOG.info("Applying network configuration from %s bringup=%s: %s", src, bring_up, netcfg) try: -- cgit v1.2.3