From ebc9ecbc8a76bdf511a456fb72339a7eb4c20568 Mon Sep 17 00:00:00 2001 From: Ryan Harper Date: Tue, 20 Jun 2017 17:06:43 -0500 Subject: Azure: Add network-config, Refactor net layer to handle duplicate macs. On systems with network devices with duplicate mac addresses, cloud-init will fail to rename the devices according to the specified network configuration. Refactor net layer to search by device driver and device id if available. Azure systems may have duplicate mac addresses by design. Update Azure datasource to run at init-local time and let Azure datasource generate a fallback networking config to handle advanced networking configurations. Lastly, add a 'setup' method to the datasources that is called before userdata/vendordata is processed but after networking is up. That is used here on Azure to interact with the 'fabric'. --- cloudinit/sources/DataSourceAzure.py | 114 +++++++++++++++++++++++++++++------ 1 file changed, 95 insertions(+), 19 deletions(-) (limited to 'cloudinit/sources/DataSourceAzure.py') diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index 4fe0d635..b5a95a1f 100644 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -16,6 +16,7 @@ from xml.dom import minidom import xml.etree.ElementTree as ET from cloudinit import log as logging +from cloudinit import net from cloudinit import sources from cloudinit.sources.helpers.azure import get_metadata_from_fabric from cloudinit import util @@ -245,7 +246,9 @@ def temporary_hostname(temp_hostname, cfg, hostname_command='hostname'): set_hostname(previous_hostname, hostname_command) -class DataSourceAzureNet(sources.DataSource): +class DataSourceAzure(sources.DataSource): + _negotiated = False + def __init__(self, sys_cfg, distro, paths): sources.DataSource.__init__(self, sys_cfg, distro, paths) self.seed_dir = os.path.join(paths.seed_dir, 'azure') @@ -255,6 +258,7 @@ class DataSourceAzureNet(sources.DataSource): util.get_cfg_by_path(sys_cfg, DS_CFG_PATH, {}), BUILTIN_DS_CONFIG]) self.dhclient_lease_file = self.ds_cfg.get('dhclient_lease_file') + self._network_config = None def __str__(self): root = sources.DataSource.__str__(self) @@ -331,6 +335,7 @@ class DataSourceAzureNet(sources.DataSource): if asset_tag != AZURE_CHASSIS_ASSET_TAG: LOG.debug("Non-Azure DMI asset tag '%s' discovered.", asset_tag) return False + ddir = self.ds_cfg['data_dir'] candidates = [self.seed_dir] @@ -375,13 +380,14 @@ class DataSourceAzureNet(sources.DataSource): LOG.debug("using files cached in %s", ddir) # azure / hyper-v provides random data here + # TODO. find the seed on FreeBSD platform + # now update ds_cfg to reflect contents pass in config if not util.is_FreeBSD(): seed = util.load_file("/sys/firmware/acpi/tables/OEM0", quiet=True, decode=False) if seed: self.metadata['random_seed'] = seed - # TODO. find the seed on FreeBSD platform - # now update ds_cfg to reflect contents pass in config + user_ds_cfg = util.get_cfg_by_path(self.cfg, DS_CFG_PATH, {}) self.ds_cfg = util.mergemanydict([user_ds_cfg, self.ds_cfg]) @@ -389,6 +395,40 @@ class DataSourceAzureNet(sources.DataSource): # the directory to be protected. write_files(ddir, files, dirmode=0o700) + self.metadata['instance-id'] = util.read_dmi_data('system-uuid') + + return True + + def device_name_to_device(self, name): + return self.ds_cfg['disk_aliases'].get(name) + + def get_config_obj(self): + return self.cfg + + def check_instance_id(self, sys_cfg): + # quickly (local check only) if self.instance_id is still valid + return sources.instance_id_matches_system_uuid(self.get_instance_id()) + + def setup(self, is_new_instance): + if self._negotiated is False: + LOG.debug("negotiating for %s (new_instance=%s)", + self.get_instance_id(), is_new_instance) + fabric_data = self._negotiate() + LOG.debug("negotiating returned %s", fabric_data) + if fabric_data: + self.metadata.update(fabric_data) + self._negotiated = True + else: + LOG.debug("negotiating already done for %s", + self.get_instance_id()) + + def _negotiate(self): + """Negotiate with fabric and return data from it. + + On success, returns a dictionary including 'public_keys'. + On failure, returns False. + """ + if self.ds_cfg['agent_command'] == AGENT_START_BUILTIN: self.bounce_network_with_azure_hostname() @@ -398,31 +438,64 @@ class DataSourceAzureNet(sources.DataSource): else: metadata_func = self.get_metadata_from_agent + LOG.debug("negotiating with fabric via agent command %s", + self.ds_cfg['agent_command']) try: fabric_data = metadata_func() except Exception as exc: - LOG.info("Error communicating with Azure fabric; assume we aren't" - " on Azure.", exc_info=True) + LOG.warning( + "Error communicating with Azure fabric; You may experience." + "connectivity issues.", exc_info=True) return False - self.metadata['instance-id'] = util.read_dmi_data('system-uuid') - self.metadata.update(fabric_data) - - return True - def device_name_to_device(self, name): - return self.ds_cfg['disk_aliases'].get(name) - - def get_config_obj(self): - return self.cfg - - def check_instance_id(self, sys_cfg): - # quickly (local check only) if self.instance_id is still valid - return sources.instance_id_matches_system_uuid(self.get_instance_id()) + return fabric_data def activate(self, cfg, is_new_instance): address_ephemeral_resize(is_new_instance=is_new_instance) return + @property + def network_config(self): + """Generate a network config like net.generate_fallback_network() with + the following execptions. + + 1. Probe the drivers of the net-devices present and inject them in + the network configuration under params: driver: value + 2. If the driver value is 'mlx4_core', the control mode should be + set to manual. The device will be later used to build a bond, + for now we want to ensure the device gets named but does not + break any network configuration + """ + blacklist = ['mlx4_core'] + if not self._network_config: + LOG.debug('Azure: generating fallback configuration') + # generate a network config, blacklist picking any mlx4_core devs + netconfig = net.generate_fallback_config( + blacklist_drivers=blacklist, config_driver=True) + + # if we have any blacklisted devices, update the network_config to + # include the device, mac, and driver values, but with no ip + # config; this ensures udev rules are generated but won't affect + # ip configuration + bl_found = 0 + for bl_dev in [dev for dev in net.get_devicelist() + if net.device_driver(dev) in blacklist]: + bl_found += 1 + cfg = { + 'type': 'physical', + 'name': 'vf%d' % bl_found, + 'mac_address': net.get_interface_mac(bl_dev), + 'params': { + 'driver': net.device_driver(bl_dev), + 'device_id': net.device_devid(bl_dev), + }, + } + netconfig['config'].append(cfg) + + self._network_config = netconfig + + return self._network_config + def _partitions_on_device(devpath, maxnum=16): # return a list of tuples (ptnum, path) for each part on devpath @@ -849,9 +922,12 @@ class NonAzureDataSource(Exception): pass +# Legacy: Must be present in case we load an old pkl object +DataSourceAzureNet = DataSourceAzure + # Used to match classes to dependencies datasources = [ - (DataSourceAzureNet, (sources.DEP_FILESYSTEM, sources.DEP_NETWORK)), + (DataSourceAzure, (sources.DEP_FILESYSTEM, )), ] -- cgit v1.2.3