From d910ecd15de642d73a36e935704e54370f93c45b Mon Sep 17 00:00:00 2001 From: asakkurr Date: Mon, 12 Nov 2018 17:16:09 +0000 Subject: azure: fix regression introduced when persisting ephemeral dhcp lease In commitish 9073951 azure datasource tried to leverage stale DHCP information obtained from EphemeralDHCPv4 context manager to report updated provisioning status to the fabric earlier in the boot process. Unfortunately the stale ephemeral network configuration had already been torn down in preparation to bring up IMDS network config so the report attempt failed on timeout. This branch introduces obtain_lease and clean_network public methods on EphemeralDHCPv4 to allow for setup and teardown of ephemeral network configuration without using a context manager. Azure datasource now uses this to persist ephemeral network configuration across multiple contexts during provisioning to avoid multiple DHCP roundtrips. --- cloudinit/net/dhcp.py | 42 ++++++++++++++++++------ cloudinit/sources/DataSourceAzure.py | 47 ++++++++++++++++----------- tests/unittests/test_datasource/test_azure.py | 3 +- 3 files changed, 62 insertions(+), 30 deletions(-) diff --git a/cloudinit/net/dhcp.py b/cloudinit/net/dhcp.py index 12cf5097..bdc5799f 100644 --- a/cloudinit/net/dhcp.py +++ b/cloudinit/net/dhcp.py @@ -40,34 +40,56 @@ class EphemeralDHCPv4(object): def __init__(self, iface=None): self.iface = iface self._ephipv4 = None + self.lease = None def __enter__(self): + """Setup sandboxed dhcp context.""" + return self.obtain_lease() + + def __exit__(self, excp_type, excp_value, excp_traceback): + """Teardown sandboxed dhcp context.""" + self.clean_network() + + def clean_network(self): + """Exit _ephipv4 context to teardown of ip configuration performed.""" + if self.lease: + self.lease = None + if not self._ephipv4: + return + self._ephipv4.__exit__(None, None, None) + + def obtain_lease(self): + """Perform dhcp discovery in a sandboxed environment if possible. + + @return: A dict representing dhcp options on the most recent lease + obtained from the dhclient discovery if run, otherwise an error + is raised. + + @raises: NoDHCPLeaseError if no leases could be obtained. + """ + if self.lease: + return self.lease try: leases = maybe_perform_dhcp_discovery(self.iface) except InvalidDHCPLeaseFileError: raise NoDHCPLeaseError() if not leases: raise NoDHCPLeaseError() - lease = leases[-1] + self.lease = leases[-1] LOG.debug("Received dhcp lease on %s for %s/%s", - lease['interface'], lease['fixed-address'], - lease['subnet-mask']) + self.lease['interface'], self.lease['fixed-address'], + self.lease['subnet-mask']) nmap = {'interface': 'interface', 'ip': 'fixed-address', 'prefix_or_mask': 'subnet-mask', 'broadcast': 'broadcast-address', 'router': 'routers'} - kwargs = dict([(k, lease.get(v)) for k, v in nmap.items()]) + kwargs = dict([(k, self.lease.get(v)) for k, v in nmap.items()]) if not kwargs['broadcast']: kwargs['broadcast'] = bcip(kwargs['prefix_or_mask'], kwargs['ip']) ephipv4 = EphemeralIPv4Network(**kwargs) ephipv4.__enter__() self._ephipv4 = ephipv4 - return lease - - def __exit__(self, excp_type, excp_value, excp_traceback): - if not self._ephipv4: - return - self._ephipv4.__exit__(excp_type, excp_value, excp_traceback) + return self.lease def maybe_perform_dhcp_discovery(nic=None): diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index 7bdd43d8..5ec6096f 100644 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -267,7 +267,6 @@ class DataSourceAzure(sources.DataSource): dsname = 'Azure' _negotiated = False _metadata_imds = sources.UNSET - lease_info = None def __init__(self, sys_cfg, distro, paths): sources.DataSource.__init__(self, sys_cfg, distro, paths) @@ -281,6 +280,7 @@ class DataSourceAzure(sources.DataSource): self._network_config = None # Regenerate network config new_instance boot and every boot self.update_events['network'].add(EventType.BOOT) + self._ephemeral_dhcp_ctx = None def __str__(self): root = sources.DataSource.__str__(self) @@ -407,10 +407,9 @@ class DataSourceAzure(sources.DataSource): LOG.warning("%s was not mountable", cdev) continue - should_report_ready_after_reprovision = False - if reprovision or self._should_reprovision(ret): + perform_reprovision = reprovision or self._should_reprovision(ret) + if perform_reprovision: ret = self._reprovision() - should_report_ready_after_reprovision = True imds_md = get_metadata_from_imds( self.fallback_interface, retries=3) (md, userdata_raw, cfg, files) = ret @@ -438,9 +437,16 @@ class DataSourceAzure(sources.DataSource): crawled_data['metadata']['instance-id'] = util.read_dmi_data( 'system-uuid') - if should_report_ready_after_reprovision: + if perform_reprovision: LOG.info("Reporting ready to Azure after getting ReprovisionData") - self._report_ready(lease=self.lease_info) + use_cached_ephemeral = (net.is_up(self.fallback_interface) and + getattr(self, '_ephemeral_dhcp_ctx', None)) + if use_cached_ephemeral: + self._report_ready(lease=self._ephemeral_dhcp_ctx.lease) + self._ephemeral_dhcp_ctx.clean_network() # Teardown ephemeral + else: + with EphemeralDHCPv4() as lease: + self._report_ready(lease=lease) return crawled_data @@ -529,20 +535,23 @@ class DataSourceAzure(sources.DataSource): while True: try: - with EphemeralDHCPv4() as lease: - self.lease_info = lease - if report_ready: - path = REPORTED_READY_MARKER_FILE - LOG.info( - "Creating a marker file to report ready: %s", path) - util.write_file(path, "{pid}: {time}\n".format( - pid=os.getpid(), time=time())) - self._report_ready(lease=lease) - report_ready = False - return readurl(url, timeout=1, headers=headers, - exception_cb=exc_cb, infinite=True, - log_req_resp=False).contents + # Save our EphemeralDHCPv4 context so we avoid repeated dhcp + self._ephemeral_dhcp_ctx = EphemeralDHCPv4() + lease = self._ephemeral_dhcp_ctx.obtain_lease() + if report_ready: + path = REPORTED_READY_MARKER_FILE + LOG.info( + "Creating a marker file to report ready: %s", path) + util.write_file(path, "{pid}: {time}\n".format( + pid=os.getpid(), time=time())) + self._report_ready(lease=lease) + report_ready = False + return readurl(url, timeout=1, headers=headers, + exception_cb=exc_cb, infinite=True, + log_req_resp=False).contents except UrlError: + # Teardown our EphemeralDHCPv4 context on failure as we retry + self._ephemeral_dhcp_ctx.clean_network() pass def _report_ready(self, lease): diff --git a/tests/unittests/test_datasource/test_azure.py b/tests/unittests/test_datasource/test_azure.py index 4c5c6c12..1dc69adb 100644 --- a/tests/unittests/test_datasource/test_azure.py +++ b/tests/unittests/test_datasource/test_azure.py @@ -513,6 +513,7 @@ fdescfs /dev/fd fdescfs rw 0 0 dsrc.crawl_metadata() self.assertEqual(str(cm.exception), error_msg) + @mock.patch('cloudinit.sources.DataSourceAzure.EphemeralDHCPv4') @mock.patch('cloudinit.sources.DataSourceAzure.util.write_file') @mock.patch( 'cloudinit.sources.DataSourceAzure.DataSourceAzure._report_ready') @@ -520,7 +521,7 @@ fdescfs /dev/fd fdescfs rw 0 0 def test_crawl_metadata_on_reprovision_reports_ready( self, poll_imds_func, report_ready_func, - m_write): + m_write, m_dhcp): """If reprovisioning, report ready at the end""" ovfenv = construct_valid_ovf_env( platform_settings={"PreprovisionedVm": "True"}) -- cgit v1.2.3