From e119ceceb7d76af7d75c04a8779b9c5fc68083a8 Mon Sep 17 00:00:00 2001 From: aswinrajamannar <39812128+aswinrajamannar@users.noreply.github.com> Date: Thu, 12 Aug 2021 12:44:53 -0700 Subject: Azure: Check if interface is up after sleep when trying to bring it up (#972) When bringing interface up by unbinding and then binding hv_netvsc driver, it might take a short delay after binding for the link to be up. So before trying unbind/bind again after sleep, check if the link is up. This is a corner case when a preprovisioned VM is reused and the NICs are hot-attached. --- cloudinit/sources/DataSourceAzure.py | 10 ++++++++++ tests/unittests/test_datasource/test_azure.py | 19 +++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index 6df9934b..ba23139b 100755 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -923,6 +923,16 @@ class DataSourceAzure(sources.DataSource): sleep(sleep_duration) + # Since we just did a unbind and bind, check again after sleep + # but before doing unbind and bind again to avoid races where the + # link might take a slight delay after bind to be up. + if self.distro.networking.is_up(ifname): + msg = ("Link is up after checking after sleeping for %d secs" + " after %d attempts" % + (sleep_duration, attempts)) + report_diagnostic_event(msg, logger_func=LOG.info) + return + @azure_ds_telemetry_reporter def _create_report_ready_marker(self): path = REPORTED_READY_MARKER_FILE diff --git a/tests/unittests/test_datasource/test_azure.py b/tests/unittests/test_datasource/test_azure.py index 63eaf384..03609c3d 100644 --- a/tests/unittests/test_datasource/test_azure.py +++ b/tests/unittests/test_datasource/test_azure.py @@ -2907,6 +2907,25 @@ class TestPreprovisioningHotAttachNics(CiTestCase): dsa.wait_for_link_up("eth0") self.assertEqual(1, m_is_link_up.call_count) + @mock.patch(MOCKPATH + 'net.is_up', autospec=True) + @mock.patch(MOCKPATH + 'util.write_file') + @mock.patch('cloudinit.net.read_sys_net') + @mock.patch('cloudinit.distros.networking.LinuxNetworking.try_set_link_up') + def test_wait_for_link_up_checks_link_after_sleep( + self, m_is_link_up, m_read_sys_net, m_writefile, m_is_up): + """Waiting for link to be up should return immediately if the link is + already up.""" + + distro_cls = distros.fetch('ubuntu') + distro = distro_cls('ubuntu', {}, self.paths) + dsa = dsaz.DataSourceAzure({}, distro=distro, paths=self.paths) + m_is_link_up.return_value = False + m_is_up.return_value = True + + dsa.wait_for_link_up("eth0") + self.assertEqual(2, m_is_link_up.call_count) + self.assertEqual(1, m_is_up.call_count) + @mock.patch(MOCKPATH + 'util.write_file') @mock.patch('cloudinit.net.read_sys_net') @mock.patch('cloudinit.distros.networking.LinuxNetworking.try_set_link_up') -- cgit v1.2.3