diff options
author | aswinrajamannar <39812128+aswinrajamannar@users.noreply.github.com> | 2021-08-20 15:53:18 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-08-20 16:53:18 -0600 |
commit | 3ec8ddde0d1d2fd8597f7d2915baa3e328552ab1 (patch) | |
tree | ca76749caeeb00faccfd1c7668868ceb2ed74ced /cloudinit | |
parent | 7d3f5d750f6111c2716143364ea33486df67c927 (diff) | |
download | vyos-cloud-init-3ec8ddde0d1d2fd8597f7d2915baa3e328552ab1.tar.gz vyos-cloud-init-3ec8ddde0d1d2fd8597f7d2915baa3e328552ab1.zip |
Azure: During primary nic detection, check interface status continuously before rebinding again (#990)
Add a 10-second polling loop in wait_for_link_up after performing
an unbind and re-bind of the primary NIC in the hv_netvsc driver.
Also reduce cloud-init logging levels to debug for these operations.
Diffstat (limited to 'cloudinit')
-rwxr-xr-x | cloudinit/sources/DataSourceAzure.py | 38 |
1 files changed, 20 insertions, 18 deletions
diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index ba23139b..fddfe363 100755 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -892,12 +892,12 @@ class DataSourceAzure(sources.DataSource): logger_func=LOG.info) return - LOG.info("Attempting to bring %s up", ifname) + LOG.debug("Attempting to bring %s up", ifname) attempts = 0 + LOG.info("Unbinding and binding the interface %s", ifname) while True: - LOG.info("Unbinding and binding the interface %s", ifname) devicename = net.read_sys_net(ifname, 'device/device_id').strip('{}') util.write_file('/sys/bus/vmbus/drivers/hv_netvsc/unbind', @@ -912,26 +912,28 @@ class DataSourceAzure(sources.DataSource): report_diagnostic_event(msg, logger_func=LOG.info) return - sleep_duration = 1 - msg = ("Link is not up after %d attempts with %d seconds sleep " - "between attempts." % (attempts, sleep_duration)) - if attempts % 10 == 0: + msg = ("Link is not up after %d attempts to rebind" % attempts) report_diagnostic_event(msg, logger_func=LOG.info) - else: LOG.info(msg) - sleep(sleep_duration) - - # Since we just did a unbind and bind, check again after sleep - # but before doing unbind and bind again to avoid races where the - # link might take a slight delay after bind to be up. - if self.distro.networking.is_up(ifname): - msg = ("Link is up after checking after sleeping for %d secs" - " after %d attempts" % - (sleep_duration, attempts)) - report_diagnostic_event(msg, logger_func=LOG.info) - return + # It could take some time after rebind for the interface to be up. + # So poll for the status for some time before attempting to rebind + # again. 
+ sleep_duration = 0.5 + max_status_polls = 20 + LOG.debug("Polling %d seconds for primary NIC link up after " + "rebind.", sleep_duration * max_status_polls) + + for i in range(0, max_status_polls): + if self.distro.networking.is_up(ifname): + msg = ("After %d attempts to rebind, link is up after " + "polling the link status %d times" % (attempts, i)) + report_diagnostic_event(msg, logger_func=LOG.info) + LOG.debug(msg) + return + else: + sleep(sleep_duration) @azure_ds_telemetry_reporter def _create_report_ready_marker(self): |