summary | refs | log | tree | commit | diff
path: root/cloudinit
diff options
context:
space:
mode:
author aswinrajamannar <39812128+aswinrajamannar@users.noreply.github.com> 2021-08-20 15:53:18 -0700
committer GitHub <noreply@github.com> 2021-08-20 16:53:18 -0600
commit 3ec8ddde0d1d2fd8597f7d2915baa3e328552ab1 (patch)
tree ca76749caeeb00faccfd1c7668868ceb2ed74ced /cloudinit
parent 7d3f5d750f6111c2716143364ea33486df67c927 (diff)
download vyos-cloud-init-3ec8ddde0d1d2fd8597f7d2915baa3e328552ab1.tar.gz
vyos-cloud-init-3ec8ddde0d1d2fd8597f7d2915baa3e328552ab1.zip
Azure: During primary nic detection, check interface status continuously before rebinding again (#990)
Add a 10-second polling loop in wait_for_link_up after performing an unbind and re-bind of the primary NIC in the hv_netvsc driver. Also reduce cloud-init logging levels to debug for these operations.
Diffstat (limited to 'cloudinit')
-rwxr-xr-x cloudinit/sources/DataSourceAzure.py 38
1 files changed, 20 insertions, 18 deletions
diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py
index ba23139b..fddfe363 100755
--- a/cloudinit/sources/DataSourceAzure.py
+++ b/cloudinit/sources/DataSourceAzure.py
@@ -892,12 +892,12 @@ class DataSourceAzure(sources.DataSource):
logger_func=LOG.info)
return
- LOG.info("Attempting to bring %s up", ifname)
+ LOG.debug("Attempting to bring %s up", ifname)
attempts = 0
+ LOG.info("Unbinding and binding the interface %s", ifname)
while True:
- LOG.info("Unbinding and binding the interface %s", ifname)
devicename = net.read_sys_net(ifname,
'device/device_id').strip('{}')
util.write_file('/sys/bus/vmbus/drivers/hv_netvsc/unbind',
@@ -912,26 +912,28 @@ class DataSourceAzure(sources.DataSource):
report_diagnostic_event(msg, logger_func=LOG.info)
return
- sleep_duration = 1
- msg = ("Link is not up after %d attempts with %d seconds sleep "
- "between attempts." % (attempts, sleep_duration))
-
if attempts % 10 == 0:
+ msg = ("Link is not up after %d attempts to rebind" % attempts)
report_diagnostic_event(msg, logger_func=LOG.info)
- else:
LOG.info(msg)
- sleep(sleep_duration)
-
- # Since we just did a unbind and bind, check again after sleep
- # but before doing unbind and bind again to avoid races where the
- # link might take a slight delay after bind to be up.
- if self.distro.networking.is_up(ifname):
- msg = ("Link is up after checking after sleeping for %d secs"
- " after %d attempts" %
- (sleep_duration, attempts))
- report_diagnostic_event(msg, logger_func=LOG.info)
- return
+ # It could take some time after rebind for the interface to be up.
+ # So poll for the status for some time before attempting to rebind
+ # again.
+ sleep_duration = 0.5
+ max_status_polls = 20
+ LOG.debug("Polling %d seconds for primary NIC link up after "
+ "rebind.", sleep_duration * max_status_polls)
+
+ for i in range(0, max_status_polls):
+ if self.distro.networking.is_up(ifname):
+ msg = ("After %d attempts to rebind, link is up after "
+ "polling the link status %d times" % (attempts, i))
+ report_diagnostic_event(msg, logger_func=LOG.info)
+ LOG.debug(msg)
+ return
+ else:
+ sleep(sleep_duration)
@azure_ds_telemetry_reporter
def _create_report_ready_marker(self):