summary | refs | log | tree | commit | diff
path: root/cloudinit
diff options
context:
space:
mode:
author: aswinrajamannar <39812128+aswinrajamannar@users.noreply.github.com> 2021-08-10 12:28:00 -0700
committer: GitHub <noreply@github.com> 2021-08-10 14:28:00 -0500
commit: d3271217e2745fb0e3405bd093b61c39fe0708a7 (patch)
tree: 8d140da5c47638db090305bddf558bfe3e918591 /cloudinit
parent: c62cb3af59abc464380011c106b31879181e7c45 (diff)
download: vyos-cloud-init-d3271217e2745fb0e3405bd093b61c39fe0708a7.tar.gz
vyos-cloud-init-d3271217e2745fb0e3405bd093b61c39fe0708a7.zip
Azure: Limit polling network metadata on connection errors (#961)
Diffstat (limited to 'cloudinit')
-rwxr-xr-x cloudinit/sources/DataSourceAzure.py 27
1 files changed, 17 insertions, 10 deletions
diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py
index 01e2c959..6df9934b 100755
--- a/cloudinit/sources/DataSourceAzure.py
+++ b/cloudinit/sources/DataSourceAzure.py
@@ -972,7 +972,7 @@ class DataSourceAzure(sources.DataSource):
imds_md = None
metadata_poll_count = 0
metadata_logging_threshold = 1
- metadata_timeout_count = 0
+ expected_errors_count = 0
# For now, only a VM's primary NIC can contact IMDS and WireServer. If
# DHCP fails for a NIC, we have no mechanism to determine if the NIC is
@@ -998,13 +998,16 @@ class DataSourceAzure(sources.DataSource):
raise
# Retry polling network metadata for a limited duration only when the
- # calls fail due to timeout. This is because the platform drops packets
- # going towards IMDS when it is not a primary nic. If the calls fail
- # due to other issues like 410, 503 etc, then it means we are primary
- # but IMDS service is unavailable at the moment. Retry indefinitely in
- # those cases since we cannot move on without the network metadata.
+ # calls fail due to a network unreachable error or a timeout.
+ # This is because the platform drops packets going towards IMDS
+ # when it is not a primary NIC. If the calls fail due to other issues
+ # like 410, 503 etc, then it means we are primary but IMDS service
+ # is unavailable at the moment. Retry indefinitely in those cases
+ # since we cannot move on without the network metadata. In the future,
+ # none of this will be necessary, as a new DHCP option will indicate
+ # whether the NIC is primary or not.
def network_metadata_exc_cb(msg, exc):
- nonlocal metadata_timeout_count, metadata_poll_count
+ nonlocal expected_errors_count, metadata_poll_count
nonlocal metadata_logging_threshold
metadata_poll_count = metadata_poll_count + 1
@@ -1024,9 +1027,13 @@ class DataSourceAzure(sources.DataSource):
(msg, exc.cause, exc.code),
logger_func=LOG.error)
- if exc.cause and isinstance(exc.cause, requests.Timeout):
- metadata_timeout_count = metadata_timeout_count + 1
- return (metadata_timeout_count <= 10)
+ # Retry up to a certain limit for both timeout and network
+ # unreachable errors.
+ if exc.cause and isinstance(
+ exc.cause, (requests.Timeout, requests.ConnectionError)
+ ):
+ expected_errors_count = expected_errors_count + 1
+ return (expected_errors_count <= 10)
return True
# Primary nic detection will be optimized in the future. The fact that