diff options
author | Johnson Shi <Johnson.Shi@microsoft.com> | 2020-10-16 08:54:38 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-10-16 11:54:38 -0400 |
commit | 8766784f4b1d1f9f6a9094e1268e4accb811ea7f (patch) | |
tree | d654af968ed5b185402802a3cafea7c7357e0b26 /cloudinit/sources/DataSourceAzure.py | |
parent | c0e8480678e3a9173c9de1271f651fb3ba375f22 (diff) | |
download | vyos-cloud-init-8766784f4b1d1f9f6a9094e1268e4accb811ea7f.tar.gz vyos-cloud-init-8766784f4b1d1f9f6a9094e1268e4accb811ea7f.zip |
DataSourceAzure: write marker file after report ready in preprovisioning (#590)
DataSourceAzure previously wrote the preprovisioning
reported ready marker file before it went through the
report ready workflow. On certain VM instances, the
marker file is successfully written but then reporting
ready fails.
Upon rare VM reboots by the platform, cloud-init sees
that the report ready marker file already exists.
The existence of this marker file tells cloud-init
not to report ready again (because it mistakenly
assumes that it already reported ready in
preprovisioning).
In this scenario, cloud-init erroneously takes the
reprovisioning workflow instead of reporting ready
again.
Diffstat (limited to 'cloudinit/sources/DataSourceAzure.py')
-rwxr-xr-x | cloudinit/sources/DataSourceAzure.py | 23 |
1 files changed, 20 insertions, 3 deletions
diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index 8858fbd5..70e32f46 100755 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -720,12 +720,23 @@ class DataSourceAzure(sources.DataSource): self._ephemeral_dhcp_ctx.clean_network() break + report_ready_succeeded = self._report_ready(lease=lease) + if not report_ready_succeeded: + msg = ('Failed reporting ready while in ' + 'the preprovisioning pool.') + report_diagnostic_event(msg, logger_func=LOG.error) + self._ephemeral_dhcp_ctx.clean_network() + raise sources.InvalidMetaDataException(msg) + path = REPORTED_READY_MARKER_FILE LOG.info( "Creating a marker file to report ready: %s", path) util.write_file(path, "{pid}: {time}\n".format( pid=os.getpid(), time=time())) - self._report_ready(lease=lease) + report_diagnostic_event( + 'Successfully created reported ready marker file ' + 'while in the preprovisioning pool.', + logger_func=LOG.debug) report_ready = False with events.ReportEventStack( @@ -773,14 +784,20 @@ class DataSourceAzure(sources.DataSource): return return_val @azure_ds_telemetry_reporter - def _report_ready(self, lease): - """Tells the fabric provisioning has completed """ + def _report_ready(self, lease: dict) -> bool: + """Tells the fabric provisioning has completed. + + @param lease: dhcp lease to use for sending the ready signal. + @return: The success status of sending the ready signal. + """ try: get_metadata_from_fabric(None, lease['unknown-245']) + return True except Exception as e: report_diagnostic_event( "Error communicating with Azure fabric; You may experience " "connectivity issues: %s" % e, logger_func=LOG.warning) + return False def _should_reprovision(self, ret): """Whether or not we should poll IMDS for reprovisioning data. |