summaryrefslogtreecommitdiff
path: root/cloudinit/sources
diff options
context:
space:
mode:
authorMoustafa Moustafa <momousta@microsoft.com>2020-08-20 12:04:03 -0700
committerGitHub <noreply@github.com>2020-08-20 13:04:03 -0600
commitd941c7f7846e0216873384044d20f4b6723697c5 (patch)
tree7eeadd04459d2313f628834658c9449a2304fe25 /cloudinit/sources
parent79a8ce7e714ae1686c10bff77612eab0f6eccc95 (diff)
downloadvyos-cloud-init-d941c7f7846e0216873384044d20f4b6723697c5.tar.gz
vyos-cloud-init-d941c7f7846e0216873384044d20f4b6723697c5.zip
Pushing cloud-init log to the KVP (#529)
Push the cloud-init.log file (Up to 500KB at once) to the KVP before reporting ready to the Azure platform. Based on the analysis done on a large sample of cloud-init.log files, Here's the statistics collected on the log file size: P50 P90 P95 P99 P99.9 P99.99 137K 423K 537K 3.5MB 6MB 16MB This change limits the size of cloud-init.log file data that gets dumped to KVP to 500KB. So for ~95% of the cases, the whole log file will be dumped and for the remaining ~5%, we will get the last 500KB of the cloud-init.log file. To asses the performance of the 500KB limit, 250 VM were deployed with a 500KB cloud-init.log file and the time taken to compress, encode and dump the entries to KVP was measured. Here's the time in milliseconds percentiles: P50 P99 P999 75.705 232.701 1169.636 Another 250 VMs were deployed with this logic dumping their normal cloud-init.log file to KVP, the same timing was measured as above. Here's the time in milliseconds percentiles: P50 P99 P999 1.88 5.277 6.992 Added excluded_handlers to the report_event function to be able to opt-out from reporting the events of the compressed cloud-init.log file to the cloud-init.log file. The KVP break_down logic had a bug, where it will reuse the same key for all the split chunks of KVP which results in overwriting the split KVPs by the last one when consumed by Hyper-V. I added the split chunk index as a differentiator to the KVP key. The Hyper-V consumes the KVPs from the KVP file as chunks whose key is 512KB and value is 2048KB but the Azure platform expects the value to be 1024KB, thus I introduced the Azure value limit.
Diffstat (limited to 'cloudinit/sources')
-rwxr-xr-xcloudinit/sources/DataSourceAzure.py12
-rwxr-xr-xcloudinit/sources/helpers/azure.py58
2 files changed, 64 insertions, 6 deletions
diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py
index a3810ca8..f572a4ef 100755
--- a/cloudinit/sources/DataSourceAzure.py
+++ b/cloudinit/sources/DataSourceAzure.py
@@ -35,7 +35,8 @@ from cloudinit.sources.helpers.azure import (
report_diagnostic_event,
EphemeralDHCPv4WithReporting,
is_byte_swapped,
- dhcp_log_cb)
+ dhcp_log_cb,
+ push_log_to_kvp)
LOG = logging.getLogger(__name__)
@@ -789,9 +790,12 @@ class DataSourceAzure(sources.DataSource):
@azure_ds_telemetry_reporter
def activate(self, cfg, is_new_instance):
- address_ephemeral_resize(is_new_instance=is_new_instance,
- preserve_ntfs=self.ds_cfg.get(
- DS_CFG_KEY_PRESERVE_NTFS, False))
+ try:
+ address_ephemeral_resize(is_new_instance=is_new_instance,
+ preserve_ntfs=self.ds_cfg.get(
+ DS_CFG_KEY_PRESERVE_NTFS, False))
+ finally:
+ push_log_to_kvp(self.sys_cfg['def_log_file'])
return
@property
diff --git a/cloudinit/sources/helpers/azure.py b/cloudinit/sources/helpers/azure.py
index 6df28ccf..6156c75b 100755
--- a/cloudinit/sources/helpers/azure.py
+++ b/cloudinit/sources/helpers/azure.py
@@ -1,5 +1,5 @@
# This file is part of cloud-init. See LICENSE file for license information.
-
+import base64
import json
import logging
import os
@@ -8,7 +8,9 @@ import socket
import struct
import time
import textwrap
+import zlib
+from cloudinit.settings import CFG_BUILTIN
from cloudinit.net import dhcp
from cloudinit import stages
from cloudinit import temp_utils
@@ -33,7 +35,14 @@ DEFAULT_WIRESERVER_ENDPOINT = "a8:3f:81:10"
BOOT_EVENT_TYPE = 'boot-telemetry'
SYSTEMINFO_EVENT_TYPE = 'system-info'
DIAGNOSTIC_EVENT_TYPE = 'diagnostic'
-
+COMPRESSED_EVENT_TYPE = 'compressed'
+# Maximum number of bytes of the cloud-init.log file that can be dumped to KVP
+# at once. This number is based on the analysis done on a large sample of
+# cloud-init.log files where the P95 of the file sizes was 537KB and the time
+# consumed to dump 500KB file was (P95:76, P99:233, P99.9:1170) in ms
+MAX_LOG_TO_KVP_LENGTH = 512000
+# Marker file to indicate whether cloud-init.log is pushed to KVP
+LOG_PUSHED_TO_KVP_MARKER_FILE = '/var/lib/cloud/data/log_pushed_to_kvp'
azure_ds_reporter = events.ReportEventStack(
name="azure-ds",
description="initialize reporter for azure ds",
@@ -177,6 +186,49 @@ def report_diagnostic_event(str):
return evt
+def report_compressed_event(event_name, event_content):
+ """Report a compressed event"""
+ compressed_data = base64.encodebytes(zlib.compress(event_content))
+ event_data = {"encoding": "gz+b64",
+ "data": compressed_data.decode('ascii')}
+ evt = events.ReportingEvent(
+ COMPRESSED_EVENT_TYPE, event_name,
+ json.dumps(event_data),
+ events.DEFAULT_EVENT_ORIGIN)
+ events.report_event(evt,
+ excluded_handler_types={"log", "print", "webhook"})
+
+ # return the event for unit testing purpose
+ return evt
+
+
+@azure_ds_telemetry_reporter
+def push_log_to_kvp(file_name=CFG_BUILTIN['def_log_file']):
+ """Push a portion of cloud-init.log file or the whole file to KVP
+ based on the file size.
+ If called more than once, it skips pushing the log file to KVP again."""
+
+ log_pushed_to_kvp = bool(os.path.isfile(LOG_PUSHED_TO_KVP_MARKER_FILE))
+ if log_pushed_to_kvp:
+ report_diagnostic_event("cloud-init.log is already pushed to KVP")
+ return
+
+ LOG.debug("Dumping cloud-init.log file to KVP")
+ try:
+ with open(file_name, "rb") as f:
+ f.seek(0, os.SEEK_END)
+ seek_index = max(f.tell() - MAX_LOG_TO_KVP_LENGTH, 0)
+ report_diagnostic_event(
+ "Dumping last {} bytes of cloud-init.log file to KVP".format(
+ f.tell() - seek_index))
+ f.seek(seek_index, os.SEEK_SET)
+ report_compressed_event("cloud-init.log", f.read())
+ util.write_file(LOG_PUSHED_TO_KVP_MARKER_FILE, '')
+ except Exception as ex:
+ report_diagnostic_event("Exception when dumping log file: %s" %
+ repr(ex))
+
+
@contextmanager
def cd(newdir):
prevdir = os.getcwd()
@@ -474,6 +526,8 @@ class GoalStateHealthReporter:
@azure_ds_telemetry_reporter
def _post_health_report(self, document):
+ push_log_to_kvp()
+
# Whenever report_diagnostic_event(diagnostic_msg) is invoked in code,
# the diagnostic messages are written to special files
# (/var/opt/hyperv/.kvp_pool_*) as Hyper-V KVP messages.