diff options
author | Sam Gilson <t-sagils@microsoft.com> | 2019-07-15 21:50:33 +0000 |
---|---|---|
committer | Server Team CI Bot <josh.powers+server-team-bot@canonical.com> | 2019-07-15 21:50:33 +0000 |
commit | a24550aee4c7282cd3624bf63f9501444e517678 (patch) | |
tree | 9dd5d148266673021a56f29c4282b61e6f372898 /cloudinit/analyze/show.py | |
parent | a6faf3acef02bd8cd4d46ac9efeebf24b3f21d81 (diff) | |
download | vyos-cloud-init-a24550aee4c7282cd3624bf63f9501444e517678.tar.gz vyos-cloud-init-a24550aee4c7282cd3624bf63f9501444e517678.zip |
Cloud-init analyze module: Added ability to analyze boot events.
This branch introduces a new command line feature for cloud-init.
Currently, the cloud-init module has the capability to analyze events in
cloud-init.log in three ways: 'show', 'blame', 'dump'.
These changes add a fourth capability, called 'boot'.
Running the command 'cloud-init analyze boot' will provide the user three
timestamps.
1) Timestamp for when the kernel starts initializing.
2) Timestamp for when the kernel finishes its initialization.
3) Timestamp for when systemd activates cloud-init.
This feature enables cloud-init users to analyze different boot phases.
This would aid in debugging performance issues related
to cloud-init startup or tracking regression.
Diffstat (limited to 'cloudinit/analyze/show.py')
-rw-r--r-- | cloudinit/analyze/show.py | 202 |
1 files changed, 192 insertions, 10 deletions
diff --git a/cloudinit/analyze/show.py b/cloudinit/analyze/show.py index 3e778b8b..511b808e 100644 --- a/cloudinit/analyze/show.py +++ b/cloudinit/analyze/show.py @@ -8,8 +8,11 @@ import base64 import datetime import json import os +import time +import sys from cloudinit import util +from cloudinit.distros import uses_systemd # An event: ''' @@ -49,6 +52,10 @@ format_key = { formatting_help = " ".join(["{0}: {1}".format(k.replace('%', '%%'), v) for k, v in format_key.items()]) +SUCCESS_CODE = 'successful' +FAIL_CODE = 'failure' +CONTAINER_CODE = 'container' +TIMESTAMP_UNKNOWN = (FAIL_CODE, -1, -1, -1) def format_record(msg, event): @@ -125,9 +132,175 @@ def total_time_record(total_time): return 'Total Time: %3.5f seconds\n' % total_time +class SystemctlReader(object): + ''' + Class for dealing with all systemctl subp calls in a consistent manner. + ''' + def __init__(self, property, parameter=None): + self.epoch = None + self.args = ['/bin/systemctl', 'show'] + if parameter: + self.args.append(parameter) + self.args.extend(['-p', property]) + # Don't want the init of our object to break. Instead of throwing + # an exception, set an error code that gets checked when data is + # requested from the object + self.failure = self.subp() + + def subp(self): + ''' + Make a subp call based on set args and handle errors by setting + failure code + + :return: whether the subp call failed or not + ''' + try: + value, err = util.subp(self.args, capture=True) + if err: + return err + self.epoch = value + return None + except Exception as systemctl_fail: + return systemctl_fail + + def parse_epoch_as_float(self): + ''' + If subp call succeeded, return the timestamp from subp as a float. + + :return: timestamp as a float + ''' + # subp has 2 ways to fail: it either fails and throws an exception, + # or returns an error code. Raise an exception here in order to make + # sure both scenarios throw exceptions + if self.failure: + raise RuntimeError('Subprocess call to systemctl has failed, ' + 'returning error code ({})' + .format(self.failure)) + # Output from systemctl show has the format Property=Value. + # For example, UserspaceMonotonic=1929304 + timestamp = self.epoch.split('=')[1] + # Timestamps reported by systemctl are in microseconds, converting + return float(timestamp) / 1000000 + + +def dist_check_timestamp(): + ''' + Determine which init system a particular linux distro is using. + Each init system (systemd, upstart, etc) has a different way of + providing timestamps. + + :return: timestamps of kernelboot, kernelendboot, and cloud-initstart + or TIMESTAMP_UNKNOWN if the timestamps cannot be retrieved. + ''' + + if uses_systemd(): + return gather_timestamps_using_systemd() + + # Use dmesg to get timestamps if the distro does not have systemd + if util.is_FreeBSD() or 'gentoo' in \ + util.system_info()['system'].lower(): + return gather_timestamps_using_dmesg() + + # this distro doesn't fit anything that is supported by cloud-init. just + # return error codes + return TIMESTAMP_UNKNOWN + + +def gather_timestamps_using_dmesg(): + ''' + Gather timestamps that corresponds to kernel begin initialization, + kernel finish initialization using dmesg as opposed to systemctl + + :return: the two timestamps plus a dummy timestamp to keep consistency + with gather_timestamps_using_systemd + ''' + try: + data, _ = util.subp(['dmesg'], capture=True) + split_entries = data[0].splitlines() + for i in split_entries: + if i.decode('UTF-8').find('user') != -1: + splitup = i.decode('UTF-8').split() + stripped = splitup[1].strip(']') + + # kernel timestamp from dmesg is equal to 0, + # with the userspace timestamp relative to it. + user_space_timestamp = float(stripped) + kernel_start = float(time.time()) - float(util.uptime()) + kernel_end = kernel_start + user_space_timestamp + + # systemd wont start cloud-init in this case, + # so we cannot get that timestamp + return SUCCESS_CODE, kernel_start, kernel_end, \ + kernel_end + + except Exception: + pass + return TIMESTAMP_UNKNOWN + + +def gather_timestamps_using_systemd(): + ''' + Gather timestamps that corresponds to kernel begin initialization, + kernel finish initialization. and cloud-init systemd unit activation + + :return: the three timestamps + ''' + kernel_start = float(time.time()) - float(util.uptime()) + try: + delta_k_end = SystemctlReader('UserspaceTimestampMonotonic')\ + .parse_epoch_as_float() + delta_ci_s = SystemctlReader('InactiveExitTimestampMonotonic', + 'cloud-init-local').parse_epoch_as_float() + base_time = kernel_start + status = SUCCESS_CODE + # lxc based containers do not set their monotonic zero point to be when + # the container starts, instead keep using host boot as zero point + # time.CLOCK_MONOTONIC_RAW is only available in python 3.3 + if util.is_container(): + # clock.monotonic also uses host boot as zero point + if sys.version_info >= (3, 3): + base_time = float(time.time()) - float(time.monotonic()) + # TODO: lxcfs automatically truncates /proc/uptime to seconds + # in containers when https://github.com/lxc/lxcfs/issues/292 + # is fixed, util.uptime() should be used instead of stat on + try: + file_stat = os.stat('/proc/1/cmdline') + kernel_start = file_stat.st_atime + except OSError as err: + raise RuntimeError('Could not determine container boot ' + 'time from /proc/1/cmdline. ({})' + .format(err)) + status = CONTAINER_CODE + else: + status = FAIL_CODE + kernel_end = base_time + delta_k_end + cloudinit_sysd = base_time + delta_ci_s + + except Exception as e: + # Except ALL exceptions as Systemctl reader can throw many different + # errors, but any failure in systemctl means that timestamps cannot be + # obtained + print(e) + return TIMESTAMP_UNKNOWN + return status, kernel_start, kernel_end, cloudinit_sysd + + def generate_records(events, blame_sort=False, print_format="(%n) %d seconds in %I%D", dump_files=False, log_datafiles=False): + ''' + Take in raw events and create parent-child dependencies between events + in order to order events in chronological order. + + :param events: JSONs from dump that represents events taken from logs + :param blame_sort: whether to sort by timestamp or by time taken. + :param print_format: formatting to represent event, time stamp, + and time taken by the event in one line + :param dump_files: whether to dump files into JSONs + :param log_datafiles: whether or not to log events generated + + :return: boot records ordered chronologically + ''' sorted_events = sorted(events, key=lambda x: x['timestamp']) records = [] @@ -189,19 +362,28 @@ def generate_records(events, blame_sort=False, def show_events(events, print_format): + ''' + A passthrough method that makes it easier to call generate_records() + + :param events: JSONs from dump that represents events taken from logs + :param print_format: formatting to represent event, time stamp, + and time taken by the event in one line + + :return: boot records ordered chronologically + ''' return generate_records(events, print_format=print_format) -def load_events(infile, rawdata=None): - if rawdata: - data = rawdata.read() - else: - data = infile.read() +def load_events_infile(infile): + ''' + Takes in a log file, read it, and convert to json. + + :param infile: The Log file to be read - j = None + :return: json version of logfile, raw file + ''' + data = infile.read() try: - j = json.loads(data) + return json.loads(data), data except ValueError: - pass - - return j, data + return None, data |