diff options
Diffstat (limited to 'cloudinit/analyze')
-rw-r--r-- | cloudinit/analyze/__main__.py | 88 | ||||
-rw-r--r-- | cloudinit/analyze/show.py | 202 | ||||
-rw-r--r-- | cloudinit/analyze/tests/test_boot.py | 170 |
3 files changed, 448 insertions, 12 deletions
diff --git a/cloudinit/analyze/__main__.py b/cloudinit/analyze/__main__.py index f8613656..99e5c203 100644 --- a/cloudinit/analyze/__main__.py +++ b/cloudinit/analyze/__main__.py @@ -7,7 +7,7 @@ import re import sys from cloudinit.util import json_dumps - +from datetime import datetime from . import dump from . import show @@ -52,9 +52,93 @@ def get_parser(parser=None): dest='outfile', default='-', help='specify where to write output. ') parser_dump.set_defaults(action=('dump', analyze_dump)) + parser_boot = subparsers.add_parser( + 'boot', help='Print list of boot times for kernel and cloud-init') + parser_boot.add_argument('-i', '--infile', action='store', + dest='infile', default='/var/log/cloud-init.log', + help='specify where to read input. ') + parser_boot.add_argument('-o', '--outfile', action='store', + dest='outfile', default='-', + help='specify where to write output.') + parser_boot.set_defaults(action=('boot', analyze_boot)) return parser +def analyze_boot(name, args): + """Report a list of how long different boot operations took. + + For Example: + -- Most Recent Boot Record -- + Kernel Started at: <time> + Kernel ended boot at: <time> + Kernel time to boot (seconds): <time> + Cloud-init activated by systemd at: <time> + Time between Kernel end boot and Cloud-init activation (seconds):<time> + Cloud-init start: <time> + """ + infh, outfh = configure_io(args) + kernel_info = show.dist_check_timestamp() + status_code, kernel_start, kernel_end, ci_sysd_start = \ + kernel_info + kernel_start_timestamp = datetime.utcfromtimestamp(kernel_start) + kernel_end_timestamp = datetime.utcfromtimestamp(kernel_end) + ci_sysd_start_timestamp = datetime.utcfromtimestamp(ci_sysd_start) + try: + last_init_local = \ + [e for e in _get_events(infh) if e['name'] == 'init-local' and + 'starting search' in e['description']][-1] + ci_start = datetime.utcfromtimestamp(last_init_local['timestamp']) + except IndexError: + ci_start = 'Could not find init-local log-line in cloud-init.log' + status_code = show.FAIL_CODE + + FAILURE_MSG = 'Your Linux distro or container does not support this ' \ + 'functionality.\n' \ + 'You must be running a Kernel Telemetry supported ' \ + 'distro.\nPlease check ' \ + 'https://cloudinit.readthedocs.io/en/latest' \ + '/topics/analyze.html for more ' \ + 'information on supported distros.\n' + + SUCCESS_MSG = '-- Most Recent Boot Record --\n' \ + ' Kernel Started at: {k_s_t}\n' \ + ' Kernel ended boot at: {k_e_t}\n' \ + ' Kernel time to boot (seconds): {k_r}\n' \ + ' Cloud-init activated by systemd at: {ci_sysd_t}\n' \ + ' Time between Kernel end boot and Cloud-init ' \ + 'activation (seconds): {bt_r}\n' \ + ' Cloud-init start: {ci_start}\n' + + CONTAINER_MSG = '-- Most Recent Container Boot Record --\n' \ + ' Container started at: {k_s_t}\n' \ + ' Cloud-init activated by systemd at: {ci_sysd_t}\n' \ + ' Cloud-init start: {ci_start}\n' \ + + status_map = { + show.FAIL_CODE: FAILURE_MSG, + show.CONTAINER_CODE: CONTAINER_MSG, + show.SUCCESS_CODE: SUCCESS_MSG + } + + kernel_runtime = kernel_end - kernel_start + between_process_runtime = ci_sysd_start - kernel_end + + kwargs = { + 'k_s_t': kernel_start_timestamp, + 'k_e_t': kernel_end_timestamp, + 'k_r': kernel_runtime, + 'bt_r': between_process_runtime, + 'k_e': kernel_end, + 'k_s': kernel_start, + 'ci_sysd': ci_sysd_start, + 'ci_sysd_t': ci_sysd_start_timestamp, + 'ci_start': ci_start + } + + outfh.write(status_map[status_code].format(**kwargs)) + return status_code + + def analyze_blame(name, args): """Report a list of records sorted by largest time delta. @@ -119,7 +203,7 @@ def analyze_dump(name, args): def _get_events(infile): rawdata = None - events, rawdata = show.load_events(infile, None) + events, rawdata = show.load_events_infile(infile) if not events: events, _ = dump.dump_events(rawdata=rawdata) return events diff --git a/cloudinit/analyze/show.py b/cloudinit/analyze/show.py index 3e778b8b..511b808e 100644 --- a/cloudinit/analyze/show.py +++ b/cloudinit/analyze/show.py @@ -8,8 +8,11 @@ import base64 import datetime import json import os +import time +import sys from cloudinit import util +from cloudinit.distros import uses_systemd # An event: ''' @@ -49,6 +52,10 @@ format_key = { formatting_help = " ".join(["{0}: {1}".format(k.replace('%', '%%'), v) for k, v in format_key.items()]) +SUCCESS_CODE = 'successful' +FAIL_CODE = 'failure' +CONTAINER_CODE = 'container' +TIMESTAMP_UNKNOWN = (FAIL_CODE, -1, -1, -1) def format_record(msg, event): @@ -125,9 +132,175 @@ def total_time_record(total_time): return 'Total Time: %3.5f seconds\n' % total_time +class SystemctlReader(object): + ''' + Class for dealing with all systemctl subp calls in a consistent manner. + ''' + def __init__(self, property, parameter=None): + self.epoch = None + self.args = ['/bin/systemctl', 'show'] + if parameter: + self.args.append(parameter) + self.args.extend(['-p', property]) + # Don't want the init of our object to break. Instead of throwing + # an exception, set an error code that gets checked when data is + # requested from the object + self.failure = self.subp() + + def subp(self): + ''' + Make a subp call based on set args and handle errors by setting + failure code + + :return: whether the subp call failed or not + ''' + try: + value, err = util.subp(self.args, capture=True) + if err: + return err + self.epoch = value + return None + except Exception as systemctl_fail: + return systemctl_fail + + def parse_epoch_as_float(self): + ''' + If subp call succeeded, return the timestamp from subp as a float. + + :return: timestamp as a float + ''' + # subp has 2 ways to fail: it either fails and throws an exception, + # or returns an error code. Raise an exception here in order to make + # sure both scenarios throw exceptions + if self.failure: + raise RuntimeError('Subprocess call to systemctl has failed, ' + 'returning error code ({})' + .format(self.failure)) + # Output from systemctl show has the format Property=Value. + # For example, UserspaceMonotonic=1929304 + timestamp = self.epoch.split('=')[1] + # Timestamps reported by systemctl are in microseconds, converting + return float(timestamp) / 1000000 + + +def dist_check_timestamp(): + ''' + Determine which init system a particular linux distro is using. + Each init system (systemd, upstart, etc) has a different way of + providing timestamps. + + :return: timestamps of kernelboot, kernelendboot, and cloud-initstart + or TIMESTAMP_UNKNOWN if the timestamps cannot be retrieved. + ''' + + if uses_systemd(): + return gather_timestamps_using_systemd() + + # Use dmesg to get timestamps if the distro does not have systemd + if util.is_FreeBSD() or 'gentoo' in \ + util.system_info()['system'].lower(): + return gather_timestamps_using_dmesg() + + # this distro doesn't fit anything that is supported by cloud-init. just + # return error codes + return TIMESTAMP_UNKNOWN + + +def gather_timestamps_using_dmesg(): + ''' + Gather timestamps that corresponds to kernel begin initialization, + kernel finish initialization using dmesg as opposed to systemctl + + :return: the two timestamps plus a dummy timestamp to keep consistency + with gather_timestamps_using_systemd + ''' + try: + data, _ = util.subp(['dmesg'], capture=True) + split_entries = data[0].splitlines() + for i in split_entries: + if i.decode('UTF-8').find('user') != -1: + splitup = i.decode('UTF-8').split() + stripped = splitup[1].strip(']') + + # kernel timestamp from dmesg is equal to 0, + # with the userspace timestamp relative to it. + user_space_timestamp = float(stripped) + kernel_start = float(time.time()) - float(util.uptime()) + kernel_end = kernel_start + user_space_timestamp + + # systemd wont start cloud-init in this case, + # so we cannot get that timestamp + return SUCCESS_CODE, kernel_start, kernel_end, \ + kernel_end + + except Exception: + pass + return TIMESTAMP_UNKNOWN + + +def gather_timestamps_using_systemd(): + ''' + Gather timestamps that corresponds to kernel begin initialization, + kernel finish initialization. and cloud-init systemd unit activation + + :return: the three timestamps + ''' + kernel_start = float(time.time()) - float(util.uptime()) + try: + delta_k_end = SystemctlReader('UserspaceTimestampMonotonic')\ + .parse_epoch_as_float() + delta_ci_s = SystemctlReader('InactiveExitTimestampMonotonic', + 'cloud-init-local').parse_epoch_as_float() + base_time = kernel_start + status = SUCCESS_CODE + # lxc based containers do not set their monotonic zero point to be when + # the container starts, instead keep using host boot as zero point + # time.CLOCK_MONOTONIC_RAW is only available in python 3.3 + if util.is_container(): + # clock.monotonic also uses host boot as zero point + if sys.version_info >= (3, 3): + base_time = float(time.time()) - float(time.monotonic()) + # TODO: lxcfs automatically truncates /proc/uptime to seconds + # in containers when https://github.com/lxc/lxcfs/issues/292 + # is fixed, util.uptime() should be used instead of stat on + try: + file_stat = os.stat('/proc/1/cmdline') + kernel_start = file_stat.st_atime + except OSError as err: + raise RuntimeError('Could not determine container boot ' + 'time from /proc/1/cmdline. ({})' + .format(err)) + status = CONTAINER_CODE + else: + status = FAIL_CODE + kernel_end = base_time + delta_k_end + cloudinit_sysd = base_time + delta_ci_s + + except Exception as e: + # Except ALL exceptions as Systemctl reader can throw many different + # errors, but any failure in systemctl means that timestamps cannot be + # obtained + print(e) + return TIMESTAMP_UNKNOWN + return status, kernel_start, kernel_end, cloudinit_sysd + + def generate_records(events, blame_sort=False, print_format="(%n) %d seconds in %I%D", dump_files=False, log_datafiles=False): + ''' + Take in raw events and create parent-child dependencies between events + in order to order events in chronological order. + + :param events: JSONs from dump that represents events taken from logs + :param blame_sort: whether to sort by timestamp or by time taken. + :param print_format: formatting to represent event, time stamp, + and time taken by the event in one line + :param dump_files: whether to dump files into JSONs + :param log_datafiles: whether or not to log events generated + + :return: boot records ordered chronologically + ''' sorted_events = sorted(events, key=lambda x: x['timestamp']) records = [] @@ -189,19 +362,28 @@ def generate_records(events, blame_sort=False, def show_events(events, print_format): + ''' + A passthrough method that makes it easier to call generate_records() + + :param events: JSONs from dump that represents events taken from logs + :param print_format: formatting to represent event, time stamp, + and time taken by the event in one line + + :return: boot records ordered chronologically + ''' return generate_records(events, print_format=print_format) -def load_events(infile, rawdata=None): - if rawdata: - data = rawdata.read() - else: - data = infile.read() +def load_events_infile(infile): + ''' + Takes in a log file, read it, and convert to json. + + :param infile: The Log file to be read - j = None + :return: json version of logfile, raw file + ''' + data = infile.read() try: - j = json.loads(data) + return json.loads(data), data except ValueError: - pass - - return j, data + return None, data diff --git a/cloudinit/analyze/tests/test_boot.py b/cloudinit/analyze/tests/test_boot.py new file mode 100644 index 00000000..706e2cc0 --- /dev/null +++ b/cloudinit/analyze/tests/test_boot.py @@ -0,0 +1,170 @@ +import os +from cloudinit.analyze.__main__ import (analyze_boot, get_parser) +from cloudinit.tests.helpers import CiTestCase, mock +from cloudinit.analyze.show import dist_check_timestamp, SystemctlReader, \ + FAIL_CODE, CONTAINER_CODE + +err_code = (FAIL_CODE, -1, -1, -1) + + +class TestDistroChecker(CiTestCase): + + @mock.patch('cloudinit.util.system_info', return_value={'dist': ('', '', + ''), + 'system': ''}) + @mock.patch('platform.linux_distribution', return_value=('', '', '')) + @mock.patch('cloudinit.util.is_FreeBSD', return_value=False) + def test_blank_distro(self, m_sys_info, m_linux_distribution, m_free_bsd): + self.assertEqual(err_code, dist_check_timestamp()) + + @mock.patch('cloudinit.util.system_info', return_value={'dist': ('', '', + '')}) + @mock.patch('platform.linux_distribution', return_value=('', '', '')) + @mock.patch('cloudinit.util.is_FreeBSD', return_value=True) + def test_freebsd_gentoo_cant_find(self, m_sys_info, + m_linux_distribution, m_is_FreeBSD): + self.assertEqual(err_code, dist_check_timestamp()) + + @mock.patch('cloudinit.util.subp', return_value=(0, 1)) + def test_subp_fails(self, m_subp): + self.assertEqual(err_code, dist_check_timestamp()) + + +class TestSystemCtlReader(CiTestCase): + + def test_systemctl_invalid_property(self): + reader = SystemctlReader('dummyProperty') + with self.assertRaises(RuntimeError): + reader.parse_epoch_as_float() + + def test_systemctl_invalid_parameter(self): + reader = SystemctlReader('dummyProperty', 'dummyParameter') + with self.assertRaises(RuntimeError): + reader.parse_epoch_as_float() + + @mock.patch('cloudinit.util.subp', return_value=('U=1000000', None)) + def test_systemctl_works_correctly_threshold(self, m_subp): + reader = SystemctlReader('dummyProperty', 'dummyParameter') + self.assertEqual(1.0, reader.parse_epoch_as_float()) + thresh = 1.0 - reader.parse_epoch_as_float() + self.assertTrue(thresh < 1e-6) + self.assertTrue(thresh > (-1 * 1e-6)) + + @mock.patch('cloudinit.util.subp', return_value=('U=0', None)) + def test_systemctl_succeed_zero(self, m_subp): + reader = SystemctlReader('dummyProperty', 'dummyParameter') + self.assertEqual(0.0, reader.parse_epoch_as_float()) + + @mock.patch('cloudinit.util.subp', return_value=('U=1', None)) + def test_systemctl_succeed_distinct(self, m_subp): + reader = SystemctlReader('dummyProperty', 'dummyParameter') + val1 = reader.parse_epoch_as_float() + m_subp.return_value = ('U=2', None) + reader2 = SystemctlReader('dummyProperty', 'dummyParameter') + val2 = reader2.parse_epoch_as_float() + self.assertNotEqual(val1, val2) + + @mock.patch('cloudinit.util.subp', return_value=('100', None)) + def test_systemctl_epoch_not_splittable(self, m_subp): + reader = SystemctlReader('dummyProperty', 'dummyParameter') + with self.assertRaises(IndexError): + reader.parse_epoch_as_float() + + @mock.patch('cloudinit.util.subp', return_value=('U=foobar', None)) + def test_systemctl_cannot_convert_epoch_to_float(self, m_subp): + reader = SystemctlReader('dummyProperty', 'dummyParameter') + with self.assertRaises(ValueError): + reader.parse_epoch_as_float() + + +class TestAnalyzeBoot(CiTestCase): + + def set_up_dummy_file_ci(self, path, log_path): + infh = open(path, 'w+') + infh.write('2019-07-08 17:40:49,601 - util.py[DEBUG]: Cloud-init v. ' + '19.1-1-gbaa47854-0ubuntu1~18.04.1 running \'init-local\' ' + 'at Mon, 08 Jul 2019 17:40:49 +0000. Up 18.84 seconds.') + infh.close() + outfh = open(log_path, 'w+') + outfh.close() + + def set_up_dummy_file(self, path, log_path): + infh = open(path, 'w+') + infh.write('dummy data') + infh.close() + outfh = open(log_path, 'w+') + outfh.close() + + def remove_dummy_file(self, path, log_path): + if os.path.isfile(path): + os.remove(path) + if os.path.isfile(log_path): + os.remove(log_path) + + @mock.patch('cloudinit.analyze.show.dist_check_timestamp', + return_value=err_code) + def test_boot_invalid_distro(self, m_dist_check_timestamp): + + path = os.path.dirname(os.path.abspath(__file__)) + log_path = path + '/boot-test.log' + path += '/dummy.log' + self.set_up_dummy_file(path, log_path) + + parser = get_parser() + args = parser.parse_args(args=['boot', '-i', path, '-o', + log_path]) + name_default = '' + analyze_boot(name_default, args) + # now args have been tested, go into outfile and make sure error + # message is in the outfile + outfh = open(args.outfile, 'r') + data = outfh.read() + err_string = 'Your Linux distro or container does not support this ' \ + 'functionality.\nYou must be running a Kernel ' \ + 'Telemetry supported distro.\nPlease check ' \ + 'https://cloudinit.readthedocs.io/en/latest/topics' \ + '/analyze.html for more information on supported ' \ + 'distros.\n' + + self.remove_dummy_file(path, log_path) + self.assertEqual(err_string, data) + + @mock.patch("cloudinit.util.is_container", return_value=True) + @mock.patch('cloudinit.util.subp', return_value=('U=1000000', None)) + def test_container_no_ci_log_line(self, m_is_container, m_subp): + path = os.path.dirname(os.path.abspath(__file__)) + log_path = path + '/boot-test.log' + path += '/dummy.log' + self.set_up_dummy_file(path, log_path) + + parser = get_parser() + args = parser.parse_args(args=['boot', '-i', path, '-o', + log_path]) + name_default = '' + + finish_code = analyze_boot(name_default, args) + + self.remove_dummy_file(path, log_path) + self.assertEqual(FAIL_CODE, finish_code) + + @mock.patch("cloudinit.util.is_container", return_value=True) + @mock.patch('cloudinit.util.subp', return_value=('U=1000000', None)) + @mock.patch('cloudinit.analyze.__main__._get_events', return_value=[{ + 'name': 'init-local', 'description': 'starting search', 'timestamp': + 100000}]) + @mock.patch('cloudinit.analyze.show.dist_check_timestamp', + return_value=(CONTAINER_CODE, 1, 1, 1)) + def test_container_ci_log_line(self, m_is_container, m_subp, m_get, m_g): + path = os.path.dirname(os.path.abspath(__file__)) + log_path = path + '/boot-test.log' + path += '/dummy.log' + self.set_up_dummy_file_ci(path, log_path) + + parser = get_parser() + args = parser.parse_args(args=['boot', '-i', path, '-o', + log_path]) + name_default = '' + finish_code = analyze_boot(name_default, args) + + self.remove_dummy_file(path, log_path) + self.assertEqual(CONTAINER_CODE, finish_code) |