From e74d7752f1761c3a8d3c19877de4707d00c49d08 Mon Sep 17 00:00:00 2001
From: Chad Smith
Date: Mon, 21 Aug 2017 13:46:23 -0600
Subject: tools: Add tooling for basic cloud-init performance analysis.

This branch adds cloudinit-analyze into cloud-init proper. It adds an
"analyze" subcommand to the cloud-init command line utility for quick
performance assessment of cloud-init stages and events.

On a cloud-init configured instance, running "cloud-init analyze blame"
will now report which cloud-init events cost the most wall time. This
allows for quick assessment of the most costly stages of cloud-init.

This functionality is pulled from Ryan Harper's analyze work. The
cloudinit-analyze main script itself has been refactored a bit for
inclusion as a subcommand of the cloud-init CLI. There will be a
followup branch at some point which will optionally instrument detailed
strace profiling, but that approach needs a bit more discussion first.

This branch also adds:
 * an additional debugging topic in the sphinx-generated docs describing
   cloud-init analyze, dump and show as well as cloud-init single usage
 * a Makefile unittests target update to include the cloudinit directory,
   because we now have unittests within that package

LP: #1709761
---
 Makefile                             |   2 +-
 cloudinit/analyze/__init__.py        |   0
 cloudinit/analyze/__main__.py        | 155 ++++++++++++++++++++++++++
 cloudinit/analyze/dump.py            | 176 +++++++++++++++++++++++++++++
 cloudinit/analyze/show.py            | 207 ++++++++++++++++++++++++++++++++++
 cloudinit/analyze/tests/test_dump.py | 210 +++++++++++++++++++++++++++++++++++
 cloudinit/cmd/main.py                |  44 +++-----
 doc/rtd/index.rst                    |   1 +
 doc/rtd/topics/debugging.rst         | 146 ++++++++++++++++++++++++
 tests/unittests/test_cli.py          |  87 ++++++++++++++-
 10 files changed, 995 insertions(+), 33 deletions(-)
 create mode 100644 cloudinit/analyze/__init__.py
 create mode 100644 cloudinit/analyze/__main__.py
 create mode 100644 cloudinit/analyze/dump.py
 create mode 100644 cloudinit/analyze/show.py
 create mode 100644 cloudinit/analyze/tests/test_dump.py
 create mode 100644 doc/rtd/topics/debugging.rst

diff --git a/Makefile b/Makefile
index f280911f..9e7f4ee7 100644
--- a/Makefile
+++ b/Makefile
@@ -48,7 +48,7 @@ pyflakes3:
 	@$(CWD)/tools/run-pyflakes3
 
 unittest: clean_pyc
-	nosetests $(noseopts) tests/unittests
+	nosetests $(noseopts) tests/unittests cloudinit
 
 unittest3: clean_pyc
 	nosetests3 $(noseopts) tests/unittests
diff --git a/cloudinit/analyze/__init__.py b/cloudinit/analyze/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/cloudinit/analyze/__main__.py b/cloudinit/analyze/__main__.py
new file mode 100644
index 00000000..71cba4f2
--- /dev/null
+++ b/cloudinit/analyze/__main__.py
@@ -0,0 +1,155 @@
+# Copyright (C) 2017 Canonical Ltd.
+#
+# This file is part of cloud-init. See LICENSE file for license information.
+
+import argparse
+import re
+import sys
+
+from . import dump
+from . import show
+
+
+def get_parser(parser=None):
+    if not parser:
+        parser = argparse.ArgumentParser(
+            prog='cloudinit-analyze',
+            description='Devel tool: Analyze cloud-init logs and data')
+    subparsers = parser.add_subparsers(title='Subcommands', dest='subcommand')
+    subparsers.required = True
+
+    parser_blame = subparsers.add_parser(
+        'blame', help='Print list of executed stages ordered by time to init')
+    parser_blame.add_argument(
+        '-i', '--infile', action='store', dest='infile',
+        default='/var/log/cloud-init.log',
+        help='specify where to read input.')
+    parser_blame.add_argument(
+        '-o', '--outfile', action='store', dest='outfile', default='-',
+        help='specify where to write output.')
+    parser_blame.set_defaults(action=('blame', analyze_blame))
+
+    parser_show = subparsers.add_parser(
+        'show', help='Print list of in-order events during execution')
+    parser_show.add_argument('-f', '--format', action='store',
+                             dest='print_format', default='%I%D @%Es +%ds',
+                             help='specify formatting of output.')
+    parser_show.add_argument('-i', '--infile', action='store',
+                             dest='infile', default='/var/log/cloud-init.log',
+                             help='specify where to read input.')
+    parser_show.add_argument('-o', '--outfile', action='store',
+                             dest='outfile', default='-',
+                             help='specify where to write output.')
+    parser_show.set_defaults(action=('show', analyze_show))
+
+    parser_dump = subparsers.add_parser(
+        'dump', help='Dump cloud-init events in JSON format')
+    parser_dump.add_argument('-i', '--infile', action='store',
+                             dest='infile', default='/var/log/cloud-init.log',
+                             help='specify where to read input.')
+    parser_dump.add_argument('-o', '--outfile', action='store',
+                             dest='outfile', default='-',
+                             help='specify where to write output.')
+    parser_dump.set_defaults(action=('dump', analyze_dump))
+    return parser
+
+
+def analyze_blame(name, args):
+    """Report a list of records sorted by largest time delta.
+
+    For example:
+      30.210s (init-local) searching for datasource
+       8.706s (init-network) reading and applying user-data
+        166ms (modules-config) ....
+        807us (modules-final) ...
+
+    We generate event records by parsing cloud-init logs, formatting the
+    output and sorting by record data ('delta').
+    """
+    (infh, outfh) = configure_io(args)
+    blame_format = '     %ds (%n)'
+    r = re.compile(r'(^\s+\d+\.\d+)', re.MULTILINE)
+    for idx, record in enumerate(show.show_events(_get_events(infh),
+                                                  blame_format)):
+        srecs = sorted(filter(r.match, record), reverse=True)
+        outfh.write('-- Boot Record %02d --\n' % (idx + 1))
+        outfh.write('\n'.join(srecs) + '\n')
+        outfh.write('\n')
+    outfh.write('%d boot records analyzed\n' % (idx + 1))
+
+
+def analyze_show(name, args):
+    """Generate output records using the 'standard' format to print events.
+
+    Example output follows:
+    Starting stage: (init-local)
+      ...
+    Finished stage: (init-local) 0.105195 seconds
+
+    Starting stage: (init-network)
+      ...
+    Finished stage: (init-network) 0.339024 seconds
+
+    Starting stage: (modules-config)
+      ...
+    Finished stage: (modules-config) 0.NNN seconds
+
+    Starting stage: (modules-final)
+      ...
+    Finished stage: (modules-final) 0.NNN seconds
+    """
+    (infh, outfh) = configure_io(args)
+    for idx, record in enumerate(show.show_events(_get_events(infh),
+                                                  args.print_format)):
+        outfh.write('-- Boot Record %02d --\n' % (idx + 1))
+        outfh.write('The total time elapsed since completing an event is'
+                    ' printed after the "@" character.\n')
+        outfh.write('The time the event takes is printed after the "+" '
+                    'character.\n\n')
+        outfh.write('\n'.join(record) + '\n')
+    outfh.write('%d boot records analyzed\n' % (idx + 1))
+
+
+def analyze_dump(name, args):
+    """Dump cloud-init events in json format."""
+    (infh, outfh) = configure_io(args)
+    outfh.write(dump.json_dumps(_get_events(infh)) + '\n')
+
+
+def _get_events(infile):
+    rawdata = None
+    events, rawdata = show.load_events(infile, None)
+    if not events:
+        events, _ = dump.dump_events(rawdata=rawdata)
+    return events
+
+
+def configure_io(args):
+    """Common parsing and setup of input/output files."""
+    if args.infile == '-':
+        infh = sys.stdin
+    else:
+        try:
+            infh = open(args.infile, 'r')
+        except (FileNotFoundError, PermissionError):
+            sys.stderr.write('Cannot open file %s\n' % args.infile)
+            sys.exit(1)
+
+    if args.outfile == '-':
+        outfh = sys.stdout
+    else:
+        try:
+            outfh = open(args.outfile, 'w')
+        except PermissionError:
+            sys.stderr.write('Cannot open file %s\n' % args.outfile)
+            sys.exit(1)
+
+    return (infh, outfh)
+
+
+if __name__ == '__main__':
+    parser = get_parser()
+    args = parser.parse_args()
+    (name, action_functor) = args.action
+    action_functor(name, args)
+
+# vi: ts=4 expandtab
diff --git a/cloudinit/analyze/dump.py b/cloudinit/analyze/dump.py
new file mode 100644
index 00000000..ca4da496
--- /dev/null
+++ b/cloudinit/analyze/dump.py
@@ -0,0 +1,176 @@
+# This file is part of cloud-init. See LICENSE file for license information.
+
+import calendar
+from datetime import datetime
+import json
+import sys
+
+from cloudinit import util
+
+stage_to_description = {
+    'finished': 'finished running cloud-init',
+    'init-local': 'starting search for local datasources',
+    'init-network': 'searching for network datasources',
+    'init': 'searching for network datasources',
+    'modules-config': 'running config modules',
+    'modules-final': 'finalizing modules',
+    'modules': 'running modules for',
+    'single': 'running single module ',
+}
+
+# logger's asctime format
+CLOUD_INIT_ASCTIME_FMT = "%Y-%m-%d %H:%M:%S,%f"
+
+# journalctl -o short-precise
+CLOUD_INIT_JOURNALCTL_FMT = "%b %d %H:%M:%S.%f %Y"
+
+# other
+DEFAULT_FMT = "%b %d %H:%M:%S %Y"
+
+
+def parse_timestamp(timestampstr):
+    # default syslog time does not include the current year
+    months = [calendar.month_abbr[m] for m in range(1, 13)]
+    if timestampstr.split()[0] in months:
+        # Aug 29 22:55:26
+        FMT = DEFAULT_FMT
+        if '.' in timestampstr:
+            FMT = CLOUD_INIT_JOURNALCTL_FMT
+        dt = datetime.strptime(timestampstr + " " +
+                               str(datetime.now().year),
+                               FMT)
+        timestamp = dt.strftime("%s.%f")
+    elif "," in timestampstr:
+        # 2016-09-12 14:39:20,839
+        dt = datetime.strptime(timestampstr, CLOUD_INIT_ASCTIME_FMT)
+        timestamp = dt.strftime("%s.%f")
+    else:
+        # allow date(1) to handle other formats we don't expect
+        timestamp = parse_timestamp_from_date(timestampstr)
+
+    return float(timestamp)
+
+
+def parse_timestamp_from_date(timestampstr):
+    out, _ = util.subp(['date', '+%s.%3N', '-d', timestampstr])
+    timestamp = out.strip()
+    return float(timestamp)
+
+
+def parse_ci_logline(line):
+    # Stage Starts:
+    # Cloud-init v. 0.7.7 running 'init-local' at \
+    #   Fri, 02 Sep 2016 19:28:07 +0000. Up 1.0 seconds.
+    # Cloud-init v. 0.7.7 running 'init' at \
+    #   Fri, 02 Sep 2016 19:28:08 +0000. Up 2.0 seconds.
+    # Cloud-init v. 0.7.7 finished at
+    # Aug 29 22:55:26 test1 [CLOUDINIT] handlers.py[DEBUG]: \
+    #   finish: modules-final: SUCCESS: running modules for final
+    # 2016-08-30T21:53:25.972325+00:00 y1 [CLOUDINIT] handlers.py[DEBUG]: \
+    #   finish: modules-final: SUCCESS: running modules for final
+    #
+    # Nov 03 06:51:06.074410 x2 cloud-init[106]: [CLOUDINIT] util.py[DEBUG]: \
+    #   Cloud-init v. 0.7.8 running 'init-local' at \
+    #   Thu, 03 Nov 2016 06:51:06 +0000. Up 1.0 seconds.
+    #
+    # 2017-05-22 18:02:01,088 - util.py[DEBUG]: Cloud-init v. 0.7.9 running \
+    #   'init-local' at Mon, 22 May 2017 18:02:01 +0000. Up 2.0 seconds.
+
+    separators = [' - ', ' [CLOUDINIT] ']
+    found = False
+    for sep in separators:
+        if sep in line:
+            found = True
+            break
+
+    if not found:
+        return None
+
+    (timehost, eventstr) = line.split(sep)
+
+    # journalctl -o short-precise
+    if timehost.endswith(":"):
+        timehost = " ".join(timehost.split()[0:-1])
+
+    if "," in timehost:
+        timestampstr, extra = timehost.split(",")
+        timestampstr += ",%s" % extra.split()[0]
+        if ' ' in extra:
+            hostname = extra.split()[-1]
+    else:
+        hostname = timehost.split()[-1]
+        timestampstr = timehost.split(hostname)[0].strip()
+
+    if 'Cloud-init v.' in eventstr:
+        event_type = 'start'
+        if 'running' in eventstr:
+            stage_and_timestamp = eventstr.split('running')[1].lstrip()
+            event_name, _ = stage_and_timestamp.split(' at ')
+            event_name = event_name.replace("'", "").replace(":", "-")
+            if event_name == "init":
+                event_name = "init-network"
+        else:
+            # don't generate a start for the 'finished at' banner
+            return None
+        event_description = stage_to_description[event_name]
+    else:
+        (pymodloglvl, event_type, event_name) = eventstr.split()[0:3]
+        event_description = eventstr.split(event_name)[1].strip()
+
+    event = {
+        'name': event_name.rstrip(":"),
+        'description': event_description,
+        'timestamp': parse_timestamp(timestampstr),
+        'origin': 'cloudinit',
+        'event_type': event_type.rstrip(":"),
+    }
+    if event['event_type'] == "finish":
+        result = event_description.split(":")[0]
+        desc = event_description.split(result)[1].lstrip(':').strip()
+        event['result'] = result
+        event['description'] = desc.strip()
+
+    return event
+
+
+def json_dumps(data):
+    return json.dumps(data, indent=1, sort_keys=True,
+                      separators=(',', ': '))
+
+
+def dump_events(cisource=None, rawdata=None):
+    events = []
+    event = None
+    CI_EVENT_MATCHES = ['start:', 'finish:', 'Cloud-init v.']
+
+    if not any([cisource, rawdata]):
+        raise ValueError('Either cisource or rawdata parameters are required')
+
+    if rawdata:
+        data = rawdata.splitlines()
+    else:
+        data = cisource.readlines()
+
+    for line in data:
+        for match in CI_EVENT_MATCHES:
+            if match in line:
+                try:
+                    event = parse_ci_logline(line)
+                except ValueError:
+                    sys.stderr.write('Skipping invalid entry\n')
+                if event:
+                    events.append(event)
+
+    return events, data
+
+
+def main():
+    if len(sys.argv) > 1:
+        cisource = open(sys.argv[1])
+    else:
+        cisource = sys.stdin
+
+    # dump_events returns an (events, data) tuple; serialize just the events
+    events, _ = dump_events(cisource)
+    return json_dumps(events)
+
+
+if __name__ == "__main__":
+    print(main())
diff --git a/cloudinit/analyze/show.py b/cloudinit/analyze/show.py
new file mode 100644
index 00000000..3b356bb8
--- /dev/null
+++ b/cloudinit/analyze/show.py
@@ -0,0 +1,207 @@
+# Copyright (C) 2016 Canonical Ltd.
+#
+# Author: Ryan Harper
+#
+# This file is part of cloud-init. See LICENSE file for license information.
+
+import base64
+import datetime
+import json
+import os
+
+from cloudinit import util
+
+# An event:
+'''
+{
+    "description": "executing late commands",
+    "event_type": "start",
+    "level": "INFO",
+    "name": "cmd-install/stage-late",
+    "origin": "cloudinit",
+    "timestamp": 1461164249.1590767
+},
+{
+    "description": "executing late commands",
+    "event_type": "finish",
+    "level": "INFO",
+    "name": "cmd-install/stage-late",
+    "origin": "cloudinit",
+    "result": "SUCCESS",
+    "timestamp": 1461164249.1590767
+}
+'''
+format_key = {
+    '%d': 'delta',
+    '%D': 'description',
+    '%E': 'elapsed',
+    '%e': 'event_type',
+    '%I': 'indent',
+    '%l': 'level',
+    '%n': 'name',
+    '%o': 'origin',
+    '%r': 'result',
+    '%t': 'timestamp',
+    '%T': 'total_time',
+}
+
+formatting_help = " ".join(["{0}: {1}".format(k.replace('%', '%%'), v)
+                            for k, v in format_key.items()])
+
+
+def format_record(msg, event):
+    for i, j in format_key.items():
+        if i in msg:
+            # ensure consistent formatting of time values
+            if j in ['delta', 'elapsed', 'timestamp']:
+                msg = msg.replace(i, "{%s:08.5f}" % j)
+            else:
+                msg = msg.replace(i, "{%s}" % j)
+    return msg.format(**event)
+
+
+def dump_event_files(event):
+    content = dict((k, v) for k, v in event.items() if k not in ['content'])
+    files = content['files']
+    saved = []
+    for f in files:
+        fname = f['path']
+        fn_local = os.path.basename(fname)
+        fcontent = base64.b64decode(f['content']).decode('ascii')
+        util.write_file(fn_local, fcontent)
+        saved.append(fn_local)
+
+    return saved
+
+
+def event_name(event):
+    if event:
+        return event.get('name')
+    return None
+
+
+def event_type(event):
+    if event:
+        return event.get('event_type')
+    return None
+
+
+def event_parent(event):
+    if event:
+        return event_name(event).split("/")[0]
+    return None
+
+
+def event_timestamp(event):
+    return float(event.get('timestamp'))
+
+
+def event_datetime(event):
+    return datetime.datetime.utcfromtimestamp(event_timestamp(event))
+
+
+def delta_seconds(t1, t2):
+    return (t2 - t1).total_seconds()
+
+
+def event_duration(start, finish):
+    return delta_seconds(event_datetime(start), event_datetime(finish))
+
+
+def event_record(start_time, start, finish):
+    record = finish.copy()
+    record.update({
+        'delta': event_duration(start, finish),
+        'elapsed': delta_seconds(start_time, event_datetime(start)),
+        'indent': '|' + ' ' * (event_name(start).count('/') - 1) + '`->',
+    })
+
+    return record
+
+
+def total_time_record(total_time):
+    return 'Total Time: %3.5f seconds\n' % total_time
+
+
+def generate_records(events, blame_sort=False,
+                     print_format="(%n) %d seconds in %I%D",
+                     dump_files=False, log_datafiles=False):
+
+    sorted_events = sorted(events, key=lambda x: x['timestamp'])
+    records = []
+    start_time = None
+    total_time = 0.0
+    stage_start_time = {}
+    stages_seen = []
+    boot_records = []
+
+    unprocessed = []
+    # iterate over the time-sorted list, not the caller's ordering
+    for e in range(0, len(sorted_events)):
+        event = sorted_events[e]
+        try:
+            next_evt = sorted_events[e + 1]
+        except IndexError:
+            next_evt = None
+
+        if event_type(event) == 'start':
+            if event.get('name') in stages_seen:
+                records.append(total_time_record(total_time))
+                boot_records.append(records)
+                records = []
+                start_time = None
+                total_time = 0.0
+
+            if start_time is None:
+                stages_seen = []
+                start_time = event_datetime(event)
+                stage_start_time[event_parent(event)] = start_time
+
+            # see if we have a pair
+            if event_name(event) == event_name(next_evt):
+                if event_type(next_evt) == 'finish':
+                    records.append(format_record(print_format,
+                                                 event_record(start_time,
+                                                              event,
+                                                              next_evt)))
+            else:
+                # This is a parent event
+                records.append("Starting stage: %s" % event.get('name'))
+                unprocessed.append(event)
+                stages_seen.append(event.get('name'))
+                continue
+        else:
+            prev_evt = unprocessed.pop()
+            if event_name(event) == event_name(prev_evt):
+                record = event_record(start_time, prev_evt, event)
+                records.append(format_record("Finished stage: "
+                                             "(%n) %d seconds ",
+                                             record) + "\n")
+                total_time += record.get('delta')
+            else:
+                # not a match, put it back
+                unprocessed.append(prev_evt)
+
+    records.append(total_time_record(total_time))
+    boot_records.append(records)
+    return boot_records
+
+
+def show_events(events, print_format):
+    return generate_records(events, print_format=print_format)
+
+
+def load_events(infile, rawdata=None):
+    if rawdata:
+        data = rawdata.read()
+    else:
+        data = infile.read()
+
+    j = None
+    try:
+        j = json.loads(data)
+    except json.JSONDecodeError:
+        pass
+
+    return j, data
diff --git a/cloudinit/analyze/tests/test_dump.py b/cloudinit/analyze/tests/test_dump.py
new file mode 100644
index 00000000..2c0885d0
--- /dev/null
+++ b/cloudinit/analyze/tests/test_dump.py
@@ -0,0 +1,210 @@
+# This file is part of cloud-init. See LICENSE file for license information.
+
+from datetime import datetime
+from textwrap import dedent
+
+from cloudinit.analyze.dump import (
+    dump_events, parse_ci_logline, parse_timestamp)
+from cloudinit.util import subp, write_file
+from tests.unittests.helpers import CiTestCase
+
+
+class TestParseTimestamp(CiTestCase):
+
+    def test_parse_timestamp_handles_cloud_init_default_format(self):
+        """Logs with cloud-init detailed formats will be properly parsed."""
+        trusty_fmt = '%Y-%m-%d %H:%M:%S,%f'
+        trusty_stamp = '2016-09-12 14:39:20,839'
+
+        parsed = parse_timestamp(trusty_stamp)
+
+        # convert ourselves
+        dt = datetime.strptime(trusty_stamp, trusty_fmt)
+        expected = float(dt.strftime('%s.%f'))
+
+        # use date(1)
+        out, _err = subp(['date', '+%s.%3N', '-d', trusty_stamp])
+        timestamp = out.strip()
+        date_ts = float(timestamp)
+
+        self.assertEqual(expected, parsed)
+        self.assertEqual(expected, date_ts)
+        self.assertEqual(date_ts, parsed)
+
+    def test_parse_timestamp_handles_syslog_adding_year(self):
+        """Syslog timestamps lack a year. Add year and properly parse."""
+        syslog_fmt = '%b %d %H:%M:%S %Y'
+        syslog_stamp = 'Aug 08 15:12:51'
+
+        # convert stamp ourselves by adding the missing year value
+        year = datetime.now().year
+        dt = datetime.strptime(syslog_stamp + " " + str(year), syslog_fmt)
+        expected = float(dt.strftime('%s.%f'))
+        parsed = parse_timestamp(syslog_stamp)
+
+        # use date(1)
+        out, _ = subp(['date', '+%s.%3N', '-d', syslog_stamp])
+        timestamp = out.strip()
+        date_ts = float(timestamp)
+
+        self.assertEqual(expected, parsed)
+        self.assertEqual(expected, date_ts)
+        self.assertEqual(date_ts, parsed)
+
+    def test_parse_timestamp_handles_journalctl_format_adding_year(self):
+        """Journalctl precise timestamps lack a year. Add year and parse."""
+        journal_fmt = '%b %d %H:%M:%S.%f %Y'
+        journal_stamp = 'Aug 08 17:15:50.606811'
+
+        # convert stamp ourselves by adding the missing year value
+        year = datetime.now().year
+        dt = datetime.strptime(journal_stamp + " " + str(year), journal_fmt)
+        expected = float(dt.strftime('%s.%f'))
+        parsed = parse_timestamp(journal_stamp)
+
+        # use date(1)
+        out, _ = subp(['date', '+%s.%6N', '-d', journal_stamp])
+        timestamp = out.strip()
+        date_ts = float(timestamp)
+
+        self.assertEqual(expected, parsed)
+        self.assertEqual(expected, date_ts)
+        self.assertEqual(date_ts, parsed)
+
+    def test_parse_unexpected_timestamp_format_with_date_command(self):
+        """Dump sends unexpected timestamp formats to date(1) for processing."""
+        new_fmt = '%H:%M %m/%d %Y'
+        new_stamp = '17:15 08/08'
+
+        # convert stamp ourselves by adding the missing year value
+        year = datetime.now().year
+        dt = datetime.strptime(new_stamp + " " + str(year), new_fmt)
+        expected = float(dt.strftime('%s.%f'))
+        parsed = parse_timestamp(new_stamp)
+
+        # use date(1)
+        out, _ = subp(['date', '+%s.%6N', '-d', new_stamp])
+        timestamp = out.strip()
+        date_ts = float(timestamp)
+
+        self.assertEqual(expected, parsed)
+        self.assertEqual(expected, date_ts)
+        self.assertEqual(date_ts, parsed)
+
+
+class TestParseCILogLine(CiTestCase):
+
+    def test_parse_logline_returns_none_without_separators(self):
+        """When no separators are found, parse_ci_logline returns None."""
+        expected_parse_ignores = [
+            '', '-', 'adsf-asdf', '2017-05-22 18:02:01,088', 'CLOUDINIT']
+        for parse_ignores in expected_parse_ignores:
+            self.assertIsNone(parse_ci_logline(parse_ignores))
+
+    def test_parse_logline_returns_event_for_cloud_init_logs(self):
+        """parse_ci_logline returns an event parsed from cloud-init format."""
+        line = (
+            "2017-08-08 20:05:07,147 - util.py[DEBUG]: Cloud-init v. 0.7.9"
+            " running 'init-local' at Tue, 08 Aug 2017 20:05:07 +0000. Up"
+            " 6.26 seconds.")
+        dt = datetime.strptime(
+            '2017-08-08 20:05:07,147', '%Y-%m-%d %H:%M:%S,%f')
+        timestamp = float(dt.strftime('%s.%f'))
+        expected = {
+            'description': 'starting search for local datasources',
+            'event_type': 'start',
+            'name': 'init-local',
+            'origin': 'cloudinit',
+            'timestamp': timestamp}
+        self.assertEqual(expected, parse_ci_logline(line))
+
+    def test_parse_logline_returns_event_for_journalctl_logs(self):
+        """parse_ci_logline returns an event parsed from journalctl format."""
+        line = ("Nov 03 06:51:06.074410 x2 cloud-init[106]: [CLOUDINIT]"
+                " util.py[DEBUG]: Cloud-init v. 0.7.8 running 'init-local' at"
+                " Thu, 03 Nov 2016 06:51:06 +0000. Up 1.0 seconds.")
+        year = datetime.now().year
+        dt = datetime.strptime(
+            'Nov 03 06:51:06.074410 %d' % year, '%b %d %H:%M:%S.%f %Y')
+        timestamp = float(dt.strftime('%s.%f'))
+        expected = {
+            'description': 'starting search for local datasources',
+            'event_type': 'start',
+            'name': 'init-local',
+            'origin': 'cloudinit',
+            'timestamp': timestamp}
+        self.assertEqual(expected, parse_ci_logline(line))
+
+    def test_parse_logline_returns_event_for_finish_events(self):
+        """parse_ci_logline returns a finish event for a parsed log line."""
+        line = ('2016-08-30 21:53:25.972325+00:00 y1 [CLOUDINIT]'
+                ' handlers.py[DEBUG]: finish: modules-final: SUCCESS: running'
+                ' modules for final')
+        expected = {
+            'description': 'running modules for final',
+            'event_type': 'finish',
+            'name': 'modules-final',
+            'origin': 'cloudinit',
+            'result': 'SUCCESS',
+            'timestamp': 1472594005.972}
+        self.assertEqual(expected, parse_ci_logline(line))
+
+
+SAMPLE_LOGS = dedent("""\
+Nov 03 06:51:06.074410 x2 cloud-init[106]: [CLOUDINIT] util.py[DEBUG]:\
+ Cloud-init v. 0.7.8 running 'init-local' at Thu, 03 Nov 2016\
+ 06:51:06 +0000. Up 1.0 seconds.
+2016-08-30 21:53:25.972325+00:00 y1 [CLOUDINIT] handlers.py[DEBUG]: finish:\
+ modules-final: SUCCESS: running modules for final
+""")
+
+
+class TestDumpEvents(CiTestCase):
+    maxDiff = None
+
+    def test_dump_events_with_rawdata(self):
+        """Rawdata is split and parsed into a tuple of events and data."""
+        events, data = dump_events(rawdata=SAMPLE_LOGS)
+        expected_data = SAMPLE_LOGS.splitlines()
+        year = datetime.now().year
+        dt1 = datetime.strptime(
+            'Nov 03 06:51:06.074410 %d' % year, '%b %d %H:%M:%S.%f %Y')
+        timestamp1 = float(dt1.strftime('%s.%f'))
+        expected_events = [{
+            'description': 'starting search for local datasources',
+            'event_type': 'start',
+            'name': 'init-local',
+            'origin': 'cloudinit',
+            'timestamp': timestamp1}, {
+            'description': 'running modules for final',
+            'event_type': 'finish',
+            'name': 'modules-final',
+            'origin': 'cloudinit',
+            'result': 'SUCCESS',
+            'timestamp': 1472594005.972}]
+        self.assertEqual(expected_events, events)
+        self.assertEqual(expected_data, data)
+
+    def test_dump_events_with_cisource(self):
+        """Cisource file is read and parsed into a tuple of events and data."""
+        tmpfile = self.tmp_path('logfile')
+        write_file(tmpfile, SAMPLE_LOGS)
+        events, data = dump_events(cisource=open(tmpfile))
+        year = datetime.now().year
+        dt1 = datetime.strptime(
+            'Nov 03 06:51:06.074410 %d' % year, '%b %d %H:%M:%S.%f %Y')
+        timestamp1 = float(dt1.strftime('%s.%f'))
+        expected_events = [{
+            'description': 'starting search for local datasources',
+            'event_type': 'start',
+            'name': 'init-local',
+            'origin': 'cloudinit',
+            'timestamp': timestamp1}, {
+            'description': 'running modules for final',
+            'event_type': 'finish',
+            'name': 'modules-final',
+            'origin': 'cloudinit',
+            'result': 'SUCCESS',
+            'timestamp': 1472594005.972}]
+        self.assertEqual(expected_events, events)
+        self.assertEqual(SAMPLE_LOGS.splitlines(), [d.strip() for d in data])
diff --git a/cloudinit/cmd/main.py b/cloudinit/cmd/main.py
index 139e03b3..9c0ac864 100644
--- a/cloudinit/cmd/main.py
+++ b/cloudinit/cmd/main.py
@@ -50,13 +50,6 @@ WELCOME_MSG_TPL = ("Cloud-init v. {version} running '{action}' at "
 # Module section template
 MOD_SECTION_TPL = "cloud_%s_modules"
 
-# Things u can query on
-QUERY_DATA_TYPES = [
-    'data',
-    'data_raw',
-    'instance_id',
-]
-
 # Frequency shortname to full name
 # (so users don't have to remember the full name...)
 FREQ_SHORT_NAMES = {
@@ -510,11 +503,6 @@ def main_modules(action_name, args):
     return run_module_section(mods, name, name)
 
 
-def main_query(name, _args):
-    raise NotImplementedError(("Action '%s' is not"
-                               " currently implemented") % (name))
-
-
 def main_single(name, args):
     # Cloud-init single stage is broken up into the following sub-stages
     # 1. Ensure that the init object fetches its config without errors
@@ -713,9 +701,11 @@ def main(sysv_args=None):
                         default=False)
     parser.set_defaults(reporter=None)
 
-    subparsers = parser.add_subparsers()
+    subparsers = parser.add_subparsers(title='Subcommands', dest='subcommand')
+    subparsers.required = True
 
     # Each action and its sub-options (if any)
+
     parser_init = subparsers.add_parser('init',
                                         help=('initializes cloud-init and'
                                               ' performs initial modules'))
@@ -737,17 +727,6 @@ def main(sysv_args=None):
                             choices=('init', 'config', 'final'))
     parser_mod.set_defaults(action=('modules', main_modules))
 
-    # These settings are used when you want to query information
-    # stored in the cloud-init data objects/directories/files
-    parser_query = subparsers.add_parser('query',
-                                         help=('query information stored '
-                                               'in cloud-init'))
-    parser_query.add_argument("--name", '-n', action="store",
-                              help="item name to query on",
-                              required=True,
-                              choices=QUERY_DATA_TYPES)
-    parser_query.set_defaults(action=('query', main_query))
-
     # This subcommand allows you to run a single module
     parser_single = subparsers.add_parser('single',
                                           help=('run a single module '))
@@ -781,15 +760,22 @@ def main(sysv_args=None):
                                  help=('list defined features'))
     parser_features.set_defaults(action=('features', main_features))
 
+    parser_analyze = subparsers.add_parser(
+        'analyze', help='Devel tool: Analyze cloud-init logs and data')
+    if sysv_args and sysv_args[0] == 'analyze':
+        # Only load this parser if analyze is specified to avoid file load cost
+        # FIXME put this under 'devel' subcommand (coming in next branch)
+        from cloudinit.analyze.__main__ import get_parser as analyze_parser
+        # Construct analyze subcommand parser
+        analyze_parser(parser_analyze)
+
     args = parser.parse_args(args=sysv_args)
 
-    try:
-        (name, functor) = args.action
-    except AttributeError:
-        parser.error('too few arguments')
+    # subparsers.required = True and each subparser sets action=(name, functor)
+    (name, functor) = args.action
 
     # Setup basic logging to start (until reinitialized)
-    # iff in debug mode...
+    # iff in debug mode.
     if args.debug:
         logging.setupBasicLogging()
diff --git a/doc/rtd/index.rst b/doc/rtd/index.rst
index a691103e..de67f361 100644
--- a/doc/rtd/index.rst
+++ b/doc/rtd/index.rst
@@ -40,6 +40,7 @@ initialization of a cloud instance.
    topics/merging.rst
    topics/network-config.rst
    topics/vendordata.rst
+   topics/debugging.rst
    topics/moreinfo.rst
    topics/hacking.rst
    topics/tests.rst
diff --git a/doc/rtd/topics/debugging.rst b/doc/rtd/topics/debugging.rst
new file mode 100644
index 00000000..4e43dd57
--- /dev/null
+++ b/doc/rtd/topics/debugging.rst
@@ -0,0 +1,146 @@
+********************************
+Testing and debugging cloud-init
+********************************
+
+Overview
+========
+This topic will discuss general approaches for test and debug of cloud-init on
+deployed instances.
+
+
+Boot Time Analysis - cloud-init analyze
+=======================================
+Occasionally instances don't appear as performant as we would like and
+cloud-init packages a simple facility to inspect what operations took
+cloud-init the longest during boot and setup.
+
+The script **/usr/bin/cloud-init** has an **analyze** sub-command which
+parses any cloud-init.log file into formatted and sorted events. It allows
+for detailed analysis of the most costly cloud-init operations to determine
+the long-pole in cloud-init configuration and setup. These subcommands
+default to reading /var/log/cloud-init.log.
+
+* ``analyze show`` Parse and organize cloud-init.log events by stage and
+  include each sub-stage granularity with time delta reports.
+
+.. code-block:: bash
+
+    $ cloud-init analyze show -i my-cloud-init.log
+    -- Boot Record 01 --
+    The total time elapsed since completing an event is printed after the "@"
+    character.
+    The time the event takes is printed after the "+" character.
+
+    Starting stage: modules-config
+    |`->config-emit_upstart ran successfully @05.47600s +00.00100s
+    |`->config-snap_config ran successfully @05.47700s +00.00100s
+    |`->config-ssh-import-id ran successfully @05.47800s +00.00200s
+    |`->config-locale ran successfully @05.48000s +00.00100s
+    ...
+
+
+* ``analyze dump`` Parse cloud-init.log into event records and return a list
+  of dictionaries that can be consumed for other reporting needs.
+
+.. code-block:: bash
+
+    $ cloud-init analyze dump -i my-cloud-init.log
+    [
+     {
+      "description": "running config modules",
+      "event_type": "start",
+      "name": "modules-config",
+      "origin": "cloudinit",
+      "timestamp": 1510807493.0
+     },...
+
+* ``analyze blame`` Parse cloud-init.log into event records and sort them
+  based on highest time cost for quick assessment of areas of cloud-init
+  that may need improvement.
+
+.. code-block:: bash
+
+    $ cloud-init analyze blame -i my-cloud-init.log
+    -- Boot Record 11 --
+       00.01300s (modules-final/config-scripts-per-boot)
+       00.00400s (modules-final/config-final-message)
+       00.00100s (modules-final/config-rightscale_userdata)
+    ...
+
+
+Analyze quickstart - LXC
+------------------------
+To quickly obtain a cloud-init log try using lxc on any ubuntu system:
+
+.. code-block:: bash
+
+    $ lxc init ubuntu-daily:xenial x1
+    $ lxc start x1
+    # Take lxc's cloud-init.log and pipe it to the analyzer
+    $ lxc file pull x1/var/log/cloud-init.log - | cloud-init analyze dump -i -
+    $ lxc file pull x1/var/log/cloud-init.log - | \
+      python3 -m cloudinit.analyze dump -i -
+
+
+Analyze quickstart - KVM
+------------------------
+To quickly analyze a KVM cloud-init log:
+
+1. Download the current cloud image
+
+.. code-block:: bash
+
+    $ wget https://cloud-images.ubuntu.com/daily/server/xenial/current/xenial-server-cloudimg-amd64.img
+
+2. Create a snapshot image to preserve the original cloud-image
+
+.. code-block:: bash
+
+    $ qemu-img create -b xenial-server-cloudimg-amd64.img -f qcow2 \
+      test-cloudinit.qcow2
+
+3. Create a seed image with metadata using ``cloud-localds``
+
+.. code-block:: bash
+
+    $ cat > user-data <<EOF
+    #cloud-config
+    password: passw0rd
+    chpasswd: { expire: False }
+    EOF
+    $ cloud-localds my-seed.img user-data
+
+4. Launch your modified VM
+
+.. code-block:: bash
+
+    $ kvm -m 512 -net nic -net user -redir tcp:2222::22 \
+      -drive file=test-cloudinit.qcow2,if=virtio,format=qcow2 \
+      -drive file=my-seed.img,if=virtio,format=raw
+
+5. Analyze the boot (blame, dump, show)
+
+.. code-block:: bash
+
+    $ ssh -p 2222 ubuntu@localhost 'cat /var/log/cloud-init.log' | \
+      cloud-init analyze blame -i -
+
+
+Running single cloud config modules
+===================================
+This subcommand is not called by the init system. It can be called manually to
+load the configured datasource and run a single cloud-config module once using
+the cached userdata and metadata after the instance has booted. When a module
+is run by cloud-init, it stores a semaphore file in
+``/var/lib/cloud/instance/sem/config_<module_name>.<frequency>`` which marks
+when the module last successfully ran. Presence of this semaphore file
+prevents a module from running again if it has already been run. To ensure
+that a module is run again, the desired frequency can be overridden on the
+commandline:
+
+.. code-block:: bash
+
+    $ sudo cloud-init single --name cc_ssh --frequency always
+    ...
+    Generating public/private ed25519 key pair
+    ...
+
+Inspect cloud-init.log for output of what operations were performed as a
+result.
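+
+Analyze output formats
+======================
+``analyze show`` also accepts a ``-f``/``--format`` string whose ``%`` keys
+map to event fields (the full mapping lives in ``format_key`` in
+``cloudinit/analyze/show.py``). A sketch of a custom format follows; the
+output line shown is illustrative rather than captured from a real boot:
+
+.. code-block:: bash
+
+    # %n is the event name, %D its description, %d its duration in seconds
+    $ cloud-init analyze show -f '%n: %D took %ds' -i my-cloud-init.log
+    init-local: searching for local datasources took 30.21000s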
diff --git a/tests/unittests/test_cli.py b/tests/unittests/test_cli.py
index 06f366b2..7780f164 100644
--- a/tests/unittests/test_cli.py
+++ b/tests/unittests/test_cli.py
@@ -31,9 +31,90 @@ class TestCLI(test_helpers.FilesystemMockingTestCase):
 
     def test_no_arguments_shows_error_message(self):
         exit_code = self._call_main()
-        self.assertIn('cloud-init: error: too few arguments',
-                      self.stderr.getvalue())
+        missing_subcommand_message = [
+            'too few arguments',  # python2.7 msg
+            'the following arguments are required: subcommand'  # python3 msg
+        ]
+        error = self.stderr.getvalue()
+        matches = [msg in error for msg in missing_subcommand_message]
+        self.assertTrue(
+            any(matches), 'Did not find error message for missing subcommand')
         self.assertEqual(2, exit_code)
 
+    def test_all_subcommands_represented_in_help(self):
+        """All known subparsers are represented in the cloud-init help doc."""
+        self._call_main()
+        error = self.stderr.getvalue()
+        expected_subcommands = ['analyze', 'init', 'modules', 'single',
+                                'dhclient-hook', 'features']
+        for subcommand in expected_subcommands:
+            self.assertIn(subcommand, error)
 
-# vi: ts=4 expandtab
+    @mock.patch('cloudinit.cmd.main.status_wrapper')
+    def test_init_subcommand_parser(self, m_status_wrapper):
+        """The subcommand 'init' calls status_wrapper passing init."""
+        self._call_main(['cloud-init', 'init'])
+        (name, parseargs) = m_status_wrapper.call_args_list[0][0]
+        self.assertEqual('init', name)
+        self.assertEqual('init', parseargs.subcommand)
+        self.assertEqual('init', parseargs.action[0])
+        self.assertEqual('main_init', parseargs.action[1].__name__)
+
+    @mock.patch('cloudinit.cmd.main.status_wrapper')
+    def test_modules_subcommand_parser(self, m_status_wrapper):
+        """The subcommand 'modules' calls status_wrapper passing modules."""
+        self._call_main(['cloud-init', 'modules'])
+        (name, parseargs) = m_status_wrapper.call_args_list[0][0]
+        self.assertEqual('modules', name)
+        self.assertEqual('modules', parseargs.subcommand)
+        self.assertEqual('modules', parseargs.action[0])
+        self.assertEqual('main_modules', parseargs.action[1].__name__)
+
+    def test_analyze_subcommand_parser(self):
+        """The subcommand 'cloud-init analyze' calls the correct subparser."""
+        self._call_main(['cloud-init', 'analyze'])
+        # These subcommands are only valid for the cloud-init analyze script
+        expected_subcommands = ['blame', 'show', 'dump']
+        error = self.stderr.getvalue()
+        for subcommand in expected_subcommands:
+            self.assertIn(subcommand, error)
+
+    @mock.patch('cloudinit.cmd.main.main_single')
+    def test_single_subcommand(self, m_main_single):
+        """The subcommand 'single' calls main_single with valid args."""
+        self._call_main(['cloud-init', 'single', '--name', 'cc_ntp'])
+        (name, parseargs) = m_main_single.call_args_list[0][0]
+        self.assertEqual('single', name)
+        self.assertEqual('single', parseargs.subcommand)
+        self.assertEqual('single', parseargs.action[0])
+        self.assertFalse(parseargs.debug)
+        self.assertFalse(parseargs.force)
+        self.assertIsNone(parseargs.frequency)
+        self.assertEqual('cc_ntp', parseargs.name)
+        self.assertFalse(parseargs.report)
+
+    @mock.patch('cloudinit.cmd.main.dhclient_hook')
+    def test_dhclient_hook_subcommand(self, m_dhclient_hook):
+        """The subcommand 'dhclient-hook' calls dhclient_hook with args."""
+        self._call_main(['cloud-init', 'dhclient-hook', 'net_action', 'eth0'])
+        (name, parseargs) = m_dhclient_hook.call_args_list[0][0]
+        self.assertEqual('dhclient_hook', name)
+        self.assertEqual('dhclient-hook', parseargs.subcommand)
+        self.assertEqual('dhclient_hook', parseargs.action[0])
+        self.assertFalse(parseargs.debug)
+        self.assertFalse(parseargs.force)
+        self.assertEqual('net_action', parseargs.net_action)
+        self.assertEqual('eth0', parseargs.net_interface)
+
+    @mock.patch('cloudinit.cmd.main.main_features')
+    def test_features_hook_subcommand(self, m_features):
+        """The subcommand 'features' calls main_features with args."""
+        self._call_main(['cloud-init', 'features'])
+        (name, parseargs) = m_features.call_args_list[0][0]
+        self.assertEqual('features', name)
+        self.assertEqual('features', parseargs.subcommand)
+        self.assertEqual('features', parseargs.action[0])
+        self.assertFalse(parseargs.debug)
+        self.assertFalse(parseargs.force)
+
+# vi: ts=4 expandtab
--
cgit v1.2.3
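
Reviewer note: the event parser can be exercised directly, without installing
the package. A minimal sketch, assuming this branch's checkout is on
PYTHONPATH and GNU date(1) is available (timestamps the parser does not
recognize are shelled out to it); the sample lines are adapted from the test
cases above:

    from cloudinit.analyze.dump import dump_events, json_dumps

    # Two log lines in formats parse_ci_logline understands: the cloud-init
    # "running" banner and a CLOUDINIT finish event (content illustrative).
    SAMPLE = (
        "2017-08-08 20:05:07,147 - util.py[DEBUG]: Cloud-init v. 0.7.9"
        " running 'init-local' at Tue, 08 Aug 2017 20:05:07 +0000."
        " Up 6.26 seconds.\n"
        "2016-08-30 21:53:25.972325+00:00 y1 [CLOUDINIT] handlers.py[DEBUG]:"
        " finish: modules-final: SUCCESS: running modules for final\n")

    # dump_events(rawdata=...) returns (events, raw_lines); each event is a
    # dict with name, description, timestamp, origin and event_type keys.
    events, _lines = dump_events(rawdata=SAMPLE)
    print(json_dumps(events))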