author     Chad Smith <chad.smith@canonical.com>  2017-08-21 13:46:23 -0600
committer  Chad Smith <chad.smith@canonical.com>  2017-08-21 13:46:23 -0600
commit     e74d7752f1761c3a8d3c19877de4707d00c49d08 (patch)
tree       0627dd5d06d83b770b38196bceabddd06c0b1b77
parent     dc2bd79949492bccdc1d7df0132f98c354d51943 (diff)
download   vyos-cloud-init-e74d7752f1761c3a8d3c19877de4707d00c49d08.tar.gz
           vyos-cloud-init-e74d7752f1761c3a8d3c19877de4707d00c49d08.zip
tools: Add tooling for basic cloud-init performance analysis.
This branch adds cloudinit-analyze into cloud-init proper. It adds an
"analyze" subcommand to the cloud-init command line utility for quick
performance assessment of cloud-init stages and events. On a cloud-init
configured instance, running "cloud-init analyze blame" will now report
which cloud-init events cost the most wall time. This allows for quick
assessment of the most costly stages of cloud-init.

This functionality is pulled from Ryan Harper's analyze work. The
cloudinit-analyze main script itself has been refactored a bit for
inclusion as a subcommand of the cloud-init CLI. There will be a followup
branch at some point which will optionally instrument detailed strace
profiling, but that approach needs a bit more discussion first.

This branch also adds:
* an additional debugging topic to the sphinx-generated docs describing
  cloud-init analyze, dump and show as well as cloud-init single usage.
* updates to the Makefile unittests target to include the cloudinit
  directory, because we now have unittests within that package.

LP: #1709761
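As a quick illustration of the new plumbing (not part of the commit itself),
the analyze entry points can also be driven from Python; a minimal sketch,
assuming cloud-init is installed and /var/log/cloud-init.log is readable:

.. code-block:: python

    # Minimal sketch: invoke the new analyze machinery programmatically.
    # get_parser() and the (name, handler) action tuple come from
    # cloudinit/analyze/__main__.py, added below.
    from cloudinit.analyze.__main__ import get_parser

    parser = get_parser()
    args = parser.parse_args(['blame', '-i', '/var/log/cloud-init.log'])
    name, handler = args.action  # ('blame', analyze_blame)
    handler(name, args)          # writes the wall-time-sorted report to stdout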
-rw-r--r--  Makefile                                2
-rw-r--r--  cloudinit/analyze/__init__.py           0
-rw-r--r--  cloudinit/analyze/__main__.py         155
-rw-r--r--  cloudinit/analyze/dump.py             176
-rw-r--r--  cloudinit/analyze/show.py             207
-rw-r--r--  cloudinit/analyze/tests/test_dump.py  210
-rw-r--r--  cloudinit/cmd/main.py                  44
-rw-r--r--  doc/rtd/index.rst                       1
-rw-r--r--  doc/rtd/topics/debugging.rst          146
-rw-r--r--  tests/unittests/test_cli.py            87
10 files changed, 995 insertions, 33 deletions
diff --git a/Makefile b/Makefile
index f280911f..9e7f4ee7 100644
--- a/Makefile
+++ b/Makefile
@@ -48,7 +48,7 @@ pyflakes3:
@$(CWD)/tools/run-pyflakes3
unittest: clean_pyc
- nosetests $(noseopts) tests/unittests
+ nosetests $(noseopts) tests/unittests cloudinit
unittest3: clean_pyc
nosetests3 $(noseopts) tests/unittests
diff --git a/cloudinit/analyze/__init__.py b/cloudinit/analyze/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/cloudinit/analyze/__init__.py
diff --git a/cloudinit/analyze/__main__.py b/cloudinit/analyze/__main__.py
new file mode 100644
index 00000000..71cba4f2
--- /dev/null
+++ b/cloudinit/analyze/__main__.py
@@ -0,0 +1,155 @@
+# Copyright (C) 2017 Canonical Ltd.
+#
+# This file is part of cloud-init. See LICENSE file for license information.
+
+import argparse
+import re
+import sys
+
+from . import dump
+from . import show
+
+
+def get_parser(parser=None):
+ if not parser:
+ parser = argparse.ArgumentParser(
+ prog='cloudinit-analyze',
+ description='Devel tool: Analyze cloud-init logs and data')
+ subparsers = parser.add_subparsers(title='Subcommands', dest='subcommand')
+ subparsers.required = True
+
+ parser_blame = subparsers.add_parser(
+ 'blame', help='Print list of executed stages ordered by time to init')
+ parser_blame.add_argument(
+ '-i', '--infile', action='store', dest='infile',
+ default='/var/log/cloud-init.log',
+ help='specify where to read input.')
+ parser_blame.add_argument(
+ '-o', '--outfile', action='store', dest='outfile', default='-',
+        help='specify where to write output.')
+ parser_blame.set_defaults(action=('blame', analyze_blame))
+
+ parser_show = subparsers.add_parser(
+ 'show', help='Print list of in-order events during execution')
+ parser_show.add_argument('-f', '--format', action='store',
+ dest='print_format', default='%I%D @%Es +%ds',
+ help='specify formatting of output.')
+ parser_show.add_argument('-i', '--infile', action='store',
+ dest='infile', default='/var/log/cloud-init.log',
+ help='specify where to read input.')
+ parser_show.add_argument('-o', '--outfile', action='store',
+ dest='outfile', default='-',
+ help='specify where to write output.')
+ parser_show.set_defaults(action=('show', analyze_show))
+ parser_dump = subparsers.add_parser(
+ 'dump', help='Dump cloud-init events in JSON format')
+ parser_dump.add_argument('-i', '--infile', action='store',
+ dest='infile', default='/var/log/cloud-init.log',
+                             help='specify where to read input.')
+ parser_dump.add_argument('-o', '--outfile', action='store',
+ dest='outfile', default='-',
+                             help='specify where to write output.')
+ parser_dump.set_defaults(action=('dump', analyze_dump))
+ return parser
+
+
+def analyze_blame(name, args):
+ """Report a list of records sorted by largest time delta.
+
+ For example:
+ 30.210s (init-local) searching for datasource
+ 8.706s (init-network) reading and applying user-data
+ 166ms (modules-config) ....
+ 807us (modules-final) ...
+
+ We generate event records parsing cloud-init logs, formatting the output
+ and sorting by record data ('delta')
+ """
+ (infh, outfh) = configure_io(args)
+ blame_format = ' %ds (%n)'
+    r = re.compile(r'(^\s+\d+\.\d+)', re.MULTILINE)
+ for idx, record in enumerate(show.show_events(_get_events(infh),
+ blame_format)):
+ srecs = sorted(filter(r.match, record), reverse=True)
+ outfh.write('-- Boot Record %02d --\n' % (idx + 1))
+ outfh.write('\n'.join(srecs) + '\n')
+ outfh.write('\n')
+ outfh.write('%d boot records analyzed\n' % (idx + 1))
+
+
+def analyze_show(name, args):
+    """Generate output records using the 'standard' format for printing events.
+
+ Example output follows:
+ Starting stage: (init-local)
+ ...
+ Finished stage: (init-local) 0.105195 seconds
+
+ Starting stage: (init-network)
+ ...
+ Finished stage: (init-network) 0.339024 seconds
+
+ Starting stage: (modules-config)
+ ...
+ Finished stage: (modules-config) 0.NNN seconds
+
+ Starting stage: (modules-final)
+ ...
+ Finished stage: (modules-final) 0.NNN seconds
+ """
+ (infh, outfh) = configure_io(args)
+ for idx, record in enumerate(show.show_events(_get_events(infh),
+ args.print_format)):
+ outfh.write('-- Boot Record %02d --\n' % (idx + 1))
+ outfh.write('The total time elapsed since completing an event is'
+ ' printed after the "@" character.\n')
+ outfh.write('The time the event takes is printed after the "+" '
+ 'character.\n\n')
+ outfh.write('\n'.join(record) + '\n')
+ outfh.write('%d boot records analyzed\n' % (idx + 1))
+
+
+def analyze_dump(name, args):
+    """Dump cloud-init events in JSON format"""
+ (infh, outfh) = configure_io(args)
+ outfh.write(dump.json_dumps(_get_events(infh)) + '\n')
+
+
+def _get_events(infile):
+ rawdata = None
+ events, rawdata = show.load_events(infile, None)
+ if not events:
+ events, _ = dump.dump_events(rawdata=rawdata)
+ return events
+
+
+def configure_io(args):
+ """Common parsing and setup of input/output files"""
+ if args.infile == '-':
+ infh = sys.stdin
+ else:
+ try:
+ infh = open(args.infile, 'r')
+ except (FileNotFoundError, PermissionError):
+ sys.stderr.write('Cannot open file %s\n' % args.infile)
+ sys.exit(1)
+
+ if args.outfile == '-':
+ outfh = sys.stdout
+ else:
+ try:
+ outfh = open(args.outfile, 'w')
+ except PermissionError:
+ sys.stderr.write('Cannot open file %s\n' % args.outfile)
+ sys.exit(1)
+
+ return (infh, outfh)
+
+
+if __name__ == '__main__':
+ parser = get_parser()
+ args = parser.parse_args()
+ (name, action_functor) = args.action
+ action_functor(name, args)
+
+# vi: ts=4 expandtab
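A note on _get_events() above: it first tries the input as JSON (the output
of ``analyze dump``) and falls back to parsing raw log text. A small sketch
of that fallback, reusing a log line from the tests below:

.. code-block:: python

    # Sketch: _get_events() accepts either dumped JSON or raw cloud-init.log
    # text; raw text fails json.loads() and is parsed into event dicts.
    import io

    from cloudinit.analyze.__main__ import _get_events

    raw_log = ("2017-08-08 20:05:07,147 - util.py[DEBUG]: Cloud-init v. 0.7.9"
               " running 'init-local' at Tue, 08 Aug 2017 20:05:07 +0000."
               " Up 6.26 seconds.\n")
    events = _get_events(io.StringIO(raw_log))
    print(events[0]['name'], events[0]['event_type'])  # init-local start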
diff --git a/cloudinit/analyze/dump.py b/cloudinit/analyze/dump.py
new file mode 100644
index 00000000..ca4da496
--- /dev/null
+++ b/cloudinit/analyze/dump.py
@@ -0,0 +1,176 @@
+# This file is part of cloud-init. See LICENSE file for license information.
+
+import calendar
+from datetime import datetime
+import json
+import sys
+
+from cloudinit import util
+
+stage_to_description = {
+ 'finished': 'finished running cloud-init',
+ 'init-local': 'starting search for local datasources',
+ 'init-network': 'searching for network datasources',
+ 'init': 'searching for network datasources',
+ 'modules-config': 'running config modules',
+ 'modules-final': 'finalizing modules',
+ 'modules': 'running modules for',
+ 'single': 'running single module ',
+}
+
+# logger's asctime format
+CLOUD_INIT_ASCTIME_FMT = "%Y-%m-%d %H:%M:%S,%f"
+
+# journalctl -o short-precise
+CLOUD_INIT_JOURNALCTL_FMT = "%b %d %H:%M:%S.%f %Y"
+
+# other
+DEFAULT_FMT = "%b %d %H:%M:%S %Y"
+
+
+def parse_timestamp(timestampstr):
+ # default syslog time does not include the current year
+ months = [calendar.month_abbr[m] for m in range(1, 13)]
+ if timestampstr.split()[0] in months:
+ # Aug 29 22:55:26
+ FMT = DEFAULT_FMT
+ if '.' in timestampstr:
+ FMT = CLOUD_INIT_JOURNALCTL_FMT
+ dt = datetime.strptime(timestampstr + " " +
+ str(datetime.now().year),
+ FMT)
+ timestamp = dt.strftime("%s.%f")
+ elif "," in timestampstr:
+ # 2016-09-12 14:39:20,839
+ dt = datetime.strptime(timestampstr, CLOUD_INIT_ASCTIME_FMT)
+ timestamp = dt.strftime("%s.%f")
+ else:
+ # allow date(1) to handle other formats we don't expect
+ timestamp = parse_timestamp_from_date(timestampstr)
+
+ return float(timestamp)
+
+
+def parse_timestamp_from_date(timestampstr):
+ out, _ = util.subp(['date', '+%s.%3N', '-d', timestampstr])
+ timestamp = out.strip()
+ return float(timestamp)
+
+
+def parse_ci_logline(line):
+ # Stage Starts:
+ # Cloud-init v. 0.7.7 running 'init-local' at \
+ # Fri, 02 Sep 2016 19:28:07 +0000. Up 1.0 seconds.
+ # Cloud-init v. 0.7.7 running 'init' at \
+ # Fri, 02 Sep 2016 19:28:08 +0000. Up 2.0 seconds.
+ # Cloud-init v. 0.7.7 finished at
+ # Aug 29 22:55:26 test1 [CLOUDINIT] handlers.py[DEBUG]: \
+ # finish: modules-final: SUCCESS: running modules for final
+ # 2016-08-30T21:53:25.972325+00:00 y1 [CLOUDINIT] handlers.py[DEBUG]: \
+ # finish: modules-final: SUCCESS: running modules for final
+ #
+ # Nov 03 06:51:06.074410 x2 cloud-init[106]: [CLOUDINIT] util.py[DEBUG]: \
+ # Cloud-init v. 0.7.8 running 'init-local' at \
+ # Thu, 03 Nov 2016 06:51:06 +0000. Up 1.0 seconds.
+ #
+ # 2017-05-22 18:02:01,088 - util.py[DEBUG]: Cloud-init v. 0.7.9 running \
+ # 'init-local' at Mon, 22 May 2017 18:02:01 +0000. Up 2.0 seconds.
+
+ separators = [' - ', ' [CLOUDINIT] ']
+ found = False
+ for sep in separators:
+ if sep in line:
+ found = True
+ break
+
+ if not found:
+ return None
+
+ (timehost, eventstr) = line.split(sep)
+
+ # journalctl -o short-precise
+ if timehost.endswith(":"):
+ timehost = " ".join(timehost.split()[0:-1])
+
+ if "," in timehost:
+ timestampstr, extra = timehost.split(",")
+ timestampstr += ",%s" % extra.split()[0]
+ if ' ' in extra:
+ hostname = extra.split()[-1]
+ else:
+ hostname = timehost.split()[-1]
+ timestampstr = timehost.split(hostname)[0].strip()
+ if 'Cloud-init v.' in eventstr:
+ event_type = 'start'
+ if 'running' in eventstr:
+ stage_and_timestamp = eventstr.split('running')[1].lstrip()
+ event_name, _ = stage_and_timestamp.split(' at ')
+ event_name = event_name.replace("'", "").replace(":", "-")
+ if event_name == "init":
+ event_name = "init-network"
+ else:
+ # don't generate a start for the 'finished at' banner
+ return None
+ event_description = stage_to_description[event_name]
+ else:
+ (pymodloglvl, event_type, event_name) = eventstr.split()[0:3]
+ event_description = eventstr.split(event_name)[1].strip()
+
+ event = {
+ 'name': event_name.rstrip(":"),
+ 'description': event_description,
+ 'timestamp': parse_timestamp(timestampstr),
+ 'origin': 'cloudinit',
+ 'event_type': event_type.rstrip(":"),
+ }
+ if event['event_type'] == "finish":
+ result = event_description.split(":")[0]
+ desc = event_description.split(result)[1].lstrip(':').strip()
+ event['result'] = result
+ event['description'] = desc.strip()
+
+ return event
+
+
+def json_dumps(data):
+ return json.dumps(data, indent=1, sort_keys=True,
+ separators=(',', ': '))
+
+
+def dump_events(cisource=None, rawdata=None):
+ events = []
+ event = None
+ CI_EVENT_MATCHES = ['start:', 'finish:', 'Cloud-init v.']
+
+ if not any([cisource, rawdata]):
+ raise ValueError('Either cisource or rawdata parameters are required')
+
+ if rawdata:
+ data = rawdata.splitlines()
+ else:
+ data = cisource.readlines()
+
+ for line in data:
+ for match in CI_EVENT_MATCHES:
+ if match in line:
+ try:
+ event = parse_ci_logline(line)
+ except ValueError:
+ sys.stderr.write('Skipping invalid entry\n')
+ if event:
+ events.append(event)
+
+ return events, data
+
+
+def main():
+ if len(sys.argv) > 1:
+ cisource = open(sys.argv[1])
+ else:
+ cisource = sys.stdin
+
+    events, _ = dump_events(cisource)
+    return json_dumps(events)
+
+
+if __name__ == "__main__":
+ print(main())
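The three timestamp shapes parse_timestamp() handles can be exercised
directly; a short sketch (returns epoch seconds as floats on Linux; the
year-less syslog and journalctl styles assume the current year, and anything
else shells out to date(1)):

.. code-block:: python

    # Sketch: the three timestamp styles parse_timestamp() understands.
    from cloudinit.analyze.dump import parse_timestamp

    print(parse_timestamp('2016-09-12 14:39:20,839'))  # cloud-init asctime
    print(parse_timestamp('Aug 08 17:15:50.606811'))   # journalctl precise
    print(parse_timestamp('Aug 29 22:55:26'))          # default syslog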
diff --git a/cloudinit/analyze/show.py b/cloudinit/analyze/show.py
new file mode 100644
index 00000000..3b356bb8
--- /dev/null
+++ b/cloudinit/analyze/show.py
@@ -0,0 +1,207 @@
+# Copyright (C) 2016 Canonical Ltd.
+#
+# Author: Ryan Harper <ryan.harper@canonical.com>
+#
+# This file is part of cloud-init. See LICENSE file for license information.
+
+import base64
+import datetime
+import json
+import os
+
+from cloudinit import util
+
+# An event:
+'''
+{
+ "description": "executing late commands",
+ "event_type": "start",
+ "level": "INFO",
+        "name": "cmd-install/stage-late",
+ "origin": "cloudinit",
+ "timestamp": 1461164249.1590767,
+},
+
+ {
+ "description": "executing late commands",
+ "event_type": "finish",
+ "level": "INFO",
+ "name": "cmd-install/stage-late",
+ "origin": "cloudinit",
+ "result": "SUCCESS",
+ "timestamp": 1461164249.1590767
+ }
+
+'''
+format_key = {
+ '%d': 'delta',
+ '%D': 'description',
+ '%E': 'elapsed',
+ '%e': 'event_type',
+ '%I': 'indent',
+ '%l': 'level',
+ '%n': 'name',
+ '%o': 'origin',
+ '%r': 'result',
+ '%t': 'timestamp',
+ '%T': 'total_time',
+}
+
+formatting_help = " ".join(["{0}: {1}".format(k.replace('%', '%%'), v)
+ for k, v in format_key.items()])
+
+
+def format_record(msg, event):
+ for i, j in format_key.items():
+ if i in msg:
+ # ensure consistent formatting of time values
+ if j in ['delta', 'elapsed', 'timestamp']:
+ msg = msg.replace(i, "{%s:08.5f}" % j)
+ else:
+ msg = msg.replace(i, "{%s}" % j)
+ return msg.format(**event)
+
+
+def dump_event_files(event):
+ content = dict((k, v) for k, v in event.items() if k not in ['content'])
+ files = content['files']
+ saved = []
+ for f in files:
+ fname = f['path']
+ fn_local = os.path.basename(fname)
+ fcontent = base64.b64decode(f['content']).decode('ascii')
+ util.write_file(fn_local, fcontent)
+ saved.append(fn_local)
+
+ return saved
+
+
+def event_name(event):
+ if event:
+ return event.get('name')
+ return None
+
+
+def event_type(event):
+ if event:
+ return event.get('event_type')
+ return None
+
+
+def event_parent(event):
+ if event:
+ return event_name(event).split("/")[0]
+ return None
+
+
+def event_timestamp(event):
+ return float(event.get('timestamp'))
+
+
+def event_datetime(event):
+ return datetime.datetime.utcfromtimestamp(event_timestamp(event))
+
+
+def delta_seconds(t1, t2):
+ return (t2 - t1).total_seconds()
+
+
+def event_duration(start, finish):
+ return delta_seconds(event_datetime(start), event_datetime(finish))
+
+
+def event_record(start_time, start, finish):
+ record = finish.copy()
+ record.update({
+ 'delta': event_duration(start, finish),
+ 'elapsed': delta_seconds(start_time, event_datetime(start)),
+ 'indent': '|' + ' ' * (event_name(start).count('/') - 1) + '`->',
+ })
+
+ return record
+
+
+def total_time_record(total_time):
+ return 'Total Time: %3.5f seconds\n' % total_time
+
+
+def generate_records(events, blame_sort=False,
+ print_format="(%n) %d seconds in %I%D",
+ dump_files=False, log_datafiles=False):
+
+ sorted_events = sorted(events, key=lambda x: x['timestamp'])
+ records = []
+ start_time = None
+ total_time = 0.0
+ stage_start_time = {}
+ stages_seen = []
+ boot_records = []
+
+ unprocessed = []
+ for e in range(0, len(sorted_events)):
+        event = sorted_events[e]
+        try:
+            next_evt = sorted_events[e + 1]
+ except IndexError:
+ next_evt = None
+
+ if event_type(event) == 'start':
+ if event.get('name') in stages_seen:
+ records.append(total_time_record(total_time))
+ boot_records.append(records)
+ records = []
+ start_time = None
+ total_time = 0.0
+
+ if start_time is None:
+ stages_seen = []
+ start_time = event_datetime(event)
+ stage_start_time[event_parent(event)] = start_time
+
+ # see if we have a pair
+ if event_name(event) == event_name(next_evt):
+ if event_type(next_evt) == 'finish':
+ records.append(format_record(print_format,
+ event_record(start_time,
+ event,
+ next_evt)))
+ else:
+ # This is a parent event
+ records.append("Starting stage: %s" % event.get('name'))
+ unprocessed.append(event)
+ stages_seen.append(event.get('name'))
+ continue
+ else:
+ prev_evt = unprocessed.pop()
+ if event_name(event) == event_name(prev_evt):
+ record = event_record(start_time, prev_evt, event)
+ records.append(format_record("Finished stage: "
+ "(%n) %d seconds ",
+ record) + "\n")
+ total_time += record.get('delta')
+ else:
+ # not a match, put it back
+ unprocessed.append(prev_evt)
+
+ records.append(total_time_record(total_time))
+ boot_records.append(records)
+ return boot_records
+
+
+def show_events(events, print_format):
+ return generate_records(events, print_format=print_format)
+
+
+def load_events(infile, rawdata=None):
+ if rawdata:
+ data = rawdata.read()
+ else:
+ data = infile.read()
+
+ j = None
+ try:
+ j = json.loads(data)
+ except json.JSONDecodeError:
+ pass
+
+ return j, data
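To make the %-escape table above concrete, here is a sketch of
format_record() expanding the default ``analyze show`` format against a
hand-built record; the field values are illustrative only, mirroring what
event_record() produces:

.. code-block:: python

    # Sketch: expand the default "show" format string against made-up values.
    from cloudinit.analyze.show import format_record

    record = {'indent': '|`->', 'description': 'searching for datasource',
              'elapsed': 1.0, 'delta': 30.21005,
              'name': 'init-local/search-datasource'}
    print(format_record('%I%D @%Es +%ds', record))
    # |`->searching for datasource @01.00000s +30.21005s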
diff --git a/cloudinit/analyze/tests/test_dump.py b/cloudinit/analyze/tests/test_dump.py
new file mode 100644
index 00000000..2c0885d0
--- /dev/null
+++ b/cloudinit/analyze/tests/test_dump.py
@@ -0,0 +1,210 @@
+# This file is part of cloud-init. See LICENSE file for license information.
+
+from datetime import datetime
+from textwrap import dedent
+
+from cloudinit.analyze.dump import (
+ dump_events, parse_ci_logline, parse_timestamp)
+from cloudinit.util import subp, write_file
+from tests.unittests.helpers import CiTestCase
+
+
+class TestParseTimestamp(CiTestCase):
+
+ def test_parse_timestamp_handles_cloud_init_default_format(self):
+ """Logs with cloud-init detailed formats will be properly parsed."""
+ trusty_fmt = '%Y-%m-%d %H:%M:%S,%f'
+ trusty_stamp = '2016-09-12 14:39:20,839'
+
+ parsed = parse_timestamp(trusty_stamp)
+
+ # convert ourselves
+ dt = datetime.strptime(trusty_stamp, trusty_fmt)
+ expected = float(dt.strftime('%s.%f'))
+
+ # use date(1)
+ out, _err = subp(['date', '+%s.%3N', '-d', trusty_stamp])
+ timestamp = out.strip()
+ date_ts = float(timestamp)
+
+ self.assertEqual(expected, parsed)
+ self.assertEqual(expected, date_ts)
+ self.assertEqual(date_ts, parsed)
+
+ def test_parse_timestamp_handles_syslog_adding_year(self):
+ """Syslog timestamps lack a year. Add year and properly parse."""
+ syslog_fmt = '%b %d %H:%M:%S %Y'
+ syslog_stamp = 'Aug 08 15:12:51'
+
+ # convert stamp ourselves by adding the missing year value
+ year = datetime.now().year
+ dt = datetime.strptime(syslog_stamp + " " + str(year), syslog_fmt)
+ expected = float(dt.strftime('%s.%f'))
+ parsed = parse_timestamp(syslog_stamp)
+
+ # use date(1)
+ out, _ = subp(['date', '+%s.%3N', '-d', syslog_stamp])
+ timestamp = out.strip()
+ date_ts = float(timestamp)
+
+ self.assertEqual(expected, parsed)
+ self.assertEqual(expected, date_ts)
+ self.assertEqual(date_ts, parsed)
+
+ def test_parse_timestamp_handles_journalctl_format_adding_year(self):
+ """Journalctl precise timestamps lack a year. Add year and parse."""
+ journal_fmt = '%b %d %H:%M:%S.%f %Y'
+ journal_stamp = 'Aug 08 17:15:50.606811'
+
+ # convert stamp ourselves by adding the missing year value
+ year = datetime.now().year
+ dt = datetime.strptime(journal_stamp + " " + str(year), journal_fmt)
+ expected = float(dt.strftime('%s.%f'))
+ parsed = parse_timestamp(journal_stamp)
+
+ # use date(1)
+ out, _ = subp(['date', '+%s.%6N', '-d', journal_stamp])
+ timestamp = out.strip()
+ date_ts = float(timestamp)
+
+ self.assertEqual(expected, parsed)
+ self.assertEqual(expected, date_ts)
+ self.assertEqual(date_ts, parsed)
+
+ def test_parse_unexpected_timestamp_format_with_date_command(self):
+        """Dump sends unexpected timestamp formats to the date command."""
+ new_fmt = '%H:%M %m/%d %Y'
+ new_stamp = '17:15 08/08'
+
+ # convert stamp ourselves by adding the missing year value
+ year = datetime.now().year
+ dt = datetime.strptime(new_stamp + " " + str(year), new_fmt)
+ expected = float(dt.strftime('%s.%f'))
+ parsed = parse_timestamp(new_stamp)
+
+ # use date(1)
+ out, _ = subp(['date', '+%s.%6N', '-d', new_stamp])
+ timestamp = out.strip()
+ date_ts = float(timestamp)
+
+ self.assertEqual(expected, parsed)
+ self.assertEqual(expected, date_ts)
+ self.assertEqual(date_ts, parsed)
+
+
+class TestParseCILogLine(CiTestCase):
+
+ def test_parse_logline_returns_none_without_separators(self):
+ """When no separators are found, parse_ci_logline returns None."""
+ expected_parse_ignores = [
+ '', '-', 'adsf-asdf', '2017-05-22 18:02:01,088', 'CLOUDINIT']
+ for parse_ignores in expected_parse_ignores:
+ self.assertIsNone(parse_ci_logline(parse_ignores))
+
+ def test_parse_logline_returns_event_for_cloud_init_logs(self):
+        """parse_ci_logline returns an event parsed from cloud-init format."""
+ line = (
+ "2017-08-08 20:05:07,147 - util.py[DEBUG]: Cloud-init v. 0.7.9"
+ " running 'init-local' at Tue, 08 Aug 2017 20:05:07 +0000. Up"
+ " 6.26 seconds.")
+ dt = datetime.strptime(
+ '2017-08-08 20:05:07,147', '%Y-%m-%d %H:%M:%S,%f')
+ timestamp = float(dt.strftime('%s.%f'))
+ expected = {
+ 'description': 'starting search for local datasources',
+ 'event_type': 'start',
+ 'name': 'init-local',
+ 'origin': 'cloudinit',
+ 'timestamp': timestamp}
+ self.assertEqual(expected, parse_ci_logline(line))
+
+ def test_parse_logline_returns_event_for_journalctl_logs(self):
+        """parse_ci_logline returns an event parsed from journalctl format."""
+ line = ("Nov 03 06:51:06.074410 x2 cloud-init[106]: [CLOUDINIT]"
+ " util.py[DEBUG]: Cloud-init v. 0.7.8 running 'init-local' at"
+ " Thu, 03 Nov 2016 06:51:06 +0000. Up 1.0 seconds.")
+ year = datetime.now().year
+ dt = datetime.strptime(
+ 'Nov 03 06:51:06.074410 %d' % year, '%b %d %H:%M:%S.%f %Y')
+ timestamp = float(dt.strftime('%s.%f'))
+ expected = {
+ 'description': 'starting search for local datasources',
+ 'event_type': 'start',
+ 'name': 'init-local',
+ 'origin': 'cloudinit',
+ 'timestamp': timestamp}
+ self.assertEqual(expected, parse_ci_logline(line))
+
+ def test_parse_logline_returns_event_for_finish_events(self):
+ """parse_ci_logline returns a finish event for a parsed log line."""
+ line = ('2016-08-30 21:53:25.972325+00:00 y1 [CLOUDINIT]'
+ ' handlers.py[DEBUG]: finish: modules-final: SUCCESS: running'
+ ' modules for final')
+ expected = {
+ 'description': 'running modules for final',
+ 'event_type': 'finish',
+ 'name': 'modules-final',
+ 'origin': 'cloudinit',
+ 'result': 'SUCCESS',
+ 'timestamp': 1472594005.972}
+ self.assertEqual(expected, parse_ci_logline(line))
+
+
+SAMPLE_LOGS = dedent("""\
+Nov 03 06:51:06.074410 x2 cloud-init[106]: [CLOUDINIT] util.py[DEBUG]:\
+ Cloud-init v. 0.7.8 running 'init-local' at Thu, 03 Nov 2016\
+ 06:51:06 +0000. Up 1.0 seconds.
+2016-08-30 21:53:25.972325+00:00 y1 [CLOUDINIT] handlers.py[DEBUG]: finish:\
+ modules-final: SUCCESS: running modules for final
+""")
+
+
+class TestDumpEvents(CiTestCase):
+ maxDiff = None
+
+ def test_dump_events_with_rawdata(self):
+ """Rawdata is split and parsed into a tuple of events and data"""
+ events, data = dump_events(rawdata=SAMPLE_LOGS)
+ expected_data = SAMPLE_LOGS.splitlines()
+ year = datetime.now().year
+ dt1 = datetime.strptime(
+ 'Nov 03 06:51:06.074410 %d' % year, '%b %d %H:%M:%S.%f %Y')
+ timestamp1 = float(dt1.strftime('%s.%f'))
+ expected_events = [{
+ 'description': 'starting search for local datasources',
+ 'event_type': 'start',
+ 'name': 'init-local',
+ 'origin': 'cloudinit',
+ 'timestamp': timestamp1}, {
+ 'description': 'running modules for final',
+ 'event_type': 'finish',
+ 'name': 'modules-final',
+ 'origin': 'cloudinit',
+ 'result': 'SUCCESS',
+ 'timestamp': 1472594005.972}]
+ self.assertEqual(expected_events, events)
+ self.assertEqual(expected_data, data)
+
+ def test_dump_events_with_cisource(self):
+ """Cisource file is read and parsed into a tuple of events and data."""
+ tmpfile = self.tmp_path('logfile')
+ write_file(tmpfile, SAMPLE_LOGS)
+ events, data = dump_events(cisource=open(tmpfile))
+ year = datetime.now().year
+ dt1 = datetime.strptime(
+ 'Nov 03 06:51:06.074410 %d' % year, '%b %d %H:%M:%S.%f %Y')
+ timestamp1 = float(dt1.strftime('%s.%f'))
+ expected_events = [{
+ 'description': 'starting search for local datasources',
+ 'event_type': 'start',
+ 'name': 'init-local',
+ 'origin': 'cloudinit',
+ 'timestamp': timestamp1}, {
+ 'description': 'running modules for final',
+ 'event_type': 'finish',
+ 'name': 'modules-final',
+ 'origin': 'cloudinit',
+ 'result': 'SUCCESS',
+ 'timestamp': 1472594005.972}]
+ self.assertEqual(expected_events, events)
+ self.assertEqual(SAMPLE_LOGS.splitlines(), [d.strip() for d in data])
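The rawdata path the tests exercise above also works for ad-hoc use; a
minimal sketch serializing a single finish event (relies on date(1) via
parse_timestamp_from_date, so Linux is assumed):

.. code-block:: python

    # Sketch: parse raw log text with dump_events() and emit JSON, mirroring
    # TestDumpEvents.test_dump_events_with_rawdata above.
    from cloudinit.analyze.dump import dump_events, json_dumps

    raw = ("2016-08-30 21:53:25.972325+00:00 y1 [CLOUDINIT]"
           " handlers.py[DEBUG]: finish: modules-final: SUCCESS: running"
           " modules for final\n")
    events, _lines = dump_events(rawdata=raw)
    print(json_dumps(events))  # one 'finish' event with result SUCCESS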
diff --git a/cloudinit/cmd/main.py b/cloudinit/cmd/main.py
index 139e03b3..9c0ac864 100644
--- a/cloudinit/cmd/main.py
+++ b/cloudinit/cmd/main.py
@@ -50,13 +50,6 @@ WELCOME_MSG_TPL = ("Cloud-init v. {version} running '{action}' at "
# Module section template
MOD_SECTION_TPL = "cloud_%s_modules"
-# Things u can query on
-QUERY_DATA_TYPES = [
- 'data',
- 'data_raw',
- 'instance_id',
-]
-
# Frequency shortname to full name
# (so users don't have to remember the full name...)
FREQ_SHORT_NAMES = {
@@ -510,11 +503,6 @@ def main_modules(action_name, args):
return run_module_section(mods, name, name)
-def main_query(name, _args):
- raise NotImplementedError(("Action '%s' is not"
- " currently implemented") % (name))
-
-
def main_single(name, args):
# Cloud-init single stage is broken up into the following sub-stages
# 1. Ensure that the init object fetches its config without errors
@@ -713,9 +701,11 @@ def main(sysv_args=None):
default=False)
parser.set_defaults(reporter=None)
- subparsers = parser.add_subparsers()
+ subparsers = parser.add_subparsers(title='Subcommands', dest='subcommand')
+ subparsers.required = True
# Each action and its sub-options (if any)
+
parser_init = subparsers.add_parser('init',
help=('initializes cloud-init and'
' performs initial modules'))
@@ -737,17 +727,6 @@ def main(sysv_args=None):
choices=('init', 'config', 'final'))
parser_mod.set_defaults(action=('modules', main_modules))
- # These settings are used when you want to query information
- # stored in the cloud-init data objects/directories/files
- parser_query = subparsers.add_parser('query',
- help=('query information stored '
- 'in cloud-init'))
- parser_query.add_argument("--name", '-n', action="store",
- help="item name to query on",
- required=True,
- choices=QUERY_DATA_TYPES)
- parser_query.set_defaults(action=('query', main_query))
-
# This subcommand allows you to run a single module
parser_single = subparsers.add_parser('single',
help=('run a single module '))
@@ -781,15 +760,22 @@ def main(sysv_args=None):
help=('list defined features'))
parser_features.set_defaults(action=('features', main_features))
+ parser_analyze = subparsers.add_parser(
+ 'analyze', help='Devel tool: Analyze cloud-init logs and data')
+ if sysv_args and sysv_args[0] == 'analyze':
+ # Only load this parser if analyze is specified to avoid file load cost
+ # FIXME put this under 'devel' subcommand (coming in next branch)
+ from cloudinit.analyze.__main__ import get_parser as analyze_parser
+ # Construct analyze subcommand parser
+ analyze_parser(parser_analyze)
+
args = parser.parse_args(args=sysv_args)
- try:
- (name, functor) = args.action
- except AttributeError:
- parser.error('too few arguments')
+ # Subparsers.required = True and each subparser sets action=(name, functor)
+ (name, functor) = args.action
# Setup basic logging to start (until reinitialized)
- # iff in debug mode...
+ # iff in debug mode.
if args.debug:
logging.setupBasicLogging()
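The deferred-import subparser trick used for 'analyze' above generalizes; a
self-contained sketch of the pattern (a standalone example, not cloud-init
code; only the 'analyze' subcommand exists in this toy parser):

.. code-block:: python

    # Sketch: register a stub subparser up front, but only populate it
    # (paying the import cost) when argv actually names that subcommand.
    import argparse
    import sys


    def main(sysv_args=None):
        sysv_args = sys.argv[1:] if sysv_args is None else sysv_args
        parser = argparse.ArgumentParser(prog='tool')
        subparsers = parser.add_subparsers(title='Subcommands',
                                           dest='subcommand')
        subparsers.required = True
        stub = subparsers.add_parser('analyze', help='Analyze logs')
        if sysv_args and sysv_args[0] == 'analyze':
            # Import cost is only paid when 'analyze' is requested.
            from cloudinit.analyze.__main__ import get_parser
            get_parser(stub)  # fills in the blame/show/dump subcommands
        args = parser.parse_args(sysv_args)
        name, handler = args.action
        handler(name, args)


    if __name__ == '__main__':
        main()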
diff --git a/doc/rtd/index.rst b/doc/rtd/index.rst
index a691103e..de67f361 100644
--- a/doc/rtd/index.rst
+++ b/doc/rtd/index.rst
@@ -40,6 +40,7 @@ initialization of a cloud instance.
topics/merging.rst
topics/network-config.rst
topics/vendordata.rst
+ topics/debugging.rst
topics/moreinfo.rst
topics/hacking.rst
topics/tests.rst
diff --git a/doc/rtd/topics/debugging.rst b/doc/rtd/topics/debugging.rst
new file mode 100644
index 00000000..4e43dd57
--- /dev/null
+++ b/doc/rtd/topics/debugging.rst
@@ -0,0 +1,146 @@
+********************************
+Testing and debugging cloud-init
+********************************
+
+Overview
+========
+This topic will discuss general approaches for testing and debugging
+cloud-init on deployed instances.
+
+
+Boot Time Analysis - cloud-init analyze
+=======================================
+Occasionally instances don't boot as quickly as expected, and cloud-init
+ships a simple facility to inspect which operations took cloud-init the
+longest during boot and setup.
+
+The script **/usr/bin/cloud-init** has an **analyze** sub-command which
+parses any cloud-init.log file into formatted and sorted events. It allows
+for detailed analysis of the most costly cloud-init operations, to determine
+the long pole in cloud-init configuration and setup. These subcommands
+default to reading /var/log/cloud-init.log.
+
+* ``analyze show`` Parse and organize cloud-init.log events by stage and
+  include each sub-stage granularity with time delta reports.
+
+.. code-block:: bash
+
+ $ cloud-init analyze show -i my-cloud-init.log
+ -- Boot Record 01 --
+ The total time elapsed since completing an event is printed after the "@"
+ character.
+ The time the event takes is printed after the "+" character.
+
+ Starting stage: modules-config
+ |`->config-emit_upstart ran successfully @05.47600s +00.00100s
+ |`->config-snap_config ran successfully @05.47700s +00.00100s
+ |`->config-ssh-import-id ran successfully @05.47800s +00.00200s
+ |`->config-locale ran successfully @05.48000s +00.00100s
+ ...
+
+
+* ``analyze dump`` Parse cloud-init.log into event records and return a list
+  of dictionaries that can be consumed for other reporting needs.
+
+.. code-block:: bash
+
+    $ cloud-init analyze dump -i my-cloud-init.log
+ [
+ {
+ "description": "running config modules",
+ "event_type": "start",
+ "name": "modules-config",
+ "origin": "cloudinit",
+ "timestamp": 1510807493.0
+ },...
+
+* ``analyze blame`` Parse cloud-init.log into event records and sort them
+  based on highest time cost for quick assessment of areas of cloud-init
+  that may need improvement.
+
+.. code-block:: bash
+
+ $ cloud-init analyze blame -i my-cloud-init.log
+ -- Boot Record 11 --
+ 00.01300s (modules-final/config-scripts-per-boot)
+ 00.00400s (modules-final/config-final-message)
+ 00.00100s (modules-final/config-rightscale_userdata)
+ ...
+
+
+Analyze quickstart - LXC
+---------------------------
+To quickly obtain a cloud-init log, try using LXC on any Ubuntu system:
+
+.. code-block:: bash
+
+ $ lxc init ubuntu-daily:xenial x1
+ $ lxc start x1
+ # Take lxc's cloud-init.log and pipe it to the analyzer
+ $ lxc file pull x1/var/log/cloud-init.log - | cloud-init analyze dump -i -
+ $ lxc file pull x1/var/log/cloud-init.log - | \
+ python3 -m cloudinit.analyze dump -i -
+
+Analyze quickstart - KVM
+---------------------------
+To quickly analyze a cloud-init log from a KVM instance:
+
+1. Download the current cloud image
+
+.. code-block:: bash
+
+    $ wget https://cloud-images.ubuntu.com/daily/server/xenial/current/xenial-server-cloudimg-amd64.img
+2. Create a snapshot image to preserve the original cloud-image
+
+.. code-block:: bash
+
+ $ qemu-img create -b xenial-server-cloudimg-amd64.img -f qcow2 \
+ test-cloudinit.qcow2
+
+3. Create a seed image with metadata using ``cloud-localds``
+
+.. code-block:: bash
+
+ $ cat > user-data <<EOF
+ #cloud-config
+ password: passw0rd
+ chpasswd: { expire: False }
+ EOF
+ $ cloud-localds my-seed.img user-data
+
+4. Launch your modified VM
+
+.. code-block:: bash
+
+ $ kvm -m 512 -net nic -net user -redir tcp:2222::22 \
+ -drive file=test-cloudinit.qcow2,if=virtio,format=qcow2 \
+ -drive file=my-seed.img,if=virtio,format=raw
+
+5. Analyze the boot (blame, dump, show)
+
+.. code-block:: bash
+
+ $ ssh -p 2222 ubuntu@localhost 'cat /var/log/cloud-init.log' | \
+ cloud-init analyze blame -i -
+
+
+Running single cloud config modules
+===================================
+This subcommand is not called by the init system. It can be called manually to
+load the configured datasource and run a single cloud-config module once using
+the cached userdata and metadata after the instance has booted. Each
+cloud-config module has a module FREQUENCY configured: PER_INSTANCE, PER_BOOT,
+PER_ONCE or PER_ALWAYS. When a module is run by cloud-init, it stores a
+semaphore file in
+``/var/lib/cloud/instance/sem/config_<module_name>.<frequency>`` which marks
+when the module last successfully ran. Presence of this semaphore file
+prevents a module from running again if it has already been run. To ensure that
+a module is run again, the desired frequency can be overridden on the
+commandline:
+
+.. code-block:: bash
+
+ $ sudo cloud-init single --name cc_ssh --frequency always
+ ...
+ Generating public/private ed25519 key pair
+ ...
+
+Inspect cloud-init.log for output of what operations were performed as a
+result.
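The semaphore layout described above can also be inspected directly; a
hedged sketch (the path and the config_<module_name>.<frequency> naming come
from the doc text; run on a booted instance, typically as root):

.. code-block:: python

    # Sketch: list per-instance module semaphores to see which modules have
    # already run. Presence of a semaphore blocks a re-run unless the
    # frequency is overridden, e.g. "cloud-init single --frequency always".
    import glob
    import os

    SEM_DIR = '/var/lib/cloud/instance/sem'
    for sem in sorted(glob.glob(os.path.join(SEM_DIR, 'config_*'))):
        print(os.path.basename(sem))  # config_<module_name>.<frequency>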
diff --git a/tests/unittests/test_cli.py b/tests/unittests/test_cli.py
index 06f366b2..7780f164 100644
--- a/tests/unittests/test_cli.py
+++ b/tests/unittests/test_cli.py
@@ -31,9 +31,90 @@ class TestCLI(test_helpers.FilesystemMockingTestCase):
def test_no_arguments_shows_error_message(self):
exit_code = self._call_main()
- self.assertIn('cloud-init: error: too few arguments',
- self.stderr.getvalue())
+ missing_subcommand_message = [
+ 'too few arguments', # python2.7 msg
+ 'the following arguments are required: subcommand' # python3 msg
+ ]
+ error = self.stderr.getvalue()
+ matches = ([msg in error for msg in missing_subcommand_message])
+ self.assertTrue(
+ any(matches), 'Did not find error message for missing subcommand')
self.assertEqual(2, exit_code)
+ def test_all_subcommands_represented_in_help(self):
+        """All known subparsers are represented in the cloud-init help doc."""
+ self._call_main()
+ error = self.stderr.getvalue()
+ expected_subcommands = ['analyze', 'init', 'modules', 'single',
+ 'dhclient-hook', 'features']
+ for subcommand in expected_subcommands:
+ self.assertIn(subcommand, error)
-# vi: ts=4 expandtab
+ @mock.patch('cloudinit.cmd.main.status_wrapper')
+ def test_init_subcommand_parser(self, m_status_wrapper):
+ """The subcommand 'init' calls status_wrapper passing init."""
+ self._call_main(['cloud-init', 'init'])
+ (name, parseargs) = m_status_wrapper.call_args_list[0][0]
+ self.assertEqual('init', name)
+ self.assertEqual('init', parseargs.subcommand)
+ self.assertEqual('init', parseargs.action[0])
+ self.assertEqual('main_init', parseargs.action[1].__name__)
+
+ @mock.patch('cloudinit.cmd.main.status_wrapper')
+ def test_modules_subcommand_parser(self, m_status_wrapper):
+ """The subcommand 'modules' calls status_wrapper passing modules."""
+ self._call_main(['cloud-init', 'modules'])
+ (name, parseargs) = m_status_wrapper.call_args_list[0][0]
+ self.assertEqual('modules', name)
+ self.assertEqual('modules', parseargs.subcommand)
+ self.assertEqual('modules', parseargs.action[0])
+ self.assertEqual('main_modules', parseargs.action[1].__name__)
+
+ def test_analyze_subcommand_parser(self):
+ """The subcommand cloud-init analyze calls the correct subparser."""
+ self._call_main(['cloud-init', 'analyze'])
+        # These subcommands are only valid for the cloud-init analyze script
+ expected_subcommands = ['blame', 'show', 'dump']
+ error = self.stderr.getvalue()
+ for subcommand in expected_subcommands:
+ self.assertIn(subcommand, error)
+
+ @mock.patch('cloudinit.cmd.main.main_single')
+ def test_single_subcommand(self, m_main_single):
+ """The subcommand 'single' calls main_single with valid args."""
+ self._call_main(['cloud-init', 'single', '--name', 'cc_ntp'])
+ (name, parseargs) = m_main_single.call_args_list[0][0]
+ self.assertEqual('single', name)
+ self.assertEqual('single', parseargs.subcommand)
+ self.assertEqual('single', parseargs.action[0])
+ self.assertFalse(parseargs.debug)
+ self.assertFalse(parseargs.force)
+ self.assertIsNone(parseargs.frequency)
+ self.assertEqual('cc_ntp', parseargs.name)
+ self.assertFalse(parseargs.report)
+
+ @mock.patch('cloudinit.cmd.main.dhclient_hook')
+ def test_dhclient_hook_subcommand(self, m_dhclient_hook):
+ """The subcommand 'dhclient-hook' calls dhclient_hook with args."""
+ self._call_main(['cloud-init', 'dhclient-hook', 'net_action', 'eth0'])
+ (name, parseargs) = m_dhclient_hook.call_args_list[0][0]
+ self.assertEqual('dhclient_hook', name)
+ self.assertEqual('dhclient-hook', parseargs.subcommand)
+ self.assertEqual('dhclient_hook', parseargs.action[0])
+ self.assertFalse(parseargs.debug)
+ self.assertFalse(parseargs.force)
+ self.assertEqual('net_action', parseargs.net_action)
+ self.assertEqual('eth0', parseargs.net_interface)
+
+ @mock.patch('cloudinit.cmd.main.main_features')
+ def test_features_hook_subcommand(self, m_features):
+ """The subcommand 'features' calls main_features with args."""
+ self._call_main(['cloud-init', 'features'])
+ (name, parseargs) = m_features.call_args_list[0][0]
+ self.assertEqual('features', name)
+ self.assertEqual('features', parseargs.subcommand)
+ self.assertEqual('features', parseargs.action[0])
+ self.assertFalse(parseargs.debug)
+ self.assertFalse(parseargs.force)
+
+# vi: ts=4 expandtab