From e74d7752f1761c3a8d3c19877de4707d00c49d08 Mon Sep 17 00:00:00 2001
From: Chad Smith
Date: Mon, 21 Aug 2017 13:46:23 -0600
Subject: tools: Add tooling for basic cloud-init performance analysis.

This branch adds cloudinit-analyze into cloud-init proper. It adds an
"analyze" subcommand to the cloud-init command line utility for quick
performance assessment of cloud-init stages and events.

On a cloud-init configured instance, running "cloud-init analyze blame"
will now report which cloud-init events cost the most wall time. This
allows for quick assessment of the most costly stages of cloud-init.

This functionality is pulled from Ryan Harper's analyze work. The
cloudinit-analyze main script itself has been refactored a bit for
inclusion as a subcommand of the cloud-init CLI. There will be a
followup branch at some point which will optionally instrument detailed
strace profiling, but that approach needs a bit more discussion first.

This branch also adds:
 * an additional debugging topic in the sphinx-generated docs describing
   cloud-init analyze, dump and show as well as cloud-init single usage
 * a Makefile unittests target update to include the cloudinit directory,
   because we now have unittests within that package

LP: #1709761
---
 Makefile                             |   2 +-
 cloudinit/analyze/__init__.py        |   0
 cloudinit/analyze/__main__.py        | 155 ++++++++++++++++++++++++++
 cloudinit/analyze/dump.py            | 176 +++++++++++++++++++++++++++++
 cloudinit/analyze/show.py            | 207 ++++++++++++++++++++++++++++++++++
 cloudinit/analyze/tests/test_dump.py | 210 +++++++++++++++++++++++++++++++++++
 cloudinit/cmd/main.py                |  44 +++-----
 doc/rtd/index.rst                    |   1 +
 doc/rtd/topics/debugging.rst         | 146 ++++++++++++++++++++++++
 tests/unittests/test_cli.py          |  87 ++++++++++++++-
 10 files changed, 995 insertions(+), 33 deletions(-)
 create mode 100644 cloudinit/analyze/__init__.py
 create mode 100644 cloudinit/analyze/__main__.py
 create mode 100644 cloudinit/analyze/dump.py
 create mode 100644 cloudinit/analyze/show.py
 create mode 100644 cloudinit/analyze/tests/test_dump.py
 create mode 100644 doc/rtd/topics/debugging.rst

diff --git a/Makefile b/Makefile
index f280911f..9e7f4ee7 100644
--- a/Makefile
+++ b/Makefile
@@ -48,7 +48,7 @@ pyflakes3:
 	@$(CWD)/tools/run-pyflakes3
 
 unittest: clean_pyc
-	nosetests $(noseopts) tests/unittests
+	nosetests $(noseopts) tests/unittests cloudinit
 
 unittest3: clean_pyc
 	nosetests3 $(noseopts) tests/unittests
diff --git a/cloudinit/analyze/__init__.py b/cloudinit/analyze/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/cloudinit/analyze/__main__.py b/cloudinit/analyze/__main__.py
new file mode 100644
index 00000000..71cba4f2
--- /dev/null
+++ b/cloudinit/analyze/__main__.py
@@ -0,0 +1,155 @@
+# Copyright (C) 2017 Canonical Ltd.
+#
+# This file is part of cloud-init. See LICENSE file for license information.
+
+import argparse
+import re
+import sys
+
+from . import dump
+from . import show
+
+
+def get_parser(parser=None):
+    if not parser:
+        parser = argparse.ArgumentParser(
+            prog='cloudinit-analyze',
+            description='Devel tool: Analyze cloud-init logs and data')
+    subparsers = parser.add_subparsers(title='Subcommands', dest='subcommand')
+    subparsers.required = True
+
+    parser_blame = subparsers.add_parser(
+        'blame', help='Print list of executed stages ordered by time to init')
+    parser_blame.add_argument(
+        '-i', '--infile', action='store', dest='infile',
+        default='/var/log/cloud-init.log',
+        help='specify where to read input.')
+    parser_blame.add_argument(
+        '-o', '--outfile', action='store', dest='outfile', default='-',
+        help='specify where to write output.')
+    parser_blame.set_defaults(action=('blame', analyze_blame))
+
+    parser_show = subparsers.add_parser(
+        'show', help='Print list of in-order events during execution')
+    parser_show.add_argument('-f', '--format', action='store',
+                             dest='print_format', default='%I%D @%Es +%ds',
+                             help='specify formatting of output.')
+    parser_show.add_argument('-i', '--infile', action='store',
+                             dest='infile', default='/var/log/cloud-init.log',
+                             help='specify where to read input.')
+    parser_show.add_argument('-o', '--outfile', action='store',
+                             dest='outfile', default='-',
+                             help='specify where to write output.')
+    parser_show.set_defaults(action=('show', analyze_show))
+
+    parser_dump = subparsers.add_parser(
+        'dump', help='Dump cloud-init events in JSON format')
+    parser_dump.add_argument('-i', '--infile', action='store',
+                             dest='infile', default='/var/log/cloud-init.log',
+                             help='specify where to read input.')
+    parser_dump.add_argument('-o', '--outfile', action='store',
+                             dest='outfile', default='-',
+                             help='specify where to write output.')
+    parser_dump.set_defaults(action=('dump', analyze_dump))
+    return parser
+
+
+def analyze_blame(name, args):
+    """Report a list of records sorted by largest time delta.
+
+    For example:
+      30.210s (init-local) searching for datasource
+       8.706s (init-network) reading and applying user-data
+        166ms (modules-config) ....
+        807us (modules-final) ...
+
+    We generate event records by parsing cloud-init logs, formatting the
+    output and sorting by record data ('delta').
+    """
+    (infh, outfh) = configure_io(args)
+    blame_format = '     %ds (%n)'
+    r = re.compile(r'(^\s+\d+\.\d+)', re.MULTILINE)
+    for idx, record in enumerate(show.show_events(_get_events(infh),
+                                                  blame_format)):
+        srecs = sorted(filter(r.match, record), reverse=True)
+        outfh.write('-- Boot Record %02d --\n' % (idx + 1))
+        outfh.write('\n'.join(srecs) + '\n')
+        outfh.write('\n')
+    outfh.write('%d boot records analyzed\n' % (idx + 1))
+
+
+def analyze_show(name, args):
+    """Generate output records using the 'standard' format to print events.
+
+    Example output follows:
+    Starting stage: (init-local)
+      ...
+    Finished stage: (init-local) 0.105195 seconds
+
+    Starting stage: (init-network)
+      ...
+    Finished stage: (init-network) 0.339024 seconds
+
+    Starting stage: (modules-config)
+      ...
+    Finished stage: (modules-config) 0.NNN seconds
+
+    Starting stage: (modules-final)
+      ...
+    Finished stage: (modules-final) 0.NNN seconds
+    """
+    (infh, outfh) = configure_io(args)
+    for idx, record in enumerate(show.show_events(_get_events(infh),
+                                                  args.print_format)):
+        outfh.write('-- Boot Record %02d --\n' % (idx + 1))
+        outfh.write('The total time elapsed since completing an event is'
+                    ' printed after the "@" character.\n')
+        outfh.write('The time the event takes is printed after the "+" '
+                    'character.\n\n')
+        outfh.write('\n'.join(record) + '\n')
+    outfh.write('%d boot records analyzed\n' % (idx + 1))
+
+
+def analyze_dump(name, args):
+    """Dump cloud-init events in json format."""
+    (infh, outfh) = configure_io(args)
+    outfh.write(dump.json_dumps(_get_events(infh)) + '\n')
+
+
+def _get_events(infile):
+    rawdata = None
+    events, rawdata = show.load_events(infile, None)
+    if not events:
+        events, _ = dump.dump_events(rawdata=rawdata)
+    return events
+
+
+def configure_io(args):
+    """Common parsing and setup of input/output files."""
+    if args.infile == '-':
+        infh = sys.stdin
+    else:
+        try:
+            infh = open(args.infile, 'r')
+        except (FileNotFoundError, PermissionError):
+            sys.stderr.write('Cannot open file %s\n' % args.infile)
+            sys.exit(1)
+
+    if args.outfile == '-':
+        outfh = sys.stdout
+    else:
+        try:
+            outfh = open(args.outfile, 'w')
+        except PermissionError:
+            sys.stderr.write('Cannot open file %s\n' % args.outfile)
+            sys.exit(1)
+
+    return (infh, outfh)
+
+
+if __name__ == '__main__':
+    parser = get_parser()
+    args = parser.parse_args()
+    (name, action_functor) = args.action
+    action_functor(name, args)
+
+# vi: ts=4 expandtab
diff --git a/cloudinit/analyze/dump.py b/cloudinit/analyze/dump.py
new file mode 100644
index 00000000..ca4da496
--- /dev/null
+++ b/cloudinit/analyze/dump.py
@@ -0,0 +1,176 @@
+# This file is part of cloud-init. See LICENSE file for license information.
+
+import calendar
+from datetime import datetime
+import json
+import sys
+
+from cloudinit import util
+
+stage_to_description = {
+    'finished': 'finished running cloud-init',
+    'init-local': 'starting search for local datasources',
+    'init-network': 'searching for network datasources',
+    'init': 'searching for network datasources',
+    'modules-config': 'running config modules',
+    'modules-final': 'finalizing modules',
+    'modules': 'running modules for',
+    'single': 'running single module ',
+}
+
+# logger's asctime format
+CLOUD_INIT_ASCTIME_FMT = "%Y-%m-%d %H:%M:%S,%f"
+
+# journalctl -o short-precise
+CLOUD_INIT_JOURNALCTL_FMT = "%b %d %H:%M:%S.%f %Y"
+
+# other
+DEFAULT_FMT = "%b %d %H:%M:%S %Y"
+
+
+def parse_timestamp(timestampstr):
+    # default syslog time does not include the current year
+    months = [calendar.month_abbr[m] for m in range(1, 13)]
+    if timestampstr.split()[0] in months:
+        # Aug 29 22:55:26
+        FMT = DEFAULT_FMT
+        if '.' in timestampstr:
+            FMT = CLOUD_INIT_JOURNALCTL_FMT
+        dt = datetime.strptime(timestampstr + " " +
+                               str(datetime.now().year),
+                               FMT)
+        timestamp = dt.strftime("%s.%f")
+    elif "," in timestampstr:
+        # 2016-09-12 14:39:20,839
+        dt = datetime.strptime(timestampstr, CLOUD_INIT_ASCTIME_FMT)
+        timestamp = dt.strftime("%s.%f")
+    else:
+        # allow date(1) to handle other formats we don't expect
+        timestamp = parse_timestamp_from_date(timestampstr)
+
+    return float(timestamp)
+
+
+def parse_timestamp_from_date(timestampstr):
+    out, _ = util.subp(['date', '+%s.%3N', '-d', timestampstr])
+    timestamp = out.strip()
+    return float(timestamp)
+
+
+def parse_ci_logline(line):
+    # Stage Starts:
+    # Cloud-init v. 0.7.7 running 'init-local' at \
+    #   Fri, 02 Sep 2016 19:28:07 +0000. Up 1.0 seconds.
+    # Cloud-init v. 0.7.7 running 'init' at \
+    #   Fri, 02 Sep 2016 19:28:08 +0000. Up 2.0 seconds.
+    # Cloud-init v. 0.7.7 finished at
+    # Aug 29 22:55:26 test1 [CLOUDINIT] handlers.py[DEBUG]: \
+    #   finish: modules-final: SUCCESS: running modules for final
+    # 2016-08-30T21:53:25.972325+00:00 y1 [CLOUDINIT] handlers.py[DEBUG]: \
+    #   finish: modules-final: SUCCESS: running modules for final
+    #
+    # Nov 03 06:51:06.074410 x2 cloud-init[106]: [CLOUDINIT] util.py[DEBUG]: \
+    #   Cloud-init v. 0.7.8 running 'init-local' at \
+    #   Thu, 03 Nov 2016 06:51:06 +0000. Up 1.0 seconds.
+    #
+    # 2017-05-22 18:02:01,088 - util.py[DEBUG]: Cloud-init v. 0.7.9 running \
+    #   'init-local' at Mon, 22 May 2017 18:02:01 +0000. Up 2.0 seconds.
+
+    separators = [' - ', ' [CLOUDINIT] ']
+    found = False
+    for sep in separators:
+        if sep in line:
+            found = True
+            break
+
+    if not found:
+        return None
+
+    (timehost, eventstr) = line.split(sep)
+
+    # journalctl -o short-precise
+    if timehost.endswith(":"):
+        timehost = " ".join(timehost.split()[0:-1])
+
+    if "," in timehost:
+        timestampstr, extra = timehost.split(",")
+        timestampstr += ",%s" % extra.split()[0]
+        if ' ' in extra:
+            hostname = extra.split()[-1]
+    else:
+        hostname = timehost.split()[-1]
+        timestampstr = timehost.split(hostname)[0].strip()
+
+    if 'Cloud-init v.' in eventstr:
+        event_type = 'start'
+        if 'running' in eventstr:
+            stage_and_timestamp = eventstr.split('running')[1].lstrip()
+            event_name, _ = stage_and_timestamp.split(' at ')
+            event_name = event_name.replace("'", "").replace(":", "-")
+            if event_name == "init":
+                event_name = "init-network"
+        else:
+            # don't generate a start for the 'finished at' banner
+            return None
+        event_description = stage_to_description[event_name]
+    else:
+        (pymodloglvl, event_type, event_name) = eventstr.split()[0:3]
+        event_description = eventstr.split(event_name)[1].strip()
+
+    event = {
+        'name': event_name.rstrip(":"),
+        'description': event_description,
+        'timestamp': parse_timestamp(timestampstr),
+        'origin': 'cloudinit',
+        'event_type': event_type.rstrip(":"),
+    }
+    if event['event_type'] == "finish":
+        result = event_description.split(":")[0]
+        desc = event_description.split(result)[1].lstrip(':').strip()
+        event['result'] = result
+        event['description'] = desc.strip()
+
+    return event
+
+
+def json_dumps(data):
+    return json.dumps(data, indent=1, sort_keys=True,
+                      separators=(',', ': '))
+
+
+def dump_events(cisource=None, rawdata=None):
+    events = []
+    event = None
+    CI_EVENT_MATCHES = ['start:', 'finish:', 'Cloud-init v.']
+
+    if not any([cisource, rawdata]):
+        raise ValueError('Either cisource or rawdata parameters are required')
+
+    if rawdata:
+        data = rawdata.splitlines()
+    else:
+        data = cisource.readlines()
+
+    for line in data:
+        for match in CI_EVENT_MATCHES:
+            if match in line:
+                try:
+                    event = parse_ci_logline(line)
+                except ValueError:
+                    sys.stderr.write('Skipping invalid entry\n')
+                if event:
+                    events.append(event)
+
+    return events, data
+
+
+def main():
+    if len(sys.argv) > 1:
+        cisource = open(sys.argv[1])
+    else:
+        cisource = sys.stdin
+
+    # dump_events returns an (events, data) tuple; serialize just the events
+    events, _ = dump_events(cisource)
+    return json_dumps(events)
+
+
+if __name__ == "__main__":
+    print(main())
diff --git a/cloudinit/analyze/show.py b/cloudinit/analyze/show.py
new file mode 100644
index 00000000..3b356bb8
--- /dev/null
+++ b/cloudinit/analyze/show.py
@@ -0,0 +1,207 @@
+# Copyright (C) 2016 Canonical Ltd.
+#
+# Author: Ryan Harper
+#
+# This file is part of cloud-init. See LICENSE file for license information.
+
+import base64
+import datetime
+import json
+import os
+
+from cloudinit import util
+
+# An event:
+'''
+{
+    "description": "executing late commands",
+    "event_type": "start",
+    "level": "INFO",
+    "name": "cmd-install/stage-late",
+    "origin": "cloudinit",
+    "timestamp": 1461164249.1590767
+},
+{
+    "description": "executing late commands",
+    "event_type": "finish",
+    "level": "INFO",
+    "name": "cmd-install/stage-late",
+    "origin": "cloudinit",
+    "result": "SUCCESS",
+    "timestamp": 1461164249.1590767
+}
+'''
+format_key = {
+    '%d': 'delta',
+    '%D': 'description',
+    '%E': 'elapsed',
+    '%e': 'event_type',
+    '%I': 'indent',
+    '%l': 'level',
+    '%n': 'name',
+    '%o': 'origin',
+    '%r': 'result',
+    '%t': 'timestamp',
+    '%T': 'total_time',
+}
+
+formatting_help = " ".join(["{0}: {1}".format(k.replace('%', '%%'), v)
+                            for k, v in format_key.items()])
+
+
+def format_record(msg, event):
+    for i, j in format_key.items():
+        if i in msg:
+            # ensure consistent formatting of time values
+            if j in ['delta', 'elapsed', 'timestamp']:
+                msg = msg.replace(i, "{%s:08.5f}" % j)
+            else:
+                msg = msg.replace(i, "{%s}" % j)
+    return msg.format(**event)
+
+
+def dump_event_files(event):
+    content = dict((k, v) for k, v in event.items() if k not in ['content'])
+    files = content['files']
+    saved = []
+    for f in files:
+        fname = f['path']
+        fn_local = os.path.basename(fname)
+        fcontent = base64.b64decode(f['content']).decode('ascii')
+        util.write_file(fn_local, fcontent)
+        saved.append(fn_local)
+
+    return saved
+
+
+def event_name(event):
+    if event:
+        return event.get('name')
+    return None
+
+
+def event_type(event):
+    if event:
+        return event.get('event_type')
+    return None
+
+
+def event_parent(event):
+    if event:
+        return event_name(event).split("/")[0]
+    return None
+
+
+def event_timestamp(event):
+    return float(event.get('timestamp'))
+
+
+def event_datetime(event):
+    return datetime.datetime.utcfromtimestamp(event_timestamp(event))
+
+
+def delta_seconds(t1, t2):
+    return (t2 - t1).total_seconds()
+
+
+def event_duration(start, finish):
+    return delta_seconds(event_datetime(start), event_datetime(finish))
+
+
+def event_record(start_time, start, finish):
+    record = finish.copy()
+    record.update({
+        'delta': event_duration(start, finish),
+        'elapsed': delta_seconds(start_time, event_datetime(start)),
+        'indent': '|' + ' ' * (event_name(start).count('/') - 1) + '`->',
+    })
+
+    return record
+
+
+def total_time_record(total_time):
+    return 'Total Time: %3.5f seconds\n' % total_time
+
+
+def generate_records(events, blame_sort=False,
+                     print_format="(%n) %d seconds in %I%D",
+                     dump_files=False, log_datafiles=False):
+
+    sorted_events = sorted(events, key=lambda x: x['timestamp'])
+    records = []
+    start_time = None
+    total_time = 0.0
+    stage_start_time = {}
+    stages_seen = []
+    boot_records = []
+
+    unprocessed = []
+    # iterate over the time-sorted list, not the caller's ordering
+    for e in range(0, len(sorted_events)):
+        event = sorted_events[e]
+        try:
+            next_evt = sorted_events[e + 1]
+        except IndexError:
+            next_evt = None
+
+        if event_type(event) == 'start':
+            if event.get('name') in stages_seen:
+                records.append(total_time_record(total_time))
+                boot_records.append(records)
+                records = []
+                start_time = None
+                total_time = 0.0
+
+            if start_time is None:
+                stages_seen = []
+                start_time = event_datetime(event)
+                stage_start_time[event_parent(event)] = start_time
+
+            # see if we have a pair
+            if event_name(event) == event_name(next_evt):
+                if event_type(next_evt) == 'finish':
+                    records.append(format_record(print_format,
+                                                 event_record(start_time,
+                                                              event,
+                                                              next_evt)))
+            else:
+                # This is a parent event
+                records.append("Starting stage: %s" % event.get('name'))
+                unprocessed.append(event)
+                stages_seen.append(event.get('name'))
+                continue
+        else:
+            prev_evt = unprocessed.pop()
+            if event_name(event) == event_name(prev_evt):
+                record = event_record(start_time, prev_evt, event)
+                records.append(format_record("Finished stage: "
+                                             "(%n) %d seconds ",
+                                             record) + "\n")
+                total_time += record.get('delta')
+            else:
+                # not a match, put it back
+                unprocessed.append(prev_evt)
+
+    records.append(total_time_record(total_time))
+    boot_records.append(records)
+    return boot_records
+
+
+def show_events(events, print_format):
+    return generate_records(events, print_format=print_format)
+
+
+def load_events(infile, rawdata=None):
+    if rawdata:
+        data = rawdata.read()
+    else:
+        data = infile.read()
+
+    j = None
+    try:
+        j = json.loads(data)
+    except json.JSONDecodeError:
+        pass
+
+    return j, data
diff --git a/cloudinit/analyze/tests/test_dump.py b/cloudinit/analyze/tests/test_dump.py
new file mode 100644
index 00000000..2c0885d0
--- /dev/null
+++ b/cloudinit/analyze/tests/test_dump.py
@@ -0,0 +1,210 @@
+# This file is part of cloud-init. See LICENSE file for license information.
+
+from datetime import datetime
+from textwrap import dedent
+
+from cloudinit.analyze.dump import (
+    dump_events, parse_ci_logline, parse_timestamp)
+from cloudinit.util import subp, write_file
+from tests.unittests.helpers import CiTestCase
+
+
+class TestParseTimestamp(CiTestCase):
+
+    def test_parse_timestamp_handles_cloud_init_default_format(self):
+        """Logs with cloud-init detailed formats will be properly parsed."""
+        trusty_fmt = '%Y-%m-%d %H:%M:%S,%f'
+        trusty_stamp = '2016-09-12 14:39:20,839'
+
+        parsed = parse_timestamp(trusty_stamp)
+
+        # convert ourselves
+        dt = datetime.strptime(trusty_stamp, trusty_fmt)
+        expected = float(dt.strftime('%s.%f'))
+
+        # use date(1)
+        out, _err = subp(['date', '+%s.%3N', '-d', trusty_stamp])
+        timestamp = out.strip()
+        date_ts = float(timestamp)
+
+        self.assertEqual(expected, parsed)
+        self.assertEqual(expected, date_ts)
+        self.assertEqual(date_ts, parsed)
+
+    def test_parse_timestamp_handles_syslog_adding_year(self):
+        """Syslog timestamps lack a year. Add year and properly parse."""
+        syslog_fmt = '%b %d %H:%M:%S %Y'
+        syslog_stamp = 'Aug 08 15:12:51'
+
+        # convert stamp ourselves by adding the missing year value
+        year = datetime.now().year
+        dt = datetime.strptime(syslog_stamp + " " + str(year), syslog_fmt)
+        expected = float(dt.strftime('%s.%f'))
+        parsed = parse_timestamp(syslog_stamp)
+
+        # use date(1)
+        out, _ = subp(['date', '+%s.%3N', '-d', syslog_stamp])
+        timestamp = out.strip()
+        date_ts = float(timestamp)
+
+        self.assertEqual(expected, parsed)
+        self.assertEqual(expected, date_ts)
+        self.assertEqual(date_ts, parsed)
+
+    def test_parse_timestamp_handles_journalctl_format_adding_year(self):
+        """Journalctl precise timestamps lack a year. Add year and parse."""
+        journal_fmt = '%b %d %H:%M:%S.%f %Y'
+        journal_stamp = 'Aug 08 17:15:50.606811'
+
+        # convert stamp ourselves by adding the missing year value
+        year = datetime.now().year
+        dt = datetime.strptime(journal_stamp + " " + str(year), journal_fmt)
+        expected = float(dt.strftime('%s.%f'))
+        parsed = parse_timestamp(journal_stamp)
+
+        # use date(1)
+        out, _ = subp(['date', '+%s.%6N', '-d', journal_stamp])
+        timestamp = out.strip()
+        date_ts = float(timestamp)
+
+        self.assertEqual(expected, parsed)
+        self.assertEqual(expected, date_ts)
+        self.assertEqual(date_ts, parsed)
+
+    def test_parse_unexpected_timestamp_format_with_date_command(self):
+        """Dump sends unexpected timestamp formats to date(1) for processing."""
+        new_fmt = '%H:%M %m/%d %Y'
+        new_stamp = '17:15 08/08'
+
+        # convert stamp ourselves by adding the missing year value
+        year = datetime.now().year
+        dt = datetime.strptime(new_stamp + " " + str(year), new_fmt)
+        expected = float(dt.strftime('%s.%f'))
+        parsed = parse_timestamp(new_stamp)
+
+        # use date(1)
+        out, _ = subp(['date', '+%s.%6N', '-d', new_stamp])
+        timestamp = out.strip()
+        date_ts = float(timestamp)
+
+        self.assertEqual(expected, parsed)
+        self.assertEqual(expected, date_ts)
+        self.assertEqual(date_ts, parsed)
+
+
+class TestParseCILogLine(CiTestCase):
+
+    def test_parse_logline_returns_none_without_separators(self):
+        """When no separators are found, parse_ci_logline returns None."""
+        expected_parse_ignores = [
+            '', '-', 'adsf-asdf', '2017-05-22 18:02:01,088', 'CLOUDINIT']
+        for parse_ignores in expected_parse_ignores:
+            self.assertIsNone(parse_ci_logline(parse_ignores))
+
+    def test_parse_logline_returns_event_for_cloud_init_logs(self):
+        """parse_ci_logline returns an event parsed from cloud-init format."""
+        line = (
+            "2017-08-08 20:05:07,147 - util.py[DEBUG]: Cloud-init v. 0.7.9"
+            " running 'init-local' at Tue, 08 Aug 2017 20:05:07 +0000. Up"
+            " 6.26 seconds.")
+        dt = datetime.strptime(
+            '2017-08-08 20:05:07,147', '%Y-%m-%d %H:%M:%S,%f')
+        timestamp = float(dt.strftime('%s.%f'))
+        expected = {
+            'description': 'starting search for local datasources',
+            'event_type': 'start',
+            'name': 'init-local',
+            'origin': 'cloudinit',
+            'timestamp': timestamp}
+        self.assertEqual(expected, parse_ci_logline(line))
+
+    def test_parse_logline_returns_event_for_journalctl_logs(self):
+        """parse_ci_logline returns an event parsed from journalctl format."""
+        line = ("Nov 03 06:51:06.074410 x2 cloud-init[106]: [CLOUDINIT]"
+                " util.py[DEBUG]: Cloud-init v. 0.7.8 running 'init-local' at"
+                " Thu, 03 Nov 2016 06:51:06 +0000. Up 1.0 seconds.")
+        year = datetime.now().year
+        dt = datetime.strptime(
+            'Nov 03 06:51:06.074410 %d' % year, '%b %d %H:%M:%S.%f %Y')
+        timestamp = float(dt.strftime('%s.%f'))
+        expected = {
+            'description': 'starting search for local datasources',
+            'event_type': 'start',
+            'name': 'init-local',
+            'origin': 'cloudinit',
+            'timestamp': timestamp}
+        self.assertEqual(expected, parse_ci_logline(line))
+
+    def test_parse_logline_returns_event_for_finish_events(self):
+        """parse_ci_logline returns a finish event for a parsed log line."""
+        line = ('2016-08-30 21:53:25.972325+00:00 y1 [CLOUDINIT]'
+                ' handlers.py[DEBUG]: finish: modules-final: SUCCESS: running'
+                ' modules for final')
+        expected = {
+            'description': 'running modules for final',
+            'event_type': 'finish',
+            'name': 'modules-final',
+            'origin': 'cloudinit',
+            'result': 'SUCCESS',
+            'timestamp': 1472594005.972}
+        self.assertEqual(expected, parse_ci_logline(line))
+
+
+SAMPLE_LOGS = dedent("""\
+Nov 03 06:51:06.074410 x2 cloud-init[106]: [CLOUDINIT] util.py[DEBUG]:\
+ Cloud-init v. 0.7.8 running 'init-local' at Thu, 03 Nov 2016\
+ 06:51:06 +0000. Up 1.0 seconds.
+2016-08-30 21:53:25.972325+00:00 y1 [CLOUDINIT] handlers.py[DEBUG]: finish:\
+ modules-final: SUCCESS: running modules for final
+""")
+
+
+class TestDumpEvents(CiTestCase):
+    maxDiff = None
+
+    def test_dump_events_with_rawdata(self):
+        """Rawdata is split and parsed into a tuple of events and data."""
+        events, data = dump_events(rawdata=SAMPLE_LOGS)
+        expected_data = SAMPLE_LOGS.splitlines()
+        year = datetime.now().year
+        dt1 = datetime.strptime(
+            'Nov 03 06:51:06.074410 %d' % year, '%b %d %H:%M:%S.%f %Y')
+        timestamp1 = float(dt1.strftime('%s.%f'))
+        expected_events = [{
+            'description': 'starting search for local datasources',
+            'event_type': 'start',
+            'name': 'init-local',
+            'origin': 'cloudinit',
+            'timestamp': timestamp1}, {
+            'description': 'running modules for final',
+            'event_type': 'finish',
+            'name': 'modules-final',
+            'origin': 'cloudinit',
+            'result': 'SUCCESS',
+            'timestamp': 1472594005.972}]
+        self.assertEqual(expected_events, events)
+        self.assertEqual(expected_data, data)
+
+    def test_dump_events_with_cisource(self):
+        """Cisource file is read and parsed into a tuple of events and data."""
+        tmpfile = self.tmp_path('logfile')
+        write_file(tmpfile, SAMPLE_LOGS)
+        events, data = dump_events(cisource=open(tmpfile))
+        year = datetime.now().year
+        dt1 = datetime.strptime(
+            'Nov 03 06:51:06.074410 %d' % year, '%b %d %H:%M:%S.%f %Y')
+        timestamp1 = float(dt1.strftime('%s.%f'))
+        expected_events = [{
+            'description': 'starting search for local datasources',
+            'event_type': 'start',
+            'name': 'init-local',
+            'origin': 'cloudinit',
+            'timestamp': timestamp1}, {
+            'description': 'running modules for final',
+            'event_type': 'finish',
+            'name': 'modules-final',
+            'origin': 'cloudinit',
+            'result': 'SUCCESS',
+            'timestamp': 1472594005.972}]
+        self.assertEqual(expected_events, events)
+        self.assertEqual(SAMPLE_LOGS.splitlines(), [d.strip() for d in data])
diff --git a/cloudinit/cmd/main.py b/cloudinit/cmd/main.py
index 139e03b3..9c0ac864 100644
--- a/cloudinit/cmd/main.py
+++ b/cloudinit/cmd/main.py
@@ -50,13 +50,6 @@ WELCOME_MSG_TPL = ("Cloud-init v. {version} running '{action}' at "
 # Module section template
 MOD_SECTION_TPL = "cloud_%s_modules"
 
-# Things u can query on
-QUERY_DATA_TYPES = [
-    'data',
-    'data_raw',
-    'instance_id',
-]
-
 # Frequency shortname to full name
 # (so users don't have to remember the full name...)
 FREQ_SHORT_NAMES = {
@@ -510,11 +503,6 @@ def main_modules(action_name, args):
     return run_module_section(mods, name, name)
 
 
-def main_query(name, _args):
-    raise NotImplementedError(("Action '%s' is not"
-                               " currently implemented") % (name))
-
-
 def main_single(name, args):
     # Cloud-init single stage is broken up into the following sub-stages
     # 1. Ensure that the init object fetches its config without errors
@@ -713,9 +701,11 @@ def main(sysv_args=None):
                         default=False)
     parser.set_defaults(reporter=None)
 
-    subparsers = parser.add_subparsers()
+    subparsers = parser.add_subparsers(title='Subcommands', dest='subcommand')
+    subparsers.required = True
 
     # Each action and its sub-options (if any)
+
     parser_init = subparsers.add_parser('init',
                                         help=('initializes cloud-init and'
                                               ' performs initial modules'))
@@ -737,17 +727,6 @@ def main(sysv_args=None):
                             choices=('init', 'config', 'final'))
     parser_mod.set_defaults(action=('modules', main_modules))
 
-    # These settings are used when you want to query information
-    # stored in the cloud-init data objects/directories/files
-    parser_query = subparsers.add_parser('query',
-                                         help=('query information stored '
-                                               'in cloud-init'))
-    parser_query.add_argument("--name", '-n', action="store",
-                              help="item name to query on",
-                              required=True,
-                              choices=QUERY_DATA_TYPES)
-    parser_query.set_defaults(action=('query', main_query))
-
     # This subcommand allows you to run a single module
     parser_single = subparsers.add_parser('single',
                                           help=('run a single module '))
@@ -781,15 +760,22 @@ def main(sysv_args=None):
                                  help=('list defined features'))
     parser_features.set_defaults(action=('features', main_features))
 
+    parser_analyze = subparsers.add_parser(
+        'analyze', help='Devel tool: Analyze cloud-init logs and data')
+    if sysv_args and sysv_args[0] == 'analyze':
+        # Only load this parser if analyze is specified to avoid file load cost
+        # FIXME put this under 'devel' subcommand (coming in next branch)
+        from cloudinit.analyze.__main__ import get_parser as analyze_parser
+        # Construct analyze subcommand parser
+        analyze_parser(parser_analyze)
+
     args = parser.parse_args(args=sysv_args)
 
-    try:
-        (name, functor) = args.action
-    except AttributeError:
-        parser.error('too few arguments')
+    # subparsers.required = True and each subparser sets action=(name, functor)
+    (name, functor) = args.action
 
     # Setup basic logging to start (until reinitialized)
-    # iff in debug mode...
+    # iff in debug mode.
     if args.debug:
         logging.setupBasicLogging()
diff --git a/doc/rtd/index.rst b/doc/rtd/index.rst
index a691103e..de67f361 100644
--- a/doc/rtd/index.rst
+++ b/doc/rtd/index.rst
@@ -40,6 +40,7 @@ initialization of a cloud instance.
    topics/merging.rst
    topics/network-config.rst
    topics/vendordata.rst
+   topics/debugging.rst
    topics/moreinfo.rst
    topics/hacking.rst
    topics/tests.rst
diff --git a/doc/rtd/topics/debugging.rst b/doc/rtd/topics/debugging.rst
new file mode 100644
index 00000000..4e43dd57
--- /dev/null
+++ b/doc/rtd/topics/debugging.rst
@@ -0,0 +1,146 @@
+********************************
+Testing and debugging cloud-init
+********************************
+
+Overview
+========
+This topic will discuss general approaches for test and debug of cloud-init on
+deployed instances.
+
+
+Boot Time Analysis - cloud-init analyze
+=======================================
+Occasionally instances don't appear as performant as we would like and
+cloud-init packages a simple facility to inspect what operations took
+cloud-init the longest during boot and setup.
+
+The script **/usr/bin/cloud-init** has an **analyze** sub-command which
+parses any cloud-init.log file into formatted and sorted events. It allows
+for detailed analysis of the most costly cloud-init operations to determine
+the long-pole in cloud-init configuration and setup. These subcommands
+default to reading /var/log/cloud-init.log.
+
+* ``analyze show`` Parse and organize cloud-init.log events by stage and
+  include each sub-stage granularity with time delta reports.
+
+.. code-block:: bash
+
+    $ cloud-init analyze show -i my-cloud-init.log
+    -- Boot Record 01 --
+    The total time elapsed since completing an event is printed after the "@"
+    character.
+    The time the event takes is printed after the "+" character.
+
+    Starting stage: modules-config
+    |`->config-emit_upstart ran successfully @05.47600s +00.00100s
+    |`->config-snap_config ran successfully @05.47700s +00.00100s
+    |`->config-ssh-import-id ran successfully @05.47800s +00.00200s
+    |`->config-locale ran successfully @05.48000s +00.00100s
+    ...
+
+
+* ``analyze dump`` Parse cloud-init.log into event records and return a list
+  of dictionaries that can be consumed for other reporting needs.
+
+.. code-block:: bash
+
+    $ cloud-init analyze dump -i my-cloud-init.log
+    [
+     {
+      "description": "running config modules",
+      "event_type": "start",
+      "name": "modules-config",
+      "origin": "cloudinit",
+      "timestamp": 1510807493.0
+     },...
+
+* ``analyze blame`` Parse cloud-init.log into event records and sort them
+  based on highest time cost for quick assessment of areas of cloud-init
+  that may need improvement.
+
+.. code-block:: bash
+
+    $ cloud-init analyze blame -i my-cloud-init.log
+    -- Boot Record 11 --
+       00.01300s (modules-final/config-scripts-per-boot)
+       00.00400s (modules-final/config-final-message)
+       00.00100s (modules-final/config-rightscale_userdata)
+    ...
+
+
+Analyze quickstart - LXC
+------------------------
+To quickly obtain a cloud-init log try using lxc on any ubuntu system:
+
+.. code-block:: bash
+
+    $ lxc init ubuntu-daily:xenial x1
+    $ lxc start x1
+    # Take lxc's cloud-init.log and pipe it to the analyzer
+    $ lxc file pull x1/var/log/cloud-init.log - | cloud-init analyze dump -i -
+    $ lxc file pull x1/var/log/cloud-init.log - | \
+      python3 -m cloudinit.analyze dump -i -
+
+
+Analyze quickstart - KVM
+------------------------
+To quickly analyze a KVM cloud-init log:
+
+1. Download the current cloud image
+
+.. code-block:: bash
+
+    $ wget https://cloud-images.ubuntu.com/daily/server/xenial/current/xenial-server-cloudimg-amd64.img
+
+2. Create a snapshot image to preserve the original cloud-image
+
+.. code-block:: bash
+
+    $ qemu-img create -b xenial-server-cloudimg-amd64.img -f qcow2 \
+      test-cloudinit.qcow2
+
+3. Create a seed image with metadata using ``cloud-localds``
+
+.. code-block:: bash
+
+    $ cat > user-data <<EOF
+    #cloud-config
+    password: passw0rd
+    chpasswd: { expire: False }
+    EOF
+    $ cloud-localds my-seed.img user-data
+
+4. Launch your modified VM
+
+.. code-block:: bash
+
+    $ kvm -m 512 -net nic -net user -redir tcp:2222::22 \
+      -drive file=test-cloudinit.qcow2,if=virtio,format=qcow2 \
+      -drive file=my-seed.img,if=virtio,format=raw
+
+5. Analyze the boot (blame, dump, show)
+
+.. code-block:: bash
+
+    $ ssh -p 2222 ubuntu@localhost 'cat /var/log/cloud-init.log' | \
+      cloud-init analyze blame -i -
+
+
+Running single cloud config modules
+===================================
+This subcommand is not called by the init system. It can be called manually to
+load the configured datasource and run a single cloud-config module once using
+the cached userdata and metadata after the instance has booted. When a module
+is run by cloud-init, it stores a semaphore file in
+``/var/lib/cloud/instance/sem/config_<module_name>.<frequency>`` which marks
+when the module last successfully ran. Presence of this semaphore file
+prevents a module from running again if it has already been run. To ensure
+that a module is run again, the desired frequency can be overridden on the
+commandline:
+
+.. code-block:: bash
+
+    $ sudo cloud-init single --name cc_ssh --frequency always
+    ...
+    Generating public/private ed25519 key pair
+    ...
+
+Inspect cloud-init.log for output of what operations were performed as a
+result.
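+
+Analyze output formats
+======================
+``analyze show`` also accepts a ``-f``/``--format`` string whose ``%`` keys
+map to event fields (the full mapping lives in ``format_key`` in
+``cloudinit/analyze/show.py``). A sketch of a custom format follows; the
+output line shown is illustrative rather than captured from a real boot:
+
+.. code-block:: bash
+
+    # %n is the event name, %D its description, %d its duration in seconds
+    $ cloud-init analyze show -f '%n: %D took %ds' -i my-cloud-init.log
+    init-local: searching for local datasources took 30.21000s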
diff --git a/tests/unittests/test_cli.py b/tests/unittests/test_cli.py
index 06f366b2..7780f164 100644
--- a/tests/unittests/test_cli.py
+++ b/tests/unittests/test_cli.py
@@ -31,9 +31,90 @@ class TestCLI(test_helpers.FilesystemMockingTestCase):
 
     def test_no_arguments_shows_error_message(self):
         exit_code = self._call_main()
-        self.assertIn('cloud-init: error: too few arguments',
-                      self.stderr.getvalue())
+        missing_subcommand_message = [
+            'too few arguments',  # python2.7 msg
+            'the following arguments are required: subcommand'  # python3 msg
+        ]
+        error = self.stderr.getvalue()
+        matches = [msg in error for msg in missing_subcommand_message]
+        self.assertTrue(
+            any(matches), 'Did not find error message for missing subcommand')
         self.assertEqual(2, exit_code)
 
+    def test_all_subcommands_represented_in_help(self):
+        """All known subparsers are represented in the cloud-init help doc."""
+        self._call_main()
+        error = self.stderr.getvalue()
+        expected_subcommands = ['analyze', 'init', 'modules', 'single',
+                                'dhclient-hook', 'features']
+        for subcommand in expected_subcommands:
+            self.assertIn(subcommand, error)
 
-# vi: ts=4 expandtab
+    @mock.patch('cloudinit.cmd.main.status_wrapper')
+    def test_init_subcommand_parser(self, m_status_wrapper):
+        """The subcommand 'init' calls status_wrapper passing init."""
+        self._call_main(['cloud-init', 'init'])
+        (name, parseargs) = m_status_wrapper.call_args_list[0][0]
+        self.assertEqual('init', name)
+        self.assertEqual('init', parseargs.subcommand)
+        self.assertEqual('init', parseargs.action[0])
+        self.assertEqual('main_init', parseargs.action[1].__name__)
+
+    @mock.patch('cloudinit.cmd.main.status_wrapper')
+    def test_modules_subcommand_parser(self, m_status_wrapper):
+        """The subcommand 'modules' calls status_wrapper passing modules."""
+        self._call_main(['cloud-init', 'modules'])
+        (name, parseargs) = m_status_wrapper.call_args_list[0][0]
+        self.assertEqual('modules', name)
+        self.assertEqual('modules', parseargs.subcommand)
+        self.assertEqual('modules', parseargs.action[0])
+        self.assertEqual('main_modules', parseargs.action[1].__name__)
+
+    def test_analyze_subcommand_parser(self):
+        """The subcommand 'cloud-init analyze' calls the correct subparser."""
+        self._call_main(['cloud-init', 'analyze'])
+        # These subcommands are only valid for the cloud-init analyze script
+        expected_subcommands = ['blame', 'show', 'dump']
+        error = self.stderr.getvalue()
+        for subcommand in expected_subcommands:
+            self.assertIn(subcommand, error)
+
+    @mock.patch('cloudinit.cmd.main.main_single')
+    def test_single_subcommand(self, m_main_single):
+        """The subcommand 'single' calls main_single with valid args."""
+        self._call_main(['cloud-init', 'single', '--name', 'cc_ntp'])
+        (name, parseargs) = m_main_single.call_args_list[0][0]
+        self.assertEqual('single', name)
+        self.assertEqual('single', parseargs.subcommand)
+        self.assertEqual('single', parseargs.action[0])
+        self.assertFalse(parseargs.debug)
+        self.assertFalse(parseargs.force)
+        self.assertIsNone(parseargs.frequency)
+        self.assertEqual('cc_ntp', parseargs.name)
+        self.assertFalse(parseargs.report)
+
+    @mock.patch('cloudinit.cmd.main.dhclient_hook')
+    def test_dhclient_hook_subcommand(self, m_dhclient_hook):
+        """The subcommand 'dhclient-hook' calls dhclient_hook with args."""
+        self._call_main(['cloud-init', 'dhclient-hook', 'net_action', 'eth0'])
+        (name, parseargs) = m_dhclient_hook.call_args_list[0][0]
+        self.assertEqual('dhclient_hook', name)
+        self.assertEqual('dhclient-hook', parseargs.subcommand)
+        self.assertEqual('dhclient_hook', parseargs.action[0])
+        self.assertFalse(parseargs.debug)
+        self.assertFalse(parseargs.force)
+        self.assertEqual('net_action', parseargs.net_action)
+        self.assertEqual('eth0', parseargs.net_interface)
+
+    @mock.patch('cloudinit.cmd.main.main_features')
+    def test_features_hook_subcommand(self, m_features):
+        """The subcommand 'features' calls main_features with args."""
+        self._call_main(['cloud-init', 'features'])
+        (name, parseargs) = m_features.call_args_list[0][0]
+        self.assertEqual('features', name)
+        self.assertEqual('features', parseargs.subcommand)
+        self.assertEqual('features', parseargs.action[0])
+        self.assertFalse(parseargs.debug)
+        self.assertFalse(parseargs.force)
+
+# vi: ts=4 expandtab
--
cgit v1.2.3
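
Reviewer note: the event parser can be exercised directly, without installing
the package. A minimal sketch, assuming this branch's checkout is on
PYTHONPATH and GNU date(1) is available (timestamps the parser does not
recognize are shelled out to it); the sample lines are adapted from the test
cases above:

    from cloudinit.analyze.dump import dump_events, json_dumps

    # Two log lines in formats parse_ci_logline understands: the cloud-init
    # "running" banner and a CLOUDINIT finish event (content illustrative).
    SAMPLE = (
        "2017-08-08 20:05:07,147 - util.py[DEBUG]: Cloud-init v. 0.7.9"
        " running 'init-local' at Tue, 08 Aug 2017 20:05:07 +0000."
        " Up 6.26 seconds.\n"
        "2016-08-30 21:53:25.972325+00:00 y1 [CLOUDINIT] handlers.py[DEBUG]:"
        " finish: modules-final: SUCCESS: running modules for final\n")

    # dump_events(rawdata=...) returns (events, raw_lines); each event is a
    # dict with name, description, timestamp, origin and event_type keys.
    events, _lines = dump_events(rawdata=SAMPLE)
    print(json_dumps(events))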