diff options
author | Chad Smith <chad.smith@canonical.com> | 2017-08-21 13:46:23 -0600 |
---|---|---|
committer | Chad Smith <chad.smith@canonical.com> | 2017-08-21 13:46:23 -0600 |
commit | e74d7752f1761c3a8d3c19877de4707d00c49d08 (patch) | |
tree | 0627dd5d06d83b770b38196bceabddd06c0b1b77 /cloudinit/analyze/dump.py | |
parent | dc2bd79949492bccdc1d7df0132f98c354d51943 (diff) | |
download | vyos-cloud-init-e74d7752f1761c3a8d3c19877de4707d00c49d08.tar.gz vyos-cloud-init-e74d7752f1761c3a8d3c19877de4707d00c49d08.zip |
tools: Add tooling for basic cloud-init performance analysis.
This branch adds cloudinit-analyze into cloud-init proper. It adds an
"analyze" subcommand to the cloud-init command line utility for quick
performance assessment of cloud-init stages and events.
On a cloud-init configured instance, running "cloud-init analyze blame"
will now report which cloud-init events cost the most wall time. This
allows for quick assessment of the most costly stages of cloud-init.
This functionality is pulled from Ryan Harper's analyze work.
The cloudinit-analyze main script itself has been refactored a bit for
inclusion as a subcommand of cloud-init CLI. There will be a followup
branch at some point which will optionally instrument detailed strace
profiling, but that approach needs a bit more discussion first.
This branch also adds:
* additional debugging topic to the sphinx-generated docs describing
cloud-init analyze, dump and show as well as cloud-init single usage.
* Updates the Makefile unittests target to include cloudinit directory
because we now have unittests within that package.
LP: #1709761
Diffstat (limited to 'cloudinit/analyze/dump.py')
-rw-r--r-- | cloudinit/analyze/dump.py | 176 |
1 files changed, 176 insertions, 0 deletions
diff --git a/cloudinit/analyze/dump.py b/cloudinit/analyze/dump.py new file mode 100644 index 00000000..ca4da496 --- /dev/null +++ b/cloudinit/analyze/dump.py @@ -0,0 +1,176 @@ +# This file is part of cloud-init. See LICENSE file for license information. + +import calendar +from datetime import datetime +import json +import sys + +from cloudinit import util + +stage_to_description = { + 'finished': 'finished running cloud-init', + 'init-local': 'starting search for local datasources', + 'init-network': 'searching for network datasources', + 'init': 'searching for network datasources', + 'modules-config': 'running config modules', + 'modules-final': 'finalizing modules', + 'modules': 'running modules for', + 'single': 'running single module ', +} + +# logger's asctime format +CLOUD_INIT_ASCTIME_FMT = "%Y-%m-%d %H:%M:%S,%f" + +# journctl -o short-precise +CLOUD_INIT_JOURNALCTL_FMT = "%b %d %H:%M:%S.%f %Y" + +# other +DEFAULT_FMT = "%b %d %H:%M:%S %Y" + + +def parse_timestamp(timestampstr): + # default syslog time does not include the current year + months = [calendar.month_abbr[m] for m in range(1, 13)] + if timestampstr.split()[0] in months: + # Aug 29 22:55:26 + FMT = DEFAULT_FMT + if '.' in timestampstr: + FMT = CLOUD_INIT_JOURNALCTL_FMT + dt = datetime.strptime(timestampstr + " " + + str(datetime.now().year), + FMT) + timestamp = dt.strftime("%s.%f") + elif "," in timestampstr: + # 2016-09-12 14:39:20,839 + dt = datetime.strptime(timestampstr, CLOUD_INIT_ASCTIME_FMT) + timestamp = dt.strftime("%s.%f") + else: + # allow date(1) to handle other formats we don't expect + timestamp = parse_timestamp_from_date(timestampstr) + + return float(timestamp) + + +def parse_timestamp_from_date(timestampstr): + out, _ = util.subp(['date', '+%s.%3N', '-d', timestampstr]) + timestamp = out.strip() + return float(timestamp) + + +def parse_ci_logline(line): + # Stage Starts: + # Cloud-init v. 0.7.7 running 'init-local' at \ + # Fri, 02 Sep 2016 19:28:07 +0000. Up 1.0 seconds. + # Cloud-init v. 0.7.7 running 'init' at \ + # Fri, 02 Sep 2016 19:28:08 +0000. Up 2.0 seconds. + # Cloud-init v. 0.7.7 finished at + # Aug 29 22:55:26 test1 [CLOUDINIT] handlers.py[DEBUG]: \ + # finish: modules-final: SUCCESS: running modules for final + # 2016-08-30T21:53:25.972325+00:00 y1 [CLOUDINIT] handlers.py[DEBUG]: \ + # finish: modules-final: SUCCESS: running modules for final + # + # Nov 03 06:51:06.074410 x2 cloud-init[106]: [CLOUDINIT] util.py[DEBUG]: \ + # Cloud-init v. 0.7.8 running 'init-local' at \ + # Thu, 03 Nov 2016 06:51:06 +0000. Up 1.0 seconds. + # + # 2017-05-22 18:02:01,088 - util.py[DEBUG]: Cloud-init v. 0.7.9 running \ + # 'init-local' at Mon, 22 May 2017 18:02:01 +0000. Up 2.0 seconds. + + separators = [' - ', ' [CLOUDINIT] '] + found = False + for sep in separators: + if sep in line: + found = True + break + + if not found: + return None + + (timehost, eventstr) = line.split(sep) + + # journalctl -o short-precise + if timehost.endswith(":"): + timehost = " ".join(timehost.split()[0:-1]) + + if "," in timehost: + timestampstr, extra = timehost.split(",") + timestampstr += ",%s" % extra.split()[0] + if ' ' in extra: + hostname = extra.split()[-1] + else: + hostname = timehost.split()[-1] + timestampstr = timehost.split(hostname)[0].strip() + if 'Cloud-init v.' in eventstr: + event_type = 'start' + if 'running' in eventstr: + stage_and_timestamp = eventstr.split('running')[1].lstrip() + event_name, _ = stage_and_timestamp.split(' at ') + event_name = event_name.replace("'", "").replace(":", "-") + if event_name == "init": + event_name = "init-network" + else: + # don't generate a start for the 'finished at' banner + return None + event_description = stage_to_description[event_name] + else: + (pymodloglvl, event_type, event_name) = eventstr.split()[0:3] + event_description = eventstr.split(event_name)[1].strip() + + event = { + 'name': event_name.rstrip(":"), + 'description': event_description, + 'timestamp': parse_timestamp(timestampstr), + 'origin': 'cloudinit', + 'event_type': event_type.rstrip(":"), + } + if event['event_type'] == "finish": + result = event_description.split(":")[0] + desc = event_description.split(result)[1].lstrip(':').strip() + event['result'] = result + event['description'] = desc.strip() + + return event + + +def json_dumps(data): + return json.dumps(data, indent=1, sort_keys=True, + separators=(',', ': ')) + + +def dump_events(cisource=None, rawdata=None): + events = [] + event = None + CI_EVENT_MATCHES = ['start:', 'finish:', 'Cloud-init v.'] + + if not any([cisource, rawdata]): + raise ValueError('Either cisource or rawdata parameters are required') + + if rawdata: + data = rawdata.splitlines() + else: + data = cisource.readlines() + + for line in data: + for match in CI_EVENT_MATCHES: + if match in line: + try: + event = parse_ci_logline(line) + except ValueError: + sys.stderr.write('Skipping invalid entry\n') + if event: + events.append(event) + + return events, data + + +def main(): + if len(sys.argv) > 1: + cisource = open(sys.argv[1]) + else: + cisource = sys.stdin + + return json_dumps(dump_events(cisource)) + + +if __name__ == "__main__": + print(main()) |