From 6a979bb2fabd187efc392de7b0852cd0361bc9b8 Mon Sep 17 00:00:00 2001 From: Ryan Harper Date: Tue, 17 Apr 2018 20:07:59 -0500 Subject: Implement bash completion script for cloud-init command line In bash shells with bash_completion enabled, now the cloud-init sub commands and parameters/flags will be shown. --- bash_completion/cloud-init | 77 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 bash_completion/cloud-init (limited to 'bash_completion') diff --git a/bash_completion/cloud-init b/bash_completion/cloud-init new file mode 100644 index 00000000..581432c8 --- /dev/null +++ b/bash_completion/cloud-init @@ -0,0 +1,77 @@ +# Copyright (C) 2018 Canonical Ltd. +# +# This file is part of cloud-init. See LICENSE file for license information. + +# bash completion for cloud-init cli +_cloudinit_complete() +{ + + local cur_word prev_word + cur_word="${COMP_WORDS[COMP_CWORD]}" + prev_word="${COMP_WORDS[COMP_CWORD-1]}" + + subcmds="analyze clean collect-logs devel dhclient-hook features init modules single status" + base_params="--help --file --version --debug --force" + case ${COMP_CWORD} in + 1) + COMPREPLY=($(compgen -W "$base_params $subcmds" -- $cur_word)) + ;; + 2) + case ${prev_word} in + analyze) + COMPREPLY=($(compgen -W "--help blame dump show" -- $cur_word)) + ;; + clean) + COMPREPLY=($(compgen -W "--help --logs --reboot --seed" -- $cur_word)) + ;; + collect-logs) + COMPREPLY=($(compgen -W "--help --tarfile --include-userdata" -- $cur_word)) + ;; + devel) + COMPREPLY=($(compgen -W "--help schema" -- $cur_word)) + ;; + dhclient-hook|features) + COMPREPLY=($(compgen -W "--help" -- $cur_word)) + ;; + init) + COMPREPLY=($(compgen -W "--help --local" -- $cur_word)) + ;; + modules) + COMPREPLY=($(compgen -W "--help --mode" -- $cur_word)) + ;; + + single) + COMPREPLY=($(compgen -W "--help --name --frequency --report" -- $cur_word)) + ;; + status) + COMPREPLY=($(compgen -W "--help --long --wait" -- $cur_word)) + ;; + esac + ;; + 3) + case ${prev_word} in + blame|dump) + COMPREPLY=($(compgen -W "--help --infile --outfile" -- $cur_word)) + ;; + --mode) + COMPREPLY=($(compgen -W "--help init config final" -- $cur_word)) + ;; + --frequency) + COMPREPLY=($(compgen -W "--help instance always once" -- $cur_word)) + ;; + schema) + COMPREPLY=($(compgen -W "--help --config-file --doc --annotate" -- $cur_word)) + ;; + show) + COMPREPLY=($(compgen -W "--help --format --infile --outfile" -- $cur_word)) + ;; + esac + ;; + *) + COMPREPLY=() + ;; + esac +} +complete -F _cloudinit_complete cloud-init + +# vi: syntax=bash expandtab -- cgit v1.2.3 From a6f95c72259f2890e4a9f9f11166310812173c68 Mon Sep 17 00:00:00 2001 From: Scott Moser Date: Mon, 6 Aug 2018 16:50:51 +0000 Subject: tools: Add 'net-convert' subcommand command to 'cloud-init devel'. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the tools/net-convert.py to be exposed as part of 'cloud-init devel' subcommands. 
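As a rough, standalone sketch of the dispatch convention the devel parser uses (handler stubbed out, names illustrative of this patch, not part of it):

```python
# Hypothetical minimal version of the 'cloud-init devel' dispatch pattern.
import argparse

parser = argparse.ArgumentParser(prog='cloud-init devel')
subparsers = parser.add_subparsers(title='Subcommands', dest='subcommand')
subparsers.required = True
sub = subparsers.add_parser('net-convert', help='Debug network config conversions')
# Each subcommand stores a (name, handler) tuple; main() looks it up and calls it.
sub.set_defaults(action=('net-convert', lambda name, args: 0))  # stub handler

args = parser.parse_args(['net-convert'])
name, handler = args.action
handler(name, args)
```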
It can now be called like:
  $ cloud-init devel net-convert

Or, if you have just checked out the source (and have no cli executable):
  $ python3 -m cloudinit.cmd.devel.net_convert
or
  $ python3 -m cloudinit.cmd.main devel net-convert
---
 bash_completion/cloud-init         |   7 ++-
 cloudinit/cmd/devel/net_convert.py | 115 +++++++++++++++++++++++++++++++++++++
 cloudinit/cmd/devel/parser.py      |  20 ++++---
 tests/unittests/test_cli.py        |   3 +-
 tools/net-convert.py               | 104 ---------------------------------
 5 files changed, 134 insertions(+), 115 deletions(-)
 create mode 100755 cloudinit/cmd/devel/net_convert.py
 delete mode 100755 tools/net-convert.py
(limited to 'bash_completion')

diff --git a/bash_completion/cloud-init b/bash_completion/cloud-init
index 581432c8..f38164b0 100644
--- a/bash_completion/cloud-init
+++ b/bash_completion/cloud-init
@@ -28,7 +28,7 @@ _cloudinit_complete()
             COMPREPLY=($(compgen -W "--help --tarfile --include-userdata" -- $cur_word))
             ;;
         devel)
-            COMPREPLY=($(compgen -W "--help schema" -- $cur_word))
+            COMPREPLY=($(compgen -W "--help schema net-convert" -- $cur_word))
             ;;
         dhclient-hook|features)
             COMPREPLY=($(compgen -W "--help" -- $cur_word))
@@ -59,6 +59,9 @@ _cloudinit_complete()
         --frequency)
             COMPREPLY=($(compgen -W "--help instance always once" -- $cur_word))
             ;;
+        net-convert)
+            COMPREPLY=($(compgen -W "--help --network-data --kind --directory --output-kind" -- $cur_word))
+            ;;
         schema)
             COMPREPLY=($(compgen -W "--help --config-file --doc --annotate" -- $cur_word))
             ;;
@@ -74,4 +77,4 @@ _cloudinit_complete()
 }
 complete -F _cloudinit_complete cloud-init
 
-# vi: syntax=bash expandtab
+# vi: syntax=sh expandtab
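Aside: a hedged sketch of exercising the relocated module directly through its own parser; '/tmp/eni.cfg' and '/tmp/out' are placeholder paths (the input file must exist, since argparse opens it):

```python
# Illustration only: drive the new module through its own argparse parser.
from cloudinit.cmd.devel import net_convert

parser = net_convert.get_parser()
args = parser.parse_args([
    '--network-data', '/tmp/eni.cfg',   # opened by argparse (type=open)
    '--kind', 'eni',
    '--directory', '/tmp/out',
    '--output-kind', 'netplan'])
net_convert.handle_args(net_convert.NAME, args)
```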
diff --git a/cloudinit/cmd/devel/net_convert.py b/cloudinit/cmd/devel/net_convert.py
new file mode 100755
index 00000000..1ec08a3c
--- /dev/null
+++ b/cloudinit/cmd/devel/net_convert.py
@@ -0,0 +1,115 @@
+# This file is part of cloud-init. See LICENSE file for license information.
+
+"""Debug network config format conversions."""
+import argparse
+import json
+import os
+import sys
+import yaml
+
+from cloudinit.sources.helpers import openstack
+
+from cloudinit.net import eni, netplan, network_state, sysconfig
+from cloudinit import log
+
+NAME = 'net-convert'
+
+
+def get_parser(parser=None):
+    """Build or extend an arg parser for the net-convert utility.
+
+    @param parser: Optional existing ArgumentParser instance representing the
+        subcommand which will be extended to support the args of this utility.
+
+    @returns: ArgumentParser with proper argument configuration.
+    """
+    if not parser:
+        parser = argparse.ArgumentParser(prog=NAME, description=__doc__)
+    parser.add_argument("-p", "--network-data", type=open,
+                        metavar="PATH", required=True)
+    parser.add_argument("-k", "--kind",
+                        choices=['eni', 'network_data.json', 'yaml'],
+                        required=True)
+    parser.add_argument("-d", "--directory",
+                        metavar="PATH",
+                        help="directory to place output in",
+                        required=True)
+    parser.add_argument("-m", "--mac",
+                        metavar="name,mac",
+                        action='append',
+                        help="interface name to mac mapping")
+    parser.add_argument("--debug", action='store_true',
+                        help='enable debug logging to stderr.')
+    parser.add_argument("-O", "--output-kind",
+                        choices=['eni', 'netplan', 'sysconfig'],
+                        required=True)
+    return parser
+
+
+def handle_args(name, args):
+    if not args.directory.endswith("/"):
+        args.directory += "/"
+
+    if not os.path.isdir(args.directory):
+        os.makedirs(args.directory)
+
+    if args.debug:
+        log.setupBasicLogging(level=log.DEBUG)
+    else:
+        log.setupBasicLogging(level=log.WARN)
+    if args.mac:
+        known_macs = {}
+        for item in args.mac:
+            iface_name, iface_mac = item.split(",", 1)
+            known_macs[iface_mac] = iface_name
+    else:
+        known_macs = None
+
+    net_data = args.network_data.read()
+    if args.kind == "eni":
+        pre_ns = eni.convert_eni_data(net_data)
+        ns = network_state.parse_net_config_data(pre_ns)
+    elif args.kind == "yaml":
+        pre_ns = yaml.load(net_data)
+        if 'network' in pre_ns:
+            pre_ns = pre_ns.get('network')
+        if args.debug:
+            sys.stderr.write('\n'.join(
+                ["Input YAML",
+                 yaml.dump(pre_ns, default_flow_style=False, indent=4), ""]))
+        ns = network_state.parse_net_config_data(pre_ns)
+    else:
+        pre_ns = openstack.convert_net_json(
+            json.loads(net_data), known_macs=known_macs)
+        ns = network_state.parse_net_config_data(pre_ns)
+
+    if not ns:
+        raise RuntimeError("No valid network_state object created from "
+                           "input data")
+
+    if args.debug:
+        sys.stderr.write('\n'.join([
+            "", "Internal State",
+            yaml.dump(ns, default_flow_style=False, indent=4), ""]))
+    if args.output_kind == "eni":
+        r_cls = eni.Renderer
+    elif args.output_kind == "netplan":
+        r_cls = netplan.Renderer
+    else:
+        r_cls = sysconfig.Renderer
+
+    r = r_cls()
+    sys.stderr.write(''.join([
+        "Read input format '%s' from '%s'.\n" % (
+            args.kind, args.network_data.name),
+        "Wrote output format '%s' to '%s'\n" % (
+            args.output_kind, args.directory)]) + "\n")
+    r.render_network_state(network_state=ns, target=args.directory)
+
+
+if __name__ == '__main__':
+    args = get_parser().parse_args()
+    handle_args(NAME, args)
+
+
+# vi: ts=4 expandtab
diff --git a/cloudinit/cmd/devel/parser.py b/cloudinit/cmd/devel/parser.py
index acacc4ed..40a4b019 100644
--- a/cloudinit/cmd/devel/parser.py
+++ b/cloudinit/cmd/devel/parser.py
@@ -5,8 +5,9 @@
 """Define 'devel' subcommand argument parsers to include in cloud-init cmd."""
 
 import argparse
-from cloudinit.config.schema import (
-    get_parser as schema_parser, handle_schema_args)
+from cloudinit.config import schema
+
+from . 
import net_convert def get_parser(parser=None): @@ -17,10 +18,15 @@ def get_parser(parser=None): subparsers = parser.add_subparsers(title='Subcommands', dest='subcommand') subparsers.required = True - parser_schema = subparsers.add_parser( - 'schema', help='Validate cloud-config files or document schema') - # Construct schema subcommand parser - schema_parser(parser_schema) - parser_schema.set_defaults(action=('schema', handle_schema_args)) + subcmds = [ + ('schema', 'Validate cloud-config files for document schema', + schema.get_parser, schema.handle_schema_args), + (net_convert.NAME, net_convert.__doc__, + net_convert.get_parser, net_convert.handle_args) + ] + for (subcmd, helpmsg, get_parser, handler) in subcmds: + parser = subparsers.add_parser(subcmd, help=helpmsg) + get_parser(parser) + parser.set_defaults(action=(subcmd, handler)) return parser diff --git a/tests/unittests/test_cli.py b/tests/unittests/test_cli.py index 0c0f427a..199d69b0 100644 --- a/tests/unittests/test_cli.py +++ b/tests/unittests/test_cli.py @@ -208,8 +208,7 @@ class TestCLI(test_helpers.FilesystemMockingTestCase): for subcommand in expected_subcommands: self.assertIn(subcommand, error) - @mock.patch('cloudinit.config.schema.handle_schema_args') - def test_wb_devel_schema_subcommand_parser(self, m_schema): + def test_wb_devel_schema_subcommand_parser(self): """The subcommand cloud-init schema calls the correct subparser.""" exit_code = self._call_main(['cloud-init', 'devel', 'schema']) self.assertEqual(1, exit_code) diff --git a/tools/net-convert.py b/tools/net-convert.py deleted file mode 100755 index d1a4a646..00000000 --- a/tools/net-convert.py +++ /dev/null @@ -1,104 +0,0 @@ -#!/usr/bin/python3 -# This file is part of cloud-init. See LICENSE file for license information. 
- -import argparse -import json -import os -import sys -import yaml - -from cloudinit.sources.helpers import openstack - -from cloudinit.net import eni -from cloudinit import log -from cloudinit.net import netplan -from cloudinit.net import network_state -from cloudinit.net import sysconfig - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--network-data", "-p", type=open, - metavar="PATH", required=True) - parser.add_argument("--kind", "-k", - choices=['eni', 'network_data.json', 'yaml'], - required=True) - parser.add_argument("-d", "--directory", - metavar="PATH", - help="directory to place output in", - required=True) - parser.add_argument("-m", "--mac", - metavar="name,mac", - action='append', - help="interface name to mac mapping") - parser.add_argument("--debug", action='store_true', - help='enable debug logging to stderr.') - parser.add_argument("--output-kind", "-ok", - choices=['eni', 'netplan', 'sysconfig'], - required=True) - args = parser.parse_args() - - if not args.directory.endswith("/"): - args.directory += "/" - - if not os.path.isdir(args.directory): - os.makedirs(args.directory) - - if args.debug: - log.setupBasicLogging(level=log.DEBUG) - else: - log.setupBasicLogging(level=log.WARN) - if args.mac: - known_macs = {} - for item in args.mac: - iface_name, iface_mac = item.split(",", 1) - known_macs[iface_mac] = iface_name - else: - known_macs = None - - net_data = args.network_data.read() - if args.kind == "eni": - pre_ns = eni.convert_eni_data(net_data) - ns = network_state.parse_net_config_data(pre_ns) - elif args.kind == "yaml": - pre_ns = yaml.load(net_data) - if 'network' in pre_ns: - pre_ns = pre_ns.get('network') - if args.debug: - sys.stderr.write('\n'.join( - ["Input YAML", - yaml.dump(pre_ns, default_flow_style=False, indent=4), ""])) - ns = network_state.parse_net_config_data(pre_ns) - else: - pre_ns = openstack.convert_net_json( - json.loads(net_data), known_macs=known_macs) - ns = network_state.parse_net_config_data(pre_ns) - - if not ns: - raise RuntimeError("No valid network_state object created from" - "input data") - - if args.debug: - sys.stderr.write('\n'.join([ - "", "Internal State", - yaml.dump(ns, default_flow_style=False, indent=4), ""])) - if args.output_kind == "eni": - r_cls = eni.Renderer - elif args.output_kind == "netplan": - r_cls = netplan.Renderer - else: - r_cls = sysconfig.Renderer - - r = r_cls() - sys.stderr.write(''.join([ - "Read input format '%s' from '%s'.\n" % ( - args.kind, args.network_data.name), - "Wrote output format '%s' to '%s'\n" % ( - args.output_kind, args.directory)]) + "\n") - r.render_network_state(network_state=ns, target=args.directory) - - -if __name__ == '__main__': - main() - -# vi: ts=4 expandtab -- cgit v1.2.3 From c7555762f3a30190ce7726b4d013bc3e83c7e4b6 Mon Sep 17 00:00:00 2001 From: Chad Smith Date: Tue, 11 Sep 2018 17:31:46 +0000 Subject: user-data: jinja template to render instance-data.json in cloud-config Allow users to provide '## template: jinja' as the first line or their #cloud-config or custom script user-data parts. When this header exists, the cloud-config or script will be rendered as a jinja template. All instance metadata keys and values present in /run/cloud-init/instance-data.json will be available as jinja variables for the template. This means any cloud-config module or script can reference any standardized instance data in templates and scripts. 
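For example (variable names illustrative, not exhaustive), the rendering step behaves like:

```python
# Illustration of the jinja expansion, assuming jinja2 is installed and using
# example instance-data values; the real handler also strips the
# '## template: jinja' header and flags undefined variables.
from jinja2 import Template

instance_data = {'v1': {'local_hostname': 'myhost', 'region': 'us-east-2'}}
part = '#cloud-config\nhostname: {{ v1.local_hostname }}\n'
print(Template(part).render(**instance_data))
# -> #cloud-config
#    hostname: myhost
```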
Additionally, any standardized instance-data.json keys scoped below a
'<v#>' key are promoted as top-level keys for ease of reference in
templates. This means that '{{ local_hostname }}' is the same as using the
latest '{{ v#.local_hostname }}'.

Since instance-data is written to /run/cloud-init/instance-data.json, make
sure it is persisted across reboots when the cached datasource object is
reloaded.

LP: #1791781
---
 bash_completion/cloud-init                         |   3 ++
 cloudinit/cmd/devel/__init__.py                    |  25 ++
 cloudinit/cmd/devel/parser.py                      |   5 +-
 cloudinit/cmd/devel/render.py                      |  90 ++++++
 cloudinit/cmd/devel/tests/test_render.py           | 101 +++++++
 cloudinit/cmd/main.py                              |  16 +-
 cloudinit/handlers/__init__.py                     |  11 +-
 cloudinit/handlers/boot_hook.py                    |  12 +-
 cloudinit/handlers/cloud_config.py                 |  15 +-
 cloudinit/handlers/jinja_template.py               | 137 +++++++++
 cloudinit/handlers/shell_script.py                 |   9 +-
 cloudinit/handlers/upstart_job.py                  |   9 +-
 cloudinit/helpers.py                               |   4 +
 cloudinit/log.py                                   |  12 +-
 cloudinit/sources/__init__.py                      |  47 ++-
 cloudinit/sources/tests/test_init.py               |  75 ++++-
 cloudinit/stages.py                                |  22 +-
 cloudinit/templater.py                             |  28 +-
 cloudinit/tests/helpers.py                         |   9 +
 doc/rtd/topics/capabilities.rst                    |  15 +-
 doc/rtd/topics/datasources.rst                     |  47 +++
 doc/rtd/topics/format.rst                          |  21 +-
 tests/cloud_tests/testcases/base.py                |   8 +-
 tests/unittests/test_builtin_handlers.py           | 324 +++++++++++++++++++--
 .../test_handler/test_handler_etc_hosts.py         |   1 +
 tests/unittests/test_handler/test_handler_ntp.py   |   1 +
 tests/unittests/test_templating.py                 |  23 ++
 27 files changed, 960 insertions(+), 110 deletions(-)
 create mode 100755 cloudinit/cmd/devel/render.py
 create mode 100644 cloudinit/cmd/devel/tests/test_render.py
 create mode 100644 cloudinit/handlers/jinja_template.py
(limited to 'bash_completion')

diff --git a/bash_completion/cloud-init b/bash_completion/cloud-init
index f38164b0..b3a5ced3 100644
--- a/bash_completion/cloud-init
+++ b/bash_completion/cloud-init
@@ -62,6 +62,9 @@ _cloudinit_complete()
         net-convert)
             COMPREPLY=($(compgen -W "--help --network-data --kind --directory --output-kind" -- $cur_word))
             ;;
+        render)
+            COMPREPLY=($(compgen -W "--help --instance-data --debug" -- $cur_word))
+            ;;
         schema)
             COMPREPLY=($(compgen -W "--help --config-file --doc --annotate" -- $cur_word))
             ;;
diff --git a/cloudinit/cmd/devel/__init__.py b/cloudinit/cmd/devel/__init__.py
index e69de29b..3ae28b69 100644
--- a/cloudinit/cmd/devel/__init__.py
+++ b/cloudinit/cmd/devel/__init__.py
@@ -0,0 +1,25 @@
+# This file is part of cloud-init. See LICENSE file for license information.
+
+"""Common cloud-init devel commandline utility functions."""
+
+
+import logging
+
+from cloudinit import log
+from cloudinit.stages import Init
+
+
+def addLogHandlerCLI(logger, log_level):
+    """Add a commandline logging handler to emit messages to stderr."""
+    formatter = logging.Formatter('%(levelname)s: %(message)s')
+    log.setupBasicLogging(log_level, formatter=formatter)
+    return logger
+
+
+def read_cfg_paths():
+    """Return a Paths object based on the system configuration on disk."""
+    init = Init(ds_deps=[])
+    init.read_cfg()
+    return init.paths
+
+# vi: ts=4 expandtab
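Aside: a condensed sketch of what `cloud-init devel render` does, using the render_jinja_payload_from_file helper this patch adds (paths are examples):

```python
# Condensed, hypothetical equivalent of the render subcommand's core flow.
from cloudinit.handlers.jinja_template import render_jinja_payload_from_file

with open('/tmp/user-data') as f:          # begins with '## template: jinja'
    payload = f.read()
rendered = render_jinja_payload_from_file(
    payload=payload, payload_fn='user-data',
    instance_data_file='/run/cloud-init/instance-data.json', debug=True)
print(rendered)
```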
diff --git a/cloudinit/cmd/devel/parser.py b/cloudinit/cmd/devel/parser.py
index 40a4b019..99a234ce 100644
--- a/cloudinit/cmd/devel/parser.py
+++ b/cloudinit/cmd/devel/parser.py
@@ -8,6 +8,7 @@ import argparse
 from cloudinit.config import schema
 
 from . import net_convert
+from . import render
 
 
 def get_parser(parser=None):
@@ -22,7 +23,9 @@ def get_parser(parser=None):
         ('schema', 'Validate cloud-config files for document schema',
          schema.get_parser, schema.handle_schema_args),
         (net_convert.NAME, net_convert.__doc__,
-         net_convert.get_parser, net_convert.handle_args)
+         net_convert.get_parser, net_convert.handle_args),
+        (render.NAME, render.__doc__,
+         render.get_parser, render.handle_args)
     ]
     for (subcmd, helpmsg, get_parser, handler) in subcmds:
         parser = subparsers.add_parser(subcmd, help=helpmsg)
diff --git a/cloudinit/cmd/devel/render.py b/cloudinit/cmd/devel/render.py
new file mode 100755
index 00000000..e85933db
--- /dev/null
+++ b/cloudinit/cmd/devel/render.py
@@ -0,0 +1,90 @@
+# This file is part of cloud-init. See LICENSE file for license information.
+
+"""Debug jinja template rendering of user-data."""
+
+import argparse
+import os
+import sys
+
+from cloudinit.handlers.jinja_template import render_jinja_payload_from_file
+from cloudinit import log
+from cloudinit.sources import INSTANCE_JSON_FILE
+from cloudinit import util
+from . import addLogHandlerCLI, read_cfg_paths
+
+NAME = 'render'
+DEFAULT_INSTANCE_DATA = '/run/cloud-init/instance-data.json'
+
+LOG = log.getLogger(NAME)
+
+
+def get_parser(parser=None):
+    """Build or extend an arg parser for the jinja render utility.
+
+    @param parser: Optional existing ArgumentParser instance representing the
+        subcommand which will be extended to support the args of this utility.
+
+    @returns: ArgumentParser with proper argument configuration.
+    """
+    if not parser:
+        parser = argparse.ArgumentParser(prog=NAME, description=__doc__)
+    parser.add_argument(
+        'user_data', type=str, help='Path to the user-data file to render')
+    parser.add_argument(
+        '-i', '--instance-data', type=str,
+        help=('Optional path to instance-data.json file. Defaults to'
+              ' /run/cloud-init/instance-data.json'))
+    parser.add_argument('-d', '--debug', action='store_true', default=False,
+                        help='Add verbose messages during template render')
+    return parser
+
+
+def handle_args(name, args):
+    """Render the provided user-data template file using instance-data values.
+
+    Also set up CLI log handlers to report to stderr, since this is a
+    development utility which should be run by a human on the CLI.
+
+    @return 0 on success, 1 on failure. 
+ """ + addLogHandlerCLI(LOG, log.DEBUG if args.debug else log.WARNING) + if not args.instance_data: + paths = read_cfg_paths() + instance_data_fn = os.path.join( + paths.run_dir, INSTANCE_JSON_FILE) + else: + instance_data_fn = args.instance_data + try: + with open(instance_data_fn) as stream: + instance_data = stream.read() + instance_data = util.load_json(instance_data) + except IOError: + LOG.error('Missing instance-data.json file: %s', instance_data_fn) + return 1 + try: + with open(args.user_data) as stream: + user_data = stream.read() + except IOError: + LOG.error('Missing user-data file: %s', args.user_data) + return 1 + rendered_payload = render_jinja_payload_from_file( + payload=user_data, payload_fn=args.user_data, + instance_data_file=instance_data_fn, + debug=True if args.debug else False) + if not rendered_payload: + LOG.error('Unable to render user-data file: %s', args.user_data) + return 1 + sys.stdout.write(rendered_payload) + return 0 + + +def main(): + args = get_parser().parse_args() + return(handle_args(NAME, args)) + + +if __name__ == '__main__': + sys.exit(main()) + + +# vi: ts=4 expandtab diff --git a/cloudinit/cmd/devel/tests/test_render.py b/cloudinit/cmd/devel/tests/test_render.py new file mode 100644 index 00000000..fc5d2c0d --- /dev/null +++ b/cloudinit/cmd/devel/tests/test_render.py @@ -0,0 +1,101 @@ +# This file is part of cloud-init. See LICENSE file for license information. + +from six import StringIO +import os + +from collections import namedtuple +from cloudinit.cmd.devel import render +from cloudinit.helpers import Paths +from cloudinit.sources import INSTANCE_JSON_FILE +from cloudinit.tests.helpers import CiTestCase, mock, skipUnlessJinja +from cloudinit.util import ensure_dir, write_file + + +class TestRender(CiTestCase): + + with_logs = True + + args = namedtuple('renderargs', 'user_data instance_data debug') + + def setUp(self): + super(TestRender, self).setUp() + self.tmp = self.tmp_dir() + + def test_handle_args_error_on_missing_user_data(self): + """When user_data file path does not exist, log an error.""" + absent_file = self.tmp_path('user-data', dir=self.tmp) + instance_data = self.tmp_path('instance-data', dir=self.tmp) + write_file(instance_data, '{}') + args = self.args( + user_data=absent_file, instance_data=instance_data, debug=False) + with mock.patch('sys.stderr', new_callable=StringIO): + self.assertEqual(1, render.handle_args('anyname', args)) + self.assertIn( + 'Missing user-data file: %s' % absent_file, + self.logs.getvalue()) + + def test_handle_args_error_on_missing_instance_data(self): + """When instance_data file path does not exist, log an error.""" + user_data = self.tmp_path('user-data', dir=self.tmp) + absent_file = self.tmp_path('instance-data', dir=self.tmp) + args = self.args( + user_data=user_data, instance_data=absent_file, debug=False) + with mock.patch('sys.stderr', new_callable=StringIO): + self.assertEqual(1, render.handle_args('anyname', args)) + self.assertIn( + 'Missing instance-data.json file: %s' % absent_file, + self.logs.getvalue()) + + def test_handle_args_defaults_instance_data(self): + """When no instance_data argument, default to configured run_dir.""" + user_data = self.tmp_path('user-data', dir=self.tmp) + run_dir = self.tmp_path('run_dir', dir=self.tmp) + ensure_dir(run_dir) + paths = Paths({'run_dir': run_dir}) + self.add_patch('cloudinit.cmd.devel.render.read_cfg_paths', 'm_paths') + self.m_paths.return_value = paths + args = self.args( + user_data=user_data, instance_data=None, debug=False) + with 
mock.patch('sys.stderr', new_callable=StringIO): + self.assertEqual(1, render.handle_args('anyname', args)) + json_file = os.path.join(run_dir, INSTANCE_JSON_FILE) + self.assertIn( + 'Missing instance-data.json file: %s' % json_file, + self.logs.getvalue()) + + @skipUnlessJinja() + def test_handle_args_renders_instance_data_vars_in_template(self): + """If user_data file is a jinja template render instance-data vars.""" + user_data = self.tmp_path('user-data', dir=self.tmp) + write_file(user_data, '##template: jinja\nrendering: {{ my_var }}') + instance_data = self.tmp_path('instance-data', dir=self.tmp) + write_file(instance_data, '{"my-var": "jinja worked"}') + args = self.args( + user_data=user_data, instance_data=instance_data, debug=True) + with mock.patch('sys.stderr', new_callable=StringIO) as m_console_err: + with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout: + self.assertEqual(0, render.handle_args('anyname', args)) + self.assertIn( + 'DEBUG: Converted jinja variables\n{', self.logs.getvalue()) + self.assertIn( + 'DEBUG: Converted jinja variables\n{', m_console_err.getvalue()) + self.assertEqual('rendering: jinja worked', m_stdout.getvalue()) + + @skipUnlessJinja() + def test_handle_args_warns_and_gives_up_on_invalid_jinja_operation(self): + """If user_data file has invalid jinja operations log warnings.""" + user_data = self.tmp_path('user-data', dir=self.tmp) + write_file(user_data, '##template: jinja\nrendering: {{ my-var }}') + instance_data = self.tmp_path('instance-data', dir=self.tmp) + write_file(instance_data, '{"my-var": "jinja worked"}') + args = self.args( + user_data=user_data, instance_data=instance_data, debug=True) + with mock.patch('sys.stderr', new_callable=StringIO): + self.assertEqual(1, render.handle_args('anyname', args)) + self.assertIn( + 'WARNING: Ignoring jinja template for %s: Undefined jinja' + ' variable: "my-var". Jinja tried subtraction. Perhaps you meant' + ' "my_var"?' % user_data, + self.logs.getvalue()) + +# vi: ts=4 expandtab diff --git a/cloudinit/cmd/main.py b/cloudinit/cmd/main.py index 4ea4fe7f..0eee583c 100644 --- a/cloudinit/cmd/main.py +++ b/cloudinit/cmd/main.py @@ -348,6 +348,7 @@ def main_init(name, args): LOG.debug("[%s] barreling on in force mode without datasource", mode) + _maybe_persist_instance_data(init) # Stage 6 iid = init.instancify() LOG.debug("[%s] %s will now be targeting instance id: %s. new=%s", @@ -490,6 +491,7 @@ def main_modules(action_name, args): print_exc(msg) if not args.force: return [(msg)] + _maybe_persist_instance_data(init) # Stage 3 mods = stages.Modules(init, extract_fns(args), reporter=args.reporter) # Stage 4 @@ -541,6 +543,7 @@ def main_single(name, args): " likely bad things to come!")) if not args.force: return 1 + _maybe_persist_instance_data(init) # Stage 3 mods = stages.Modules(init, extract_fns(args), reporter=args.reporter) mod_args = args.module_args @@ -688,6 +691,15 @@ def status_wrapper(name, args, data_d=None, link_d=None): return len(v1[mode]['errors']) +def _maybe_persist_instance_data(init): + """Write instance-data.json file if absent and datasource is restored.""" + if init.ds_restored: + instance_data_file = os.path.join( + init.paths.run_dir, sources.INSTANCE_JSON_FILE) + if not os.path.exists(instance_data_file): + init.datasource.persist_instance_data() + + def _maybe_set_hostname(init, stage, retry_stage): """Call set-hostname if metadata, vendordata or userdata provides it. 
@@ -887,6 +899,8 @@ def main(sysv_args=None): if __name__ == '__main__': if 'TZ' not in os.environ: os.environ['TZ'] = ":/etc/localtime" - main(sys.argv) + return_value = main(sys.argv) + if return_value: + sys.exit(return_value) # vi: ts=4 expandtab diff --git a/cloudinit/handlers/__init__.py b/cloudinit/handlers/__init__.py index c3576c04..0db75af9 100644 --- a/cloudinit/handlers/__init__.py +++ b/cloudinit/handlers/__init__.py @@ -41,7 +41,7 @@ PART_HANDLER_FN_TMPL = 'part-handler-%03d' # For parts without filenames PART_FN_TPL = 'part-%03d' -# Different file beginnings to there content type +# Different file beginnings to their content type INCLUSION_TYPES_MAP = { '#include': 'text/x-include-url', '#include-once': 'text/x-include-once-url', @@ -52,6 +52,7 @@ INCLUSION_TYPES_MAP = { '#cloud-boothook': 'text/cloud-boothook', '#cloud-config-archive': 'text/cloud-config-archive', '#cloud-config-jsonp': 'text/cloud-config-jsonp', + '## template: jinja': 'text/jinja2', } # Sorted longest first @@ -69,9 +70,13 @@ class Handler(object): def __repr__(self): return "%s: [%s]" % (type_utils.obj_name(self), self.list_types()) - @abc.abstractmethod def list_types(self): - raise NotImplementedError() + # Each subclass must define the supported content prefixes it handles. + if not hasattr(self, 'prefixes'): + raise NotImplementedError('Missing prefixes subclass attribute') + else: + return [INCLUSION_TYPES_MAP[prefix] + for prefix in getattr(self, 'prefixes')] @abc.abstractmethod def handle_part(self, *args, **kwargs): diff --git a/cloudinit/handlers/boot_hook.py b/cloudinit/handlers/boot_hook.py index 057b4dbc..dca50a49 100644 --- a/cloudinit/handlers/boot_hook.py +++ b/cloudinit/handlers/boot_hook.py @@ -17,10 +17,13 @@ from cloudinit import util from cloudinit.settings import (PER_ALWAYS) LOG = logging.getLogger(__name__) -BOOTHOOK_PREFIX = "#cloud-boothook" class BootHookPartHandler(handlers.Handler): + + # The content prefixes this handler understands. + prefixes = ['#cloud-boothook'] + def __init__(self, paths, datasource, **_kwargs): handlers.Handler.__init__(self, PER_ALWAYS) self.boothook_dir = paths.get_ipath("boothooks") @@ -28,16 +31,11 @@ class BootHookPartHandler(handlers.Handler): if datasource: self.instance_id = datasource.get_instance_id() - def list_types(self): - return [ - handlers.type_from_starts_with(BOOTHOOK_PREFIX), - ] - def _write_part(self, payload, filename): filename = util.clean_filename(filename) filepath = os.path.join(self.boothook_dir, filename) contents = util.strip_prefix_suffix(util.dos2unix(payload), - prefix=BOOTHOOK_PREFIX) + prefix=self.prefixes[0]) util.write_file(filepath, contents.lstrip(), 0o700) return filepath diff --git a/cloudinit/handlers/cloud_config.py b/cloudinit/handlers/cloud_config.py index 178a5b9b..99bf0e61 100644 --- a/cloudinit/handlers/cloud_config.py +++ b/cloudinit/handlers/cloud_config.py @@ -42,14 +42,12 @@ DEF_MERGERS = mergers.string_extract_mergers('dict(replace)+list()+str()') CLOUD_PREFIX = "#cloud-config" JSONP_PREFIX = "#cloud-config-jsonp" -# The file header -> content types this module will handle. -CC_TYPES = { - JSONP_PREFIX: handlers.type_from_starts_with(JSONP_PREFIX), - CLOUD_PREFIX: handlers.type_from_starts_with(CLOUD_PREFIX), -} - class CloudConfigPartHandler(handlers.Handler): + + # The content prefixes this handler understands. 
+ prefixes = [CLOUD_PREFIX, JSONP_PREFIX] + def __init__(self, paths, **_kwargs): handlers.Handler.__init__(self, PER_ALWAYS, version=3) self.cloud_buf = None @@ -58,9 +56,6 @@ class CloudConfigPartHandler(handlers.Handler): self.cloud_fn = paths.get_ipath(_kwargs["cloud_config_path"]) self.file_names = [] - def list_types(self): - return list(CC_TYPES.values()) - def _write_cloud_config(self): if not self.cloud_fn: return @@ -138,7 +133,7 @@ class CloudConfigPartHandler(handlers.Handler): # First time through, merge with an empty dict... if self.cloud_buf is None or not self.file_names: self.cloud_buf = {} - if ctype == CC_TYPES[JSONP_PREFIX]: + if ctype == handlers.INCLUSION_TYPES_MAP[JSONP_PREFIX]: self._merge_patch(payload) else: self._merge_part(payload, headers) diff --git a/cloudinit/handlers/jinja_template.py b/cloudinit/handlers/jinja_template.py new file mode 100644 index 00000000..3fa4097e --- /dev/null +++ b/cloudinit/handlers/jinja_template.py @@ -0,0 +1,137 @@ +# This file is part of cloud-init. See LICENSE file for license information. + +import os +import re + +try: + from jinja2.exceptions import UndefinedError as JUndefinedError +except ImportError: + # No jinja2 dependency + JUndefinedError = Exception + +from cloudinit import handlers +from cloudinit import log as logging +from cloudinit.sources import INSTANCE_JSON_FILE +from cloudinit.templater import render_string, MISSING_JINJA_PREFIX +from cloudinit.util import b64d, load_file, load_json, json_dumps + +from cloudinit.settings import PER_ALWAYS + +LOG = logging.getLogger(__name__) + + +class JinjaTemplatePartHandler(handlers.Handler): + + prefixes = ['## template: jinja'] + + def __init__(self, paths, **_kwargs): + handlers.Handler.__init__(self, PER_ALWAYS, version=3) + self.paths = paths + self.sub_handlers = {} + for handler in _kwargs.get('sub_handlers', []): + for ctype in handler.list_types(): + self.sub_handlers[ctype] = handler + + def handle_part(self, data, ctype, filename, payload, frequency, headers): + if ctype in handlers.CONTENT_SIGNALS: + return + jinja_json_file = os.path.join(self.paths.run_dir, INSTANCE_JSON_FILE) + rendered_payload = render_jinja_payload_from_file( + payload, filename, jinja_json_file) + if not rendered_payload: + return + subtype = handlers.type_from_starts_with(rendered_payload) + sub_handler = self.sub_handlers.get(subtype) + if not sub_handler: + LOG.warning( + 'Ignoring jinja template for %s. Could not find supported' + ' sub-handler for type %s', filename, subtype) + return + if sub_handler.handler_version == 3: + sub_handler.handle_part( + data, ctype, filename, rendered_payload, frequency, headers) + elif sub_handler.handler_version == 2: + sub_handler.handle_part( + data, ctype, filename, rendered_payload, frequency) + + +def render_jinja_payload_from_file( + payload, payload_fn, instance_data_file, debug=False): + """Render a jinja template payload sourcing variables from jinja_vars_path. + + @param payload: String of jinja template content. Should begin with + ## template: jinja\n. + @param payload_fn: String representing the filename from which the payload + was read used in error reporting. Generally in part-handling this is + 'part-##'. + @param instance_data_file: A path to a json file containing variables that + will be used as jinja template variables. + + @return: A string of jinja-rendered content with the jinja header removed. + Returns None on error. 
+ """ + instance_data = {} + rendered_payload = None + if not os.path.exists(instance_data_file): + raise RuntimeError( + 'Cannot render jinja template vars. Instance data not yet' + ' present at %s' % instance_data_file) + instance_data = load_json(load_file(instance_data_file)) + rendered_payload = render_jinja_payload( + payload, payload_fn, instance_data, debug) + if not rendered_payload: + return None + return rendered_payload + + +def render_jinja_payload(payload, payload_fn, instance_data, debug=False): + instance_jinja_vars = convert_jinja_instance_data( + instance_data, + decode_paths=instance_data.get('base64-encoded-keys', [])) + if debug: + LOG.debug('Converted jinja variables\n%s', + json_dumps(instance_jinja_vars)) + try: + rendered_payload = render_string(payload, instance_jinja_vars) + except (TypeError, JUndefinedError) as e: + LOG.warning( + 'Ignoring jinja template for %s: %s', payload_fn, str(e)) + return None + warnings = [ + "'%s'" % var.replace(MISSING_JINJA_PREFIX, '') + for var in re.findall( + r'%s[^\s]+' % MISSING_JINJA_PREFIX, rendered_payload)] + if warnings: + LOG.warning( + "Could not render jinja template variables in file '%s': %s", + payload_fn, ', '.join(warnings)) + return rendered_payload + + +def convert_jinja_instance_data(data, prefix='', sep='/', decode_paths=()): + """Process instance-data.json dict for use in jinja templates. + + Replace hyphens with underscores for jinja templates and decode any + base64_encoded_keys. + """ + result = {} + decode_paths = [path.replace('-', '_') for path in decode_paths] + for key, value in sorted(data.items()): + if '-' in key: + # Standardize keys for use in #cloud-config/shell templates + key = key.replace('-', '_') + key_path = '{0}{1}{2}'.format(prefix, sep, key) if prefix else key + if key_path in decode_paths: + value = b64d(value) + if isinstance(value, dict): + result[key] = convert_jinja_instance_data( + value, key_path, sep=sep, decode_paths=decode_paths) + if re.match(r'v\d+', key): + # Copy values to top-level aliases + for subkey, subvalue in result[key].items(): + result[subkey] = subvalue + else: + result[key] = value + return result + +# vi: ts=4 expandtab diff --git a/cloudinit/handlers/shell_script.py b/cloudinit/handlers/shell_script.py index e4945a23..214714bc 100644 --- a/cloudinit/handlers/shell_script.py +++ b/cloudinit/handlers/shell_script.py @@ -17,21 +17,18 @@ from cloudinit import util from cloudinit.settings import (PER_ALWAYS) LOG = logging.getLogger(__name__) -SHELL_PREFIX = "#!" 
 
 
 class ShellScriptPartHandler(handlers.Handler):
+
+    prefixes = ['#!']
+
     def __init__(self, paths, **_kwargs):
         handlers.Handler.__init__(self, PER_ALWAYS)
         self.script_dir = paths.get_ipath_cur('scripts')
         if 'script_path' in _kwargs:
             self.script_dir = paths.get_ipath_cur(_kwargs['script_path'])
 
-    def list_types(self):
-        return [
-            handlers.type_from_starts_with(SHELL_PREFIX),
-        ]
-
     def handle_part(self, data, ctype, filename, payload, frequency):
         if ctype in handlers.CONTENT_SIGNALS:
             # TODO(harlowja): maybe delete existing things here
diff --git a/cloudinit/handlers/upstart_job.py b/cloudinit/handlers/upstart_job.py
index dc338769..83fb0724 100644
--- a/cloudinit/handlers/upstart_job.py
+++ b/cloudinit/handlers/upstart_job.py
@@ -18,19 +18,16 @@ from cloudinit import util
 from cloudinit.settings import (PER_INSTANCE)
 
 LOG = logging.getLogger(__name__)
-UPSTART_PREFIX = "#upstart-job"
 
 
 class UpstartJobPartHandler(handlers.Handler):
+
+    prefixes = ['#upstart-job']
+
     def __init__(self, paths, **_kwargs):
         handlers.Handler.__init__(self, PER_INSTANCE)
         self.upstart_dir = paths.upstart_conf_d
 
-    def list_types(self):
-        return [
-            handlers.type_from_starts_with(UPSTART_PREFIX),
-        ]
-
     def handle_part(self, data, ctype, filename, payload, frequency):
         if ctype in handlers.CONTENT_SIGNALS:
             return
diff --git a/cloudinit/helpers.py b/cloudinit/helpers.py
index 1979cd96..3cc1fb19 100644
--- a/cloudinit/helpers.py
+++ b/cloudinit/helpers.py
@@ -449,4 +449,8 @@ class DefaultingConfigParser(RawConfigParser):
             contents = '\n'.join([header, contents, ''])
         return contents
 
+
+def identity(object):
+    return object
+
 # vi: ts=4 expandtab
diff --git a/cloudinit/log.py b/cloudinit/log.py
index 1d75c9ff..5ae312ba 100644
--- a/cloudinit/log.py
+++ b/cloudinit/log.py
@@ -38,10 +38,18 @@ DEF_CON_FORMAT = '%(asctime)s - %(filename)s[%(levelname)s]: %(message)s'
 logging.Formatter.converter = time.gmtime
 
 
-def setupBasicLogging(level=DEBUG):
+def setupBasicLogging(level=DEBUG, formatter=None):
+    if not formatter:
+        formatter = logging.Formatter(DEF_CON_FORMAT)
     root = logging.getLogger()
+    for handler in root.handlers:
+        if hasattr(handler, 'stream') and hasattr(handler.stream, 'name'):
+            if handler.stream.name == '<stderr>':
+                handler.setLevel(level)
+                return
+    # Didn't have an existing stderr handler; create a new handler
     console = logging.StreamHandler(sys.stderr)
-    console.setFormatter(logging.Formatter(DEF_CON_FORMAT))
+    console.setFormatter(formatter)
     console.setLevel(level)
     root.addHandler(console)
     root.setLevel(level)
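Aside: the sources/__init__.py change below renames process_base64_metadata and switches the catalog key to underscores; a worked example of the resulting behavior (input values illustrative):

```python
# Behavior sketch of process_instance_metadata (renamed in the diff below):
# 'ci-b64:' prefixes are stripped and their key paths cataloged.
from cloudinit.sources import process_instance_metadata

metadata = {'user_data': {'key2': 'ci-b64:EjM='}}
print(process_instance_metadata(metadata))
# -> {'user_data': {'key2': 'EjM='},
#     'base64_encoded_keys': ['user_data/key2']}
```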
diff --git a/cloudinit/sources/__init__.py b/cloudinit/sources/__init__.py
index 41fde9ba..a775f1a8 100644
--- a/cloudinit/sources/__init__.py
+++ b/cloudinit/sources/__init__.py
@@ -58,22 +58,27 @@ class InvalidMetaDataException(Exception):
     pass
 
 
-def process_base64_metadata(metadata, key_path=''):
-    """Strip ci-b64 prefix and return metadata with base64-encoded-keys set."""
+def process_instance_metadata(metadata, key_path=''):
+    """Process all instance metadata, cleaning it up for persisting as json.
+
+    Strip the ci-b64 prefix and catalog any 'base64_encoded_keys' as a list.
+
+    @return Dict copy of processed metadata.
+    """
     md_copy = copy.deepcopy(metadata)
-    md_copy['base64-encoded-keys'] = []
+    md_copy['base64_encoded_keys'] = []
     for key, val in metadata.items():
         if key_path:
             sub_key_path = key_path + '/' + key
         else:
             sub_key_path = key
         if isinstance(val, str) and val.startswith('ci-b64:'):
-            md_copy['base64-encoded-keys'].append(sub_key_path)
+            md_copy['base64_encoded_keys'].append(sub_key_path)
             md_copy[key] = val.replace('ci-b64:', '')
         if isinstance(val, dict):
-            return_val = process_base64_metadata(val, sub_key_path)
-            md_copy['base64-encoded-keys'].extend(
-                return_val.pop('base64-encoded-keys'))
+            return_val = process_instance_metadata(val, sub_key_path)
+            md_copy['base64_encoded_keys'].extend(
+                return_val.pop('base64_encoded_keys'))
             md_copy[key] = return_val
     return md_copy
 
@@ -180,15 +185,24 @@ class DataSource(object):
         """
         self._dirty_cache = True
         return_value = self._get_data()
-        json_file = os.path.join(self.paths.run_dir, INSTANCE_JSON_FILE)
         if not return_value:
             return return_value
+        self.persist_instance_data()
+        return return_value
+
+    def persist_instance_data(self):
+        """Process and write INSTANCE_JSON_FILE with all instance metadata.
+
+        Replace any hyphens with underscores in key names for use in template
+        processing.
+
+        @return True on successful write, False otherwise.
+        """
         instance_data = {
             'ds': {
-                'meta-data': self.metadata,
-                'user-data': self.get_userdata_raw(),
-                'vendor-data': self.get_vendordata_raw()}}
+                'meta_data': self.metadata,
+                'user_data': self.get_userdata_raw(),
+                'vendor_data': self.get_vendordata_raw()}}
         if hasattr(self, 'network_json'):
             network_json = getattr(self, 'network_json')
             if network_json != UNSET:
@@ -202,16 +216,17 @@ class DataSource(object):
         try:
             # Process content base64encoding unserializable values
             content = util.json_dumps(instance_data)
-            # Strip base64: prefix and return base64-encoded-keys
+            # Strip base64: prefix and set base64_encoded_keys list. 
+ processed_data = process_instance_metadata(json.loads(content)) except TypeError as e: LOG.warning('Error persisting instance-data.json: %s', str(e)) - return return_value + return False except UnicodeDecodeError as e: LOG.warning('Error persisting instance-data.json: %s', str(e)) - return return_value + return False + json_file = os.path.join(self.paths.run_dir, INSTANCE_JSON_FILE) write_json(json_file, processed_data, mode=0o600) - return return_value + return True def _get_data(self): """Walk metadata sources, process crawled data and save attributes.""" diff --git a/cloudinit/sources/tests/test_init.py b/cloudinit/sources/tests/test_init.py index 9e939c1e..8299af23 100644 --- a/cloudinit/sources/tests/test_init.py +++ b/cloudinit/sources/tests/test_init.py @@ -20,10 +20,12 @@ class DataSourceTestSubclassNet(DataSource): dsname = 'MyTestSubclass' url_max_wait = 55 - def __init__(self, sys_cfg, distro, paths, custom_userdata=None): + def __init__(self, sys_cfg, distro, paths, custom_userdata=None, + get_data_retval=True): super(DataSourceTestSubclassNet, self).__init__( sys_cfg, distro, paths) self._custom_userdata = custom_userdata + self._get_data_retval = get_data_retval def _get_cloud_name(self): return 'SubclassCloudName' @@ -37,7 +39,7 @@ class DataSourceTestSubclassNet(DataSource): else: self.userdata_raw = 'userdata_raw' self.vendordata_raw = 'vendordata_raw' - return True + return self._get_data_retval class InvalidDataSourceTestSubclassNet(DataSource): @@ -264,7 +266,18 @@ class TestDataSource(CiTestCase): self.assertEqual('fqdnhostname.domain.com', datasource.get_hostname(fqdn=True)) - def test_get_data_write_json_instance_data(self): + def test_get_data_does_not_write_instance_data_on_failure(self): + """get_data does not write INSTANCE_JSON_FILE on get_data False.""" + tmp = self.tmp_dir() + datasource = DataSourceTestSubclassNet( + self.sys_cfg, self.distro, Paths({'run_dir': tmp}), + get_data_retval=False) + self.assertFalse(datasource.get_data()) + json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp) + self.assertFalse( + os.path.exists(json_file), 'Found unexpected file %s' % json_file) + + def test_get_data_writes_json_instance_data_on_success(self): """get_data writes INSTANCE_JSON_FILE to run_dir as readonly root.""" tmp = self.tmp_dir() datasource = DataSourceTestSubclassNet( @@ -273,7 +286,7 @@ class TestDataSource(CiTestCase): json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp) content = util.load_file(json_file) expected = { - 'base64-encoded-keys': [], + 'base64_encoded_keys': [], 'v1': { 'availability-zone': 'myaz', 'cloud-name': 'subclasscloudname', @@ -281,11 +294,12 @@ class TestDataSource(CiTestCase): 'local-hostname': 'test-subclass-hostname', 'region': 'myregion'}, 'ds': { - 'meta-data': {'availability_zone': 'myaz', + 'meta_data': {'availability_zone': 'myaz', 'local-hostname': 'test-subclass-hostname', 'region': 'myregion'}, - 'user-data': 'userdata_raw', - 'vendor-data': 'vendordata_raw'}} + 'user_data': 'userdata_raw', + 'vendor_data': 'vendordata_raw'}} + self.maxDiff = None self.assertEqual(expected, util.load_json(content)) file_stat = os.stat(json_file) self.assertEqual(0o600, stat.S_IMODE(file_stat.st_mode)) @@ -296,7 +310,7 @@ class TestDataSource(CiTestCase): datasource = DataSourceTestSubclassNet( self.sys_cfg, self.distro, Paths({'run_dir': tmp}), custom_userdata={'key1': 'val1', 'key2': {'key2.1': self.paths}}) - self.assertTrue(datasource.get_data()) + datasource.get_data() json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp) content = 
util.load_file(json_file)
         expected_userdata = {
@@ -306,7 +320,40 @@
                       " 'cloudinit.helpers.Paths'>"}}
         instance_json = util.load_json(content)
         self.assertEqual(
-            expected_userdata, instance_json['ds']['user-data'])
+            expected_userdata, instance_json['ds']['user_data'])
+
+    def test_persist_instance_data_writes_ec2_metadata_when_set(self):
+        """When ec2_metadata class attribute is set, persist to json."""
+        tmp = self.tmp_dir()
+        datasource = DataSourceTestSubclassNet(
+            self.sys_cfg, self.distro, Paths({'run_dir': tmp}))
+        datasource.ec2_metadata = UNSET
+        datasource.get_data()
+        json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp)
+        instance_data = util.load_json(util.load_file(json_file))
+        self.assertNotIn('ec2_metadata', instance_data['ds'])
+        datasource.ec2_metadata = {'ec2stuff': 'is good'}
+        datasource.persist_instance_data()
+        instance_data = util.load_json(util.load_file(json_file))
+        self.assertEqual(
+            {'ec2stuff': 'is good'},
+            instance_data['ds']['ec2_metadata'])
+
+    def test_persist_instance_data_writes_network_json_when_set(self):
+        """When network_data.json class attribute is set, persist to json."""
+        tmp = self.tmp_dir()
+        datasource = DataSourceTestSubclassNet(
+            self.sys_cfg, self.distro, Paths({'run_dir': tmp}))
+        datasource.get_data()
+        json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp)
+        instance_data = util.load_json(util.load_file(json_file))
+        self.assertNotIn('network_json', instance_data['ds'])
+        datasource.network_json = {'network_json': 'is good'}
+        datasource.persist_instance_data()
+        instance_data = util.load_json(util.load_file(json_file))
+        self.assertEqual(
+            {'network_json': 'is good'},
+            instance_data['ds']['network_json'])
 
     @skipIf(not six.PY3, "json serialization on <= py2.7 handles bytes")
     def test_get_data_base64encodes_unserializable_bytes(self):
@@ -320,11 +367,11 @@
         content = util.load_file(json_file)
         instance_json = util.load_json(content)
         self.assertEqual(
-            ['ds/user-data/key2/key2.1'],
-            instance_json['base64-encoded-keys'])
+            ['ds/user_data/key2/key2.1'],
+            instance_json['base64_encoded_keys'])
         self.assertEqual(
             {'key1': 'val1', 'key2': {'key2.1': 'EjM='}},
-            instance_json['ds']['user-data'])
+            instance_json['ds']['user_data'])
 
     @skipIf(not six.PY2, "json serialization on <= py2.7 handles bytes")
     def test_get_data_handles_bytes_values(self):
@@ -337,10 +384,10 @@
         json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp)
         content = util.load_file(json_file)
         instance_json = util.load_json(content)
-        self.assertEqual([], instance_json['base64-encoded-keys'])
+        self.assertEqual([], instance_json['base64_encoded_keys'])
         self.assertEqual(
             {'key1': 'val1', 'key2': {'key2.1': '\x123'}},
-            instance_json['ds']['user-data'])
+            instance_json['ds']['user_data'])
 
     @skipIf(not six.PY2, "Only python2 hits UnicodeDecodeErrors on non-utf8")
     def test_non_utf8_encoding_logs_warning(self):
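Aside: the stages.py change below wires the jinja handler in front of the existing part handlers; schematically (where 'paths' stands in for the cloudinit.helpers.Paths instance Init already carries):

```python
# Sketch of the handler wiring performed in the stages.py hunk that follows.
from cloudinit.handlers.cloud_config import CloudConfigPartHandler
from cloudinit.handlers.jinja_template import JinjaTemplatePartHandler
from cloudinit.handlers.shell_script import ShellScriptPartHandler

def build_handlers(paths):
    cloudconfig = CloudConfigPartHandler(paths)
    shellscript = ShellScriptPartHandler(paths)
    # The jinja handler renders '## template: jinja' parts, sniffs the result,
    # and delegates to whichever sub-handler claims the rendered content type.
    jinja = JinjaTemplatePartHandler(
        paths, sub_handlers=[cloudconfig, shellscript])
    return [cloudconfig, shellscript, jinja]
```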
diff --git a/cloudinit/stages.py b/cloudinit/stages.py
index 8874d405..ef5c6996 100644
--- a/cloudinit/stages.py
+++ b/cloudinit/stages.py
@@ -17,10 +17,11 @@ from cloudinit.settings import (
 from cloudinit import handlers
 
 # Default handlers (used if not overridden)
-from cloudinit.handlers import boot_hook as bh_part
-from cloudinit.handlers import cloud_config as cc_part
-from cloudinit.handlers import shell_script as ss_part
-from cloudinit.handlers import upstart_job as up_part
+from cloudinit.handlers.boot_hook import BootHookPartHandler
+from cloudinit.handlers.cloud_config import CloudConfigPartHandler
+from cloudinit.handlers.jinja_template import JinjaTemplatePartHandler
+from cloudinit.handlers.shell_script import ShellScriptPartHandler
+from cloudinit.handlers.upstart_job import UpstartJobPartHandler
 
 from cloudinit.event import EventType
 
@@ -413,12 +414,17 @@ class Init(object):
             'datasource': self.datasource,
         })
         # TODO(harlowja) Hmmm, should we dynamically import these??
+        cloudconfig_handler = CloudConfigPartHandler(**opts)
+        shellscript_handler = ShellScriptPartHandler(**opts)
         def_handlers = [
-            cc_part.CloudConfigPartHandler(**opts),
-            ss_part.ShellScriptPartHandler(**opts),
-            bh_part.BootHookPartHandler(**opts),
-            up_part.UpstartJobPartHandler(**opts),
+            cloudconfig_handler,
+            shellscript_handler,
+            BootHookPartHandler(**opts),
+            UpstartJobPartHandler(**opts),
         ]
+        opts.update(
+            {'sub_handlers': [cloudconfig_handler, shellscript_handler]})
+        def_handlers.append(JinjaTemplatePartHandler(**opts))
         return def_handlers
 
     def _default_userdata_handlers(self):
diff --git a/cloudinit/templater.py b/cloudinit/templater.py
index 7e7acb86..b668674b 100644
--- a/cloudinit/templater.py
+++ b/cloudinit/templater.py
@@ -13,6 +13,7 @@
 import collections
 import re
 
+
 try:
     from Cheetah.Template import Template as CTemplate
     CHEETAH_AVAILABLE = True
@@ -20,23 +21,44 @@ except (ImportError, AttributeError):
     CHEETAH_AVAILABLE = False
 
 try:
-    import jinja2
+    from jinja2.runtime import implements_to_string
     from jinja2 import Template as JTemplate
+    from jinja2 import DebugUndefined as JUndefined
     JINJA_AVAILABLE = True
 except (ImportError, AttributeError):
+    from cloudinit.helpers import identity
+    implements_to_string = identity
     JINJA_AVAILABLE = False
+    JUndefined = object
 
 from cloudinit import log as logging
 from cloudinit import type_utils as tu
 from cloudinit import util
 
+
 LOG = logging.getLogger(__name__)
 TYPE_MATCHER = re.compile(r"##\s*template:(.*)", re.I)
 BASIC_MATCHER = re.compile(r'\$\{([A-Za-z0-9_.]+)\}|\$([A-Za-z0-9_.]+)')
+MISSING_JINJA_PREFIX = u'CI_MISSING_JINJA_VAR/'
+
+
+@implements_to_string  # Needed for python2.7. Otherwise cached super.__str__
+class UndefinedJinjaVariable(JUndefined):
+    """Class used to represent any undefined jinja template variable."""
+
+    def __str__(self):
+        return u'%s%s' % (MISSING_JINJA_PREFIX, self._undefined_name)
+
+    def __sub__(self, other):
+        other = str(other).replace(MISSING_JINJA_PREFIX, '')
+        raise TypeError(
+            'Undefined jinja variable: "{this}-{other}". Jinja tried'
+            ' subtraction. Perhaps you meant "{this}_{other}"?'.format(
+                this=self._undefined_name, other=other))
 
 
 def basic_render(content, params):
     """This does simple replacement of bash variable like templates.
It identifies patterns like ${a} or $a and can also identify patterns like ${a.b} or $a.b which will look for a key 'b' in the dictionary rooted @@ -82,7 +104,7 @@ def detect_template(text): # keep_trailing_newline is in jinja2 2.7+, not 2.6 add = "\n" if content.endswith("\n") else "" return JTemplate(content, - undefined=jinja2.StrictUndefined, + undefined=UndefinedJinjaVariable, trim_blocks=True).render(**params) + add if text.find("\n") != -1: diff --git a/cloudinit/tests/helpers.py b/cloudinit/tests/helpers.py index 42f56c27..2eb7b0cd 100644 --- a/cloudinit/tests/helpers.py +++ b/cloudinit/tests/helpers.py @@ -32,6 +32,7 @@ from cloudinit import cloud from cloudinit import distros from cloudinit import helpers as ch from cloudinit.sources import DataSourceNone +from cloudinit.templater import JINJA_AVAILABLE from cloudinit import util _real_subp = util.subp @@ -518,6 +519,14 @@ def skipUnlessJsonSchema(): _missing_jsonschema_dep, "No python-jsonschema dependency present.") +def skipUnlessJinja(): + return skipIf(not JINJA_AVAILABLE, "No jinja dependency present.") + + +def skipIfJinja(): + return skipIf(JINJA_AVAILABLE, "Jinja dependency present.") + + # older versions of mock do not have the useful 'assert_not_called' if not hasattr(mock.Mock, 'assert_not_called'): def __mock_assert_not_called(mmock): diff --git a/doc/rtd/topics/capabilities.rst b/doc/rtd/topics/capabilities.rst index 3e2c9e31..2d8e2538 100644 --- a/doc/rtd/topics/capabilities.rst +++ b/doc/rtd/topics/capabilities.rst @@ -16,13 +16,15 @@ User configurability `Cloud-init`_ 's behavior can be configured via user-data. - User-data can be given by the user at instance launch time. + User-data can be given by the user at instance launch time. See + :ref:`user_data_formats` for acceptable user-data content. + This is done via the ``--user-data`` or ``--user-data-file`` argument to ec2-run-instances for example. -* Check your local clients documentation for how to provide a `user-data` - string or `user-data` file for usage by cloud-init on instance creation. +* Check your local client's documentation for how to provide a `user-data` + string or `user-data` file to cloud-init on instance creation. Feature detection @@ -166,6 +168,13 @@ likely be promoted to top-level subcommands when stable. validation is work in progress and supports a subset of cloud-config modules. + * ``cloud-init devel render``: Use cloud-init's jinja template render to + process **#cloud-config** or **custom-scripts**, injecting any variables + from ``/run/cloud-init/instance-data.json``. It accepts a user-data file + containing the jinja template header ``## template: jinja`` and renders + that content with any instance-data.json variables present. + + .. _cli_clean: cloud-init clean diff --git a/doc/rtd/topics/datasources.rst b/doc/rtd/topics/datasources.rst index 83034589..14432e65 100644 --- a/doc/rtd/topics/datasources.rst +++ b/doc/rtd/topics/datasources.rst @@ -18,6 +18,8 @@ single way to access the different cloud systems methods to provide this data through the typical usage of subclasses. +.. _instance_metadata: + instance-data ------------- For reference, cloud-init stores all the metadata, vendordata and userdata @@ -110,6 +112,51 @@ Below is an instance-data.json example from an OpenStack instance: } } + +As of cloud-init v. 18.4, any values present in +``/run/cloud-init/instance-data.json`` can be used in cloud-init user data +scripts or cloud config data. 
This allows consumers to use cloud-init's
+vendor-neutral, standardized metadata keys as well as datasource-specific
+content for any scripts or cloud-config modules they are using.
+
+To use instance-data.json values in scripts and **#cloud-config** files, the
+user-data will need to contain the following header as the first line:
+**## template: jinja**. Cloud-init will source all variables defined in
+``/run/cloud-init/instance-data.json`` and allow scripts or cloud-config
+files to reference those paths. Below are two examples::
+
+  * Cloud config calling home with the ec2 public hostname and
+    availability-zone
+    ```
+    ## template: jinja
+    #cloud-config
+    runcmd:
+        - echo 'EC2 public hostname allocated to instance: {{ ds.meta_data.public_hostname }}' > /tmp/instance_metadata
+        - echo 'EC2 availability zone: {{ v1.availability_zone }}' >> /tmp/instance_metadata
+        - curl -X POST -d '{"hostname": "{{ds.meta_data.public_hostname }}", "availability-zone": "{{ v1.availability_zone }}"}' https://example.com
+    ```
+
+  * Custom user script performing different operations based on region
+    ```
+    ## template: jinja
+    #!/bin/bash
+    {% if v1.region == 'us-east-2' -%}
+    echo 'Installing custom proxies for {{ v1.region }}'
+    sudo apt-get install my-xtra-fast-stack
+    {%- endif %}
+    ...
+
+    ```
+
+.. note::
+  Trying to reference jinja variables that don't exist in
+  instance-data.json will result in warnings in ``/var/log/cloud-init.log``
+  and the following string in your rendered user-data:
+  ``CI_MISSING_JINJA_VAR/<your_varname>``.
+
+.. note::
+  To save time designing your user-data for a specific cloud's
+  instance-data.json, use the 'render' cloud-init command on an
+  instance booted on your favorite cloud. See :ref:`cli_devel` for more
+  information.
 
 
 Datasource API
diff --git a/doc/rtd/topics/format.rst b/doc/rtd/topics/format.rst
index 1b0ff366..15234d21 100644
--- a/doc/rtd/topics/format.rst
+++ b/doc/rtd/topics/format.rst
@@ -1,6 +1,8 @@
-*******
-Formats
-*******
+.. _user_data_formats:
+
+*****************
+User-Data Formats
+*****************
 
 User data that will be acted upon by cloud-init must be in one of the following
 types.
@@ -65,6 +67,11 @@ Typically used by those who just want to execute a shell script.
 Begins with: ``#!`` or ``Content-Type: text/x-shellscript`` when using a MIME
 archive.
 
+.. note::
+  New in cloud-init v. 18.4: User-data scripts can also render cloud instance
+  metadata variables using jinja templating. See
+  :ref:`instance_metadata` for more information.
+
 Example
 -------
 
@@ -103,12 +110,18 @@ These things include:
 - certain ssh keys should be imported
 - *and many more...*
 
-**Note:** The file must be valid yaml syntax.
+.. note::
+  This file must be valid yaml syntax.
 
 See the :ref:`yaml_examples` section for a commented set of examples of
 supported cloud config formats.
 
 Begins with: ``#cloud-config`` or ``Content-Type: text/cloud-config`` when
 using a MIME archive.
 
+.. note::
+  New in cloud-init v. 18.4: Cloud config data can also render cloud instance
+  metadata variables using jinja templating. See
+  :ref:`instance_metadata` for more information.
+ Upstart Job =========== diff --git a/tests/cloud_tests/testcases/base.py b/tests/cloud_tests/testcases/base.py index 696db8dd..27458271 100644 --- a/tests/cloud_tests/testcases/base.py +++ b/tests/cloud_tests/testcases/base.py @@ -168,7 +168,7 @@ class CloudTestCase(unittest.TestCase): ' OS: %s not bionic or newer' % self.os_name) instance_data = json.loads(out) self.assertEqual( - ['ds/user-data'], instance_data['base64-encoded-keys']) + ['ds/user_data'], instance_data['base64_encoded_keys']) ds = instance_data.get('ds', {}) v1_data = instance_data.get('v1', {}) metadata = ds.get('meta-data', {}) @@ -214,8 +214,8 @@ class CloudTestCase(unittest.TestCase): instance_data = json.loads(out) v1_data = instance_data.get('v1', {}) self.assertEqual( - ['ds/user-data', 'ds/vendor-data'], - sorted(instance_data['base64-encoded-keys'])) + ['ds/user_data', 'ds/vendor_data'], + sorted(instance_data['base64_encoded_keys'])) self.assertEqual('nocloud', v1_data['cloud-name']) self.assertIsNone( v1_data['availability-zone'], @@ -249,7 +249,7 @@ class CloudTestCase(unittest.TestCase): instance_data = json.loads(out) v1_data = instance_data.get('v1', {}) self.assertEqual( - ['ds/user-data'], instance_data['base64-encoded-keys']) + ['ds/user_data'], instance_data['base64_encoded_keys']) self.assertEqual('nocloud', v1_data['cloud-name']) self.assertIsNone( v1_data['availability-zone'], diff --git a/tests/unittests/test_builtin_handlers.py b/tests/unittests/test_builtin_handlers.py index 9751ed95..abe820e1 100644 --- a/tests/unittests/test_builtin_handlers.py +++ b/tests/unittests/test_builtin_handlers.py @@ -2,27 +2,34 @@ """Tests of the built-in user data handlers.""" +import copy import os import shutil import tempfile +from textwrap import dedent -try: - from unittest import mock -except ImportError: - import mock -from cloudinit.tests import helpers as test_helpers +from cloudinit.tests.helpers import ( + FilesystemMockingTestCase, CiTestCase, mock, skipUnlessJinja) from cloudinit import handlers from cloudinit import helpers from cloudinit import util -from cloudinit.handlers import upstart_job +from cloudinit.handlers.cloud_config import CloudConfigPartHandler +from cloudinit.handlers.jinja_template import ( + JinjaTemplatePartHandler, convert_jinja_instance_data, + render_jinja_payload) +from cloudinit.handlers.shell_script import ShellScriptPartHandler +from cloudinit.handlers.upstart_job import UpstartJobPartHandler from cloudinit.settings import (PER_ALWAYS, PER_INSTANCE) -class TestBuiltins(test_helpers.FilesystemMockingTestCase): +class TestUpstartJobPartHandler(FilesystemMockingTestCase): + + mpath = 'cloudinit.handlers.upstart_job.' + def test_upstart_frequency_no_out(self): c_root = tempfile.mkdtemp() self.addCleanup(shutil.rmtree, c_root) @@ -32,14 +39,13 @@ class TestBuiltins(test_helpers.FilesystemMockingTestCase): 'cloud_dir': c_root, 'upstart_dir': up_root, }) - freq = PER_ALWAYS - h = upstart_job.UpstartJobPartHandler(paths) + h = UpstartJobPartHandler(paths) # No files should be written out when # the frequency is ! per-instance h.handle_part('', handlers.CONTENT_START, None, None, None) h.handle_part('blah', 'text/upstart-job', - 'test.conf', 'blah', freq) + 'test.conf', 'blah', frequency=PER_ALWAYS) h.handle_part('', handlers.CONTENT_END, None, None, None) self.assertEqual(0, len(os.listdir(up_root))) @@ -48,7 +54,6 @@ class TestBuiltins(test_helpers.FilesystemMockingTestCase): # files should be written out when frequency is ! 
per-instance new_root = tempfile.mkdtemp() self.addCleanup(shutil.rmtree, new_root) - freq = PER_INSTANCE self.patchOS(new_root) self.patchUtils(new_root) @@ -56,22 +61,297 @@ class TestBuiltins(test_helpers.FilesystemMockingTestCase): 'upstart_dir': "/etc/upstart", }) - upstart_job.SUITABLE_UPSTART = True util.ensure_dir("/run") util.ensure_dir("/etc/upstart") - with mock.patch.object(util, 'subp') as mockobj: - h = upstart_job.UpstartJobPartHandler(paths) - h.handle_part('', handlers.CONTENT_START, - None, None, None) - h.handle_part('blah', 'text/upstart-job', - 'test.conf', 'blah', freq) - h.handle_part('', handlers.CONTENT_END, - None, None, None) + with mock.patch(self.mpath + 'SUITABLE_UPSTART', return_value=True): + with mock.patch.object(util, 'subp') as m_subp: + h = UpstartJobPartHandler(paths) + h.handle_part('', handlers.CONTENT_START, + None, None, None) + h.handle_part('blah', 'text/upstart-job', + 'test.conf', 'blah', frequency=PER_INSTANCE) + h.handle_part('', handlers.CONTENT_END, + None, None, None) - self.assertEqual(len(os.listdir('/etc/upstart')), 1) + self.assertEqual(len(os.listdir('/etc/upstart')), 1) - mockobj.assert_called_once_with( + m_subp.assert_called_once_with( ['initctl', 'reload-configuration'], capture=False) + +class TestJinjaTemplatePartHandler(CiTestCase): + + with_logs = True + + mpath = 'cloudinit.handlers.jinja_template.' + + def setUp(self): + super(TestJinjaTemplatePartHandler, self).setUp() + self.tmp = self.tmp_dir() + self.run_dir = os.path.join(self.tmp, 'run_dir') + util.ensure_dir(self.run_dir) + self.paths = helpers.Paths({ + 'cloud_dir': self.tmp, 'run_dir': self.run_dir}) + + def test_jinja_template_part_handler_defaults(self): + """On init, paths are saved and subhandler types are empty.""" + h = JinjaTemplatePartHandler(self.paths) + self.assertEqual(['## template: jinja'], h.prefixes) + self.assertEqual(3, h.handler_version) + self.assertEqual(self.paths, h.paths) + self.assertEqual({}, h.sub_handlers) + + def test_jinja_template_part_handler_looks_up_sub_handler_types(self): + """When sub_handlers are passed, init lists types of subhandlers.""" + script_handler = ShellScriptPartHandler(self.paths) + cloudconfig_handler = CloudConfigPartHandler(self.paths) + h = JinjaTemplatePartHandler( + self.paths, sub_handlers=[script_handler, cloudconfig_handler]) + self.assertItemsEqual( + ['text/cloud-config', 'text/cloud-config-jsonp', + 'text/x-shellscript'], + h.sub_handlers) + + def test_jinja_template_part_handler_looks_up_subhandler_types(self): + """When sub_handlers are passed, init lists types of subhandlers.""" + script_handler = ShellScriptPartHandler(self.paths) + cloudconfig_handler = CloudConfigPartHandler(self.paths) + h = JinjaTemplatePartHandler( + self.paths, sub_handlers=[script_handler, cloudconfig_handler]) + self.assertItemsEqual( + ['text/cloud-config', 'text/cloud-config-jsonp', + 'text/x-shellscript'], + h.sub_handlers) + + def test_jinja_template_handle_noop_on_content_signals(self): + """Perform no part handling when content type is CONTENT_SIGNALS.""" + script_handler = ShellScriptPartHandler(self.paths) + + h = JinjaTemplatePartHandler( + self.paths, sub_handlers=[script_handler]) + with mock.patch.object(script_handler, 'handle_part') as m_handle_part: + h.handle_part( + data='data', ctype=handlers.CONTENT_START, filename='part-1', + payload='## template: jinja\n#!/bin/bash\necho himom', + frequency='freq', headers='headers') + m_handle_part.assert_not_called() + + @skipUnlessJinja() + def 
test_jinja_template_handle_subhandler_v2_with_clean_payload(self): + """Call version 2 subhandler.handle_part with stripped payload.""" + script_handler = ShellScriptPartHandler(self.paths) + self.assertEqual(2, script_handler.handler_version) + + # Create required instance-data.json file + instance_json = os.path.join(self.run_dir, 'instance-data.json') + instance_data = {'topkey': 'echo himom'} + util.write_file(instance_json, util.json_dumps(instance_data)) + h = JinjaTemplatePartHandler( + self.paths, sub_handlers=[script_handler]) + with mock.patch.object(script_handler, 'handle_part') as m_part: + # ctype with leading '!' not in handlers.CONTENT_SIGNALS + h.handle_part( + data='data', ctype="!" + handlers.CONTENT_START, + filename='part01', + payload='## template: jinja \t \n#!/bin/bash\n{{ topkey }}', + frequency='freq', headers='headers') + m_part.assert_called_once_with( + 'data', '!__begin__', 'part01', '#!/bin/bash\necho himom', 'freq') + + @skipUnlessJinja() + def test_jinja_template_handle_subhandler_v3_with_clean_payload(self): + """Call version 3 subhandler.handle_part with stripped payload.""" + cloudcfg_handler = CloudConfigPartHandler(self.paths) + self.assertEqual(3, cloudcfg_handler.handler_version) + + # Create required instance-data.json file + instance_json = os.path.join(self.run_dir, 'instance-data.json') + instance_data = {'topkey': {'sub': 'runcmd: [echo hi]'}} + util.write_file(instance_json, util.json_dumps(instance_data)) + h = JinjaTemplatePartHandler( + self.paths, sub_handlers=[cloudcfg_handler]) + with mock.patch.object(cloudcfg_handler, 'handle_part') as m_part: + # ctype with leading '!' not in handlers.CONTENT_SIGNALS + h.handle_part( + data='data', ctype="!" + handlers.CONTENT_END, + filename='part01', + payload='## template: jinja\n#cloud-config\n{{ topkey.sub }}', + frequency='freq', headers='headers') + m_part.assert_called_once_with( + 'data', '!__end__', 'part01', '#cloud-config\nruncmd: [echo hi]', + 'freq', 'headers') + + def test_jinja_template_handle_errors_on_missing_instance_data_json(self): + """If instance-data is absent, raise an error from handle_part.""" + script_handler = ShellScriptPartHandler(self.paths) + h = JinjaTemplatePartHandler( + self.paths, sub_handlers=[script_handler]) + with self.assertRaises(RuntimeError) as context_manager: + h.handle_part( + data='data', ctype="!" + handlers.CONTENT_START, + filename='part01', + payload='## template: jinja \n#!/bin/bash\necho himom', + frequency='freq', headers='headers') + script_file = os.path.join(script_handler.script_dir, 'part01') + self.assertEqual( + 'Cannot render jinja template vars. Instance data not yet present' + ' at {}/instance-data.json'.format( + self.run_dir), str(context_manager.exception)) + self.assertFalse( + os.path.exists(script_file), + 'Unexpected file created %s' % script_file) + + @skipUnlessJinja() + def test_jinja_template_handle_renders_jinja_content(self): + """When present, render jinja variables from instance-data.json.""" + script_handler = ShellScriptPartHandler(self.paths) + instance_json = os.path.join(self.run_dir, 'instance-data.json') + instance_data = {'topkey': {'subkey': 'echo himom'}} + util.write_file(instance_json, util.json_dumps(instance_data)) + h = JinjaTemplatePartHandler( + self.paths, sub_handlers=[script_handler]) + h.handle_part( + data='data', ctype="!" 
+ handlers.CONTENT_START,
+            filename='part01',
+            payload=(
+                '## template: jinja \n'
+                '#!/bin/bash\n'
+                '{{ topkey.subkey|default("nosubkey") }}'),
+            frequency='freq', headers='headers')
+        script_file = os.path.join(script_handler.script_dir, 'part01')
+        self.assertNotIn(
+            'Instance data not yet present at {}/instance-data.json'.format(
+                self.run_dir),
+            self.logs.getvalue())
+        self.assertEqual(
+            '#!/bin/bash\necho himom', util.load_file(script_file))
+
+    @skipUnlessJinja()
+    def test_jinja_template_handle_renders_jinja_content_missing_keys(self):
+        """When specified jinja variable is undefined, log a warning."""
+        script_handler = ShellScriptPartHandler(self.paths)
+        instance_json = os.path.join(self.run_dir, 'instance-data.json')
+        instance_data = {'topkey': {'subkey': 'echo himom'}}
+        util.write_file(instance_json, util.json_dumps(instance_data))
+        h = JinjaTemplatePartHandler(
+            self.paths, sub_handlers=[script_handler])
+        h.handle_part(
+            data='data', ctype="!" + handlers.CONTENT_START,
+            filename='part01',
+            payload='## template: jinja \n#!/bin/bash\n{{ goodtry }}',
+            frequency='freq', headers='headers')
+        script_file = os.path.join(script_handler.script_dir, 'part01')
+        self.assertTrue(
+            os.path.exists(script_file),
+            'Missing expected file %s' % script_file)
+        self.assertIn(
+            "WARNING: Could not render jinja template variables in file"
+            " 'part01': 'goodtry'\n",
+            self.logs.getvalue())
+
+
+class TestConvertJinjaInstanceData(CiTestCase):
+
+    def test_convert_instance_data_hyphens_to_underscores(self):
+        """Replace hyphenated keys with underscores in instance-data."""
+        data = {'hyphenated-key': 'hyphenated-val',
+                'underscore_delim_key': 'underscore_delimited_val'}
+        expected_data = {'hyphenated_key': 'hyphenated-val',
+                         'underscore_delim_key': 'underscore_delimited_val'}
+        self.assertEqual(
+            expected_data,
+            convert_jinja_instance_data(data=data))
+
+    def test_convert_instance_data_promotes_versioned_keys_to_top_level(self):
+        """Any versioned keys are promoted as top-level keys.
+
+        This exposes any of cloud-init's standardized keys at the top level
+        to allow ease of reference for users. Instead of v1.availability_zone,
+        the name availability_zone can be used in templates.
+        """
+        data = {'ds': {'dskey1': 1, 'dskey2': 2},
+                'v1': {'v1key1': 'v1.1'},
+                'v2': {'v2key1': 'v2.1'}}
+        expected_data = copy.deepcopy(data)
+        expected_data.update({'v1key1': 'v1.1', 'v2key1': 'v2.1'})
+
+        converted_data = convert_jinja_instance_data(data=data)
+        self.assertItemsEqual(
+            ['ds', 'v1', 'v2', 'v1key1', 'v2key1'], converted_data.keys())
+        self.assertEqual(
+            expected_data,
+            converted_data)
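As an aside, the conversion these tests pin down is small enough to sketch standalone. The following is an illustrative reimplementation, not the real ``convert_jinja_instance_data``; it assumes plain dicts with string keys and shows only the two tested behaviors, hyphen-to-underscore normalization and 'latest wins' promotion of ``v<N>`` keys:

```
def convert_for_templates(data):
    """Illustrative: normalize keys and promote v<N> keys to top level."""
    converted = {}
    for key, value in data.items():
        if isinstance(value, dict):
            value = convert_for_templates(value)
        converted[key.replace('-', '_')] = value
    # Promote versioned keys; higher version numbers win on conflicts.
    versions = sorted(
        (k for k in converted if k.startswith('v') and k[1:].isdigit()),
        key=lambda k: int(k[1:]))
    for version in versions:
        for sub_key, sub_value in converted[version].items():
            converted[sub_key] = sub_value
    return converted


data = {'v1': {'key1': 'old', 'key2': 'old2'}, 'v2': {'key1': 'new'}}
print(convert_for_templates(data))
# {'v1': {...}, 'v2': {...}, 'key1': 'new', 'key2': 'old2'}
```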
+ """ + data = {'ds': {'dskey1': 1, 'dskey2': 2}, + 'v1': {'v1key1': 'v1.1'}, + 'v2': {'v2key1': 'v2.1'}} + expected_data = copy.deepcopy(data) + expected_data.update({'v1key1': 'v1.1', 'v2key1': 'v2.1'}) + + converted_data = convert_jinja_instance_data(data=data) + self.assertItemsEqual( + ['ds', 'v1', 'v2', 'v1key1', 'v2key1'], converted_data.keys()) + self.assertEqual( + expected_data, + converted_data) + + def test_convert_instance_data_most_recent_version_of_promoted_keys(self): + """The most-recent versioned key value is promoted to top-level.""" + data = {'v1': {'key1': 'old v1 key1', 'key2': 'old v1 key2'}, + 'v2': {'key1': 'newer v2 key1', 'key3': 'newer v2 key3'}, + 'v3': {'key1': 'newest v3 key1'}} + expected_data = copy.deepcopy(data) + expected_data.update( + {'key1': 'newest v3 key1', 'key2': 'old v1 key2', + 'key3': 'newer v2 key3'}) + + converted_data = convert_jinja_instance_data(data=data) + self.assertEqual( + expected_data, + converted_data) + + def test_convert_instance_data_decodes_decode_paths(self): + """Any decode_paths provided are decoded by convert_instance_data.""" + data = {'key1': {'subkey1': 'aGkgbW9t'}, 'key2': 'aGkgZGFk'} + expected_data = copy.deepcopy(data) + expected_data['key1']['subkey1'] = 'hi mom' + + converted_data = convert_jinja_instance_data( + data=data, decode_paths=('key1/subkey1',)) + self.assertEqual( + expected_data, + converted_data) + + +class TestRenderJinjaPayload(CiTestCase): + + with_logs = True + + @skipUnlessJinja() + def test_render_jinja_payload_logs_jinja_vars_on_debug(self): + """When debug is True, log jinja varables available.""" + payload = ( + '## template: jinja\n#!/bin/sh\necho hi from {{ v1.hostname }}') + instance_data = {'v1': {'hostname': 'foo'}, 'instance-id': 'iid'} + expected_log = dedent("""\ + DEBUG: Converted jinja variables + { + "hostname": "foo", + "instance_id": "iid", + "v1": { + "hostname": "foo" + } + } + """) + self.assertEqual( + render_jinja_payload( + payload=payload, payload_fn='myfile', + instance_data=instance_data, debug=True), + '#!/bin/sh\necho hi from foo') + self.assertEqual(expected_log, self.logs.getvalue()) + + @skipUnlessJinja() + def test_render_jinja_payload_replaces_missing_variables_and_warns(self): + """Warn on missing jinja variables and replace the absent variable.""" + payload = ( + '## template: jinja\n#!/bin/sh\necho hi from {{ NOTHERE }}') + instance_data = {'v1': {'hostname': 'foo'}, 'instance-id': 'iid'} + self.assertEqual( + render_jinja_payload( + payload=payload, payload_fn='myfile', + instance_data=instance_data), + '#!/bin/sh\necho hi from CI_MISSING_JINJA_VAR/NOTHERE') + expected_log = ( + 'WARNING: Could not render jinja template variables in file' + " 'myfile': 'NOTHERE'") + self.assertIn(expected_log, self.logs.getvalue()) + # vi: ts=4 expandtab diff --git a/tests/unittests/test_handler/test_handler_etc_hosts.py b/tests/unittests/test_handler/test_handler_etc_hosts.py index ced05a8d..d854afcb 100644 --- a/tests/unittests/test_handler/test_handler_etc_hosts.py +++ b/tests/unittests/test_handler/test_handler_etc_hosts.py @@ -49,6 +49,7 @@ class TestHostsFile(t_help.FilesystemMockingTestCase): if '192.168.1.1\tblah.blah.us\tblah' not in contents: self.assertIsNone('Default etc/hosts content modified') + @t_help.skipUnlessJinja() def test_write_etc_hosts_suse_template(self): cfg = { 'manage_etc_hosts': 'template', diff --git a/tests/unittests/test_handler/test_handler_ntp.py b/tests/unittests/test_handler/test_handler_ntp.py index 6fe3659d..0f22e579 100644 --- 
+++ b/tests/unittests/test_handler/test_handler_ntp.py
@@ -3,6 +3,7 @@
 from cloudinit.config import cc_ntp
 from cloudinit.sources import DataSourceNone
 from cloudinit import (distros, helpers, cloud, util)
+
 from cloudinit.tests.helpers import (
     CiTestCase, FilesystemMockingTestCase, mock, skipUnlessJsonSchema)
 
diff --git a/tests/unittests/test_templating.py b/tests/unittests/test_templating.py
index 20c87efa..c36e6eb0 100644
--- a/tests/unittests/test_templating.py
+++ b/tests/unittests/test_templating.py
@@ -21,6 +21,9 @@ except ImportError:
 
 
 class TestTemplates(test_helpers.CiTestCase):
+
+    with_logs = True
+
     jinja_utf8 = b'It\xe2\x80\x99s not ascii, {{name}}\n'
     jinja_utf8_rbob = b'It\xe2\x80\x99s not ascii, bob\n'.decode('utf-8')
 
@@ -124,6 +127,13 @@ $a,$b'''
                 self.add_header("jinja", self.jinja_utf8), {"name": "bob"}),
             self.jinja_utf8_rbob)
 
+    def test_jinja_nonascii_render_undefined_variables_to_default_py3(self):
+        """Test py3 jinja render_to_string with undefined variable default."""
+        self.assertEqual(
+            templater.render_string(
+                self.add_header("jinja", self.jinja_utf8), {}),
+            self.jinja_utf8_rbob.replace('bob', 'CI_MISSING_JINJA_VAR/name'))
+
     def test_jinja_nonascii_render_to_file(self):
         """Test jinja render_to_file of a filename with non-ascii content."""
         tmpl_fn = self.tmp_path("j-render-to-file.template")
@@ -144,5 +154,18 @@ $a,$b'''
         result = templater.render_from_file(tmpl_fn, {"name": "bob"})
         self.assertEqual(result, self.jinja_utf8_rbob)
 
+    @test_helpers.skipIfJinja()
+    def test_jinja_warns_on_missing_dep_and_uses_basic_renderer(self):
+        """Test jinja render_from_file will fall back to the basic renderer."""
+        tmpl_fn = self.tmp_path("j-render-from-file.template")
+        write_file(tmpl_fn, omode="wb",
+                   content=self.add_header(
+                       "jinja", self.jinja_utf8).encode('utf-8'))
+        result = templater.render_from_file(tmpl_fn, {"name": "bob"})
+        self.assertEqual(result, self.jinja_utf8.decode())
+        self.assertIn(
+            'WARNING: Jinja not available as the selected renderer for desired'
+            ' template, reverting to the basic renderer.',
+            self.logs.getvalue())
 
 # vi: ts=4 expandtab
--
cgit v1.2.3


From 84bf2482ad569357541e94d8f7f90cf0cdd759e2 Mon Sep 17 00:00:00 2001
From: Scott Moser
Date: Fri, 14 Sep 2018 09:59:10 -0400
Subject: bash_completion/cloud-init: fix shell syntax error.

A syntax error crept in with commit c7555762f3a3.
---
 bash_completion/cloud-init | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'bash_completion')

diff --git a/bash_completion/cloud-init b/bash_completion/cloud-init
index b3a5ced3..6d01bf3a 100644
--- a/bash_completion/cloud-init
+++ b/bash_completion/cloud-init
@@ -63,7 +63,7 @@ _cloudinit_complete()
                 COMPREPLY=($(compgen -W "--help --network-data --kind --directory --output-kind" -- $cur_word))
                 ;;
             render)
-                COMPREPLY=($(compgen -W "--help --instance-data --debug" -- $cur_word))
+                COMPREPLY=($(compgen -W "--help --instance-data --debug" -- $cur_word));;
             schema)
                 COMPREPLY=($(compgen -W "--help --config-file --doc --annotate" -- $cur_word))
                 ;;
--
cgit v1.2.3


From fc4b966ba928b30b1c586407e752e0b51b1031e8 Mon Sep 17 00:00:00 2001
From: Chad Smith
Date: Tue, 25 Sep 2018 21:59:16 +0000
Subject: cli: add cloud-init query subcommand to query instance metadata
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Cloud-init caches any cloud metadata crawled during boot in the file
/run/cloud-init/instance-data.json. Cloud-init also standardizes some of
that metadata across all clouds. The command 'cloud-init query' surfaces a
simple CLI to query or format any cached instance metadata so that scripts
or end-users do not have to write tools to crawl metadata themselves.
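A minimal sketch of how a script might consume the new subcommand, assuming it runs on a booted instance where the ``cloud-init`` executable is on PATH; the ``query`` helper below is illustrative, and only flags documented in this commit are used:

```
import subprocess


def query(*args):
    """Run 'cloud-init query' and return stdout, or None on failure."""
    result = subprocess.run(
        ['cloud-init', 'query'] + list(args),
        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if result.returncode != 0:
        return None
    return result.stdout.decode().strip()


print(query('--list-keys'))          # top-level keys, one per line
print(query('v1.cloud_name'))        # a single standardized value
print(query('--format', 'host-{{ v1.instance_id }}'))  # jinja format string
```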
Since 'cloud-init query' is runnable by non-root users, redact any
sensitive data from instance-data.json and provide a root-readable
unredacted instance-data-sensitive.json. Datasources can now define a
sensitive_metadata_keys tuple; any matching keys, which could contain
passwords or credentials, are redacted from instance-data.json.

Also add the following standardized 'v1' instance-data.json keys:
  - user_data: The base64-encoded user-data provided at instance launch
  - vendor_data: Any vendor_data provided to the instance at launch
  - underscore_delimited versions of existing hyphenated keys:
      instance_id, local_hostname, availability_zone, cloud_name
---
 bash_completion/cloud-init           |   4 +-
 cloudinit/cmd/devel/render.py        |   7 +-
 cloudinit/cmd/main.py                |  10 ++
 cloudinit/cmd/query.py               | 155 ++++++++++++++++++
 cloudinit/cmd/tests/test_query.py    | 193 +++++++++++++++++++++++
 cloudinit/helpers.py                 |   4 +
 cloudinit/sources/__init__.py        |  76 +++++++--
 cloudinit/sources/tests/test_init.py | 130 ++++++++++++---
 doc/rtd/index.rst                    |   1 +
 doc/rtd/topics/capabilities.rst      | 105 ++++++++++---
 doc/rtd/topics/datasources.rst       | 148 +----------------
 doc/rtd/topics/instancedata.rst      | 297 +++++++++++++++++++++++++++++++++++
 integration-requirements.txt         |   3 +-
 tests/cloud_tests/testcases/base.py  |  52 +++---
 14 files changed, 952 insertions(+), 233 deletions(-)
 create mode 100644 cloudinit/cmd/query.py
 create mode 100644 cloudinit/cmd/tests/test_query.py
 create mode 100644 doc/rtd/topics/instancedata.rst

(limited to 'bash_completion')

diff --git a/bash_completion/cloud-init b/bash_completion/cloud-init
index 6d01bf3a..8c25032f 100644
--- a/bash_completion/cloud-init
+++ b/bash_completion/cloud-init
@@ -10,7 +10,7 @@ _cloudinit_complete()
     cur_word="${COMP_WORDS[COMP_CWORD]}"
     prev_word="${COMP_WORDS[COMP_CWORD-1]}"
 
-    subcmds="analyze clean collect-logs devel dhclient-hook features init modules single status"
+    subcmds="analyze clean collect-logs devel dhclient-hook features init modules query single status"
     base_params="--help --file --version --debug --force"
     case ${COMP_CWORD} in
         1)
@@ -40,6 +40,8 @@ _cloudinit_complete()
                     COMPREPLY=($(compgen -W "--help --mode" -- $cur_word))
                     ;;
+                query)
+                    COMPREPLY=($(compgen -W "--all --help --instance-data --list-keys --user-data --vendor-data --debug" -- $cur_word));;
                 single)
                     COMPREPLY=($(compgen -W "--help --name --frequency --report" -- $cur_word))
                     ;;
diff --git a/cloudinit/cmd/devel/render.py b/cloudinit/cmd/devel/render.py
index e85933db..2ba6b681 100755
--- a/cloudinit/cmd/devel/render.py
+++ b/cloudinit/cmd/devel/render.py
@@ -9,7 +9,6 @@ import sys
 from cloudinit.handlers.jinja_template import render_jinja_payload_from_file
 from cloudinit import log
 from cloudinit.sources import INSTANCE_JSON_FILE
-from cloudinit import util
 from . 
import addLogHandlerCLI, read_cfg_paths NAME = 'render' @@ -54,11 +53,7 @@ def handle_args(name, args): paths.run_dir, INSTANCE_JSON_FILE) else: instance_data_fn = args.instance_data - try: - with open(instance_data_fn) as stream: - instance_data = stream.read() - instance_data = util.load_json(instance_data) - except IOError: + if not os.path.exists(instance_data_fn): LOG.error('Missing instance-data.json file: %s', instance_data_fn) return 1 try: diff --git a/cloudinit/cmd/main.py b/cloudinit/cmd/main.py index 0eee583c..5a437020 100644 --- a/cloudinit/cmd/main.py +++ b/cloudinit/cmd/main.py @@ -791,6 +791,10 @@ def main(sysv_args=None): ' pass to this module')) parser_single.set_defaults(action=('single', main_single)) + parser_query = subparsers.add_parser( + 'query', + help='Query standardized instance metadata from the command line.') + parser_dhclient = subparsers.add_parser('dhclient-hook', help=('run the dhclient hook' 'to record network info')) @@ -842,6 +846,12 @@ def main(sysv_args=None): clean_parser(parser_clean) parser_clean.set_defaults( action=('clean', handle_clean_args)) + elif sysv_args[0] == 'query': + from cloudinit.cmd.query import ( + get_parser as query_parser, handle_args as handle_query_args) + query_parser(parser_query) + parser_query.set_defaults( + action=('render', handle_query_args)) elif sysv_args[0] == 'status': from cloudinit.cmd.status import ( get_parser as status_parser, handle_status_args) diff --git a/cloudinit/cmd/query.py b/cloudinit/cmd/query.py new file mode 100644 index 00000000..7d2d4fe4 --- /dev/null +++ b/cloudinit/cmd/query.py @@ -0,0 +1,155 @@ +# This file is part of cloud-init. See LICENSE file for license information. + +"""Query standardized instance metadata from the command line.""" + +import argparse +import os +import six +import sys + +from cloudinit.handlers.jinja_template import ( + convert_jinja_instance_data, render_jinja_payload) +from cloudinit.cmd.devel import addLogHandlerCLI, read_cfg_paths +from cloudinit import log +from cloudinit.sources import ( + INSTANCE_JSON_FILE, INSTANCE_JSON_SENSITIVE_FILE, REDACT_SENSITIVE_VALUE) +from cloudinit import util + +NAME = 'query' +LOG = log.getLogger(NAME) + + +def get_parser(parser=None): + """Build or extend an arg parser for query utility. + + @param parser: Optional existing ArgumentParser instance representing the + query subcommand which will be extended to support the args of + this utility. + + @returns: ArgumentParser with proper argument configuration. + """ + if not parser: + parser = argparse.ArgumentParser( + prog=NAME, description='Query cloud-init instance data') + parser.add_argument( + '-d', '--debug', action='store_true', default=False, + help='Add verbose messages during template render') + parser.add_argument( + '-i', '--instance-data', type=str, + help=('Path to instance-data.json file. Default is /run/cloud-init/%s' + % INSTANCE_JSON_FILE)) + parser.add_argument( + '-l', '--list-keys', action='store_true', default=False, + help=('List query keys available at the provided instance-data' + ' .')) + parser.add_argument( + '-u', '--user-data', type=str, + help=('Path to user-data file. Default is' + ' /var/lib/cloud/instance/user-data.txt')) + parser.add_argument( + '-v', '--vendor-data', type=str, + help=('Path to vendor-data file. Default is' + ' /var/lib/cloud/instance/vendor-data.txt')) + parser.add_argument( + 'varname', type=str, nargs='?', + help=('A dot-delimited instance data variable to query from' + ' instance-data query. 
For example: v2.local_hostname')) + parser.add_argument( + '-a', '--all', action='store_true', default=False, dest='dump_all', + help='Dump all available instance-data') + parser.add_argument( + '-f', '--format', type=str, dest='format', + help=('Optionally specify a custom output format string. Any' + ' instance-data variable can be specified between double-curly' + ' braces. For example -f "{{ v2.cloud_name }}"')) + return parser + + +def handle_args(name, args): + """Handle calls to 'cloud-init query' as a subcommand.""" + paths = None + addLogHandlerCLI(LOG, log.DEBUG if args.debug else log.WARNING) + if not any([args.list_keys, args.varname, args.format, args.dump_all]): + LOG.error( + 'Expected one of the options: --all, --format,' + ' --list-keys or varname') + get_parser().print_help() + return 1 + + uid = os.getuid() + if not all([args.instance_data, args.user_data, args.vendor_data]): + paths = read_cfg_paths() + if not args.instance_data: + if uid == 0: + default_json_fn = INSTANCE_JSON_SENSITIVE_FILE + else: + default_json_fn = INSTANCE_JSON_FILE # World readable + instance_data_fn = os.path.join(paths.run_dir, default_json_fn) + else: + instance_data_fn = args.instance_data + if not args.user_data: + user_data_fn = os.path.join(paths.instance_link, 'user-data.txt') + else: + user_data_fn = args.user_data + if not args.vendor_data: + vendor_data_fn = os.path.join(paths.instance_link, 'vendor-data.txt') + else: + vendor_data_fn = args.vendor_data + + try: + instance_json = util.load_file(instance_data_fn) + except IOError: + LOG.error('Missing instance-data.json file: %s', instance_data_fn) + return 1 + + instance_data = util.load_json(instance_json) + if uid != 0: + instance_data['userdata'] = ( + '<%s> file:%s' % (REDACT_SENSITIVE_VALUE, user_data_fn)) + instance_data['vendordata'] = ( + '<%s> file:%s' % (REDACT_SENSITIVE_VALUE, vendor_data_fn)) + else: + instance_data['userdata'] = util.load_file(user_data_fn) + instance_data['vendordata'] = util.load_file(vendor_data_fn) + if args.format: + payload = '## template: jinja\n{fmt}'.format(fmt=args.format) + rendered_payload = render_jinja_payload( + payload=payload, payload_fn='query commandline', + instance_data=instance_data, + debug=True if args.debug else False) + if rendered_payload: + print(rendered_payload) + return 0 + return 1 + + response = convert_jinja_instance_data(instance_data) + if args.varname: + try: + for var in args.varname.split('.'): + response = response[var] + except KeyError: + LOG.error('Undefined instance-data key %s', args.varname) + return 1 + if args.list_keys: + if not isinstance(response, dict): + LOG.error("--list-keys provided but '%s' is not a dict", var) + return 1 + response = '\n'.join(sorted(response.keys())) + elif args.list_keys: + response = '\n'.join(sorted(response.keys())) + if not isinstance(response, six.string_types): + response = util.json_dumps(response) + print(response) + return 0 + + +def main(): + """Tool to query specific instance-data values.""" + parser = get_parser() + sys.exit(handle_args(NAME, parser.parse_args())) + + +if __name__ == '__main__': + main() + +# vi: ts=4 expandtab diff --git a/cloudinit/cmd/tests/test_query.py b/cloudinit/cmd/tests/test_query.py new file mode 100644 index 00000000..fb87c6ab --- /dev/null +++ b/cloudinit/cmd/tests/test_query.py @@ -0,0 +1,193 @@ +# This file is part of cloud-init. See LICENSE file for license information. 
+ +from six import StringIO +from textwrap import dedent +import os + +from collections import namedtuple +from cloudinit.cmd import query +from cloudinit.helpers import Paths +from cloudinit.sources import REDACT_SENSITIVE_VALUE, INSTANCE_JSON_FILE +from cloudinit.tests.helpers import CiTestCase, mock +from cloudinit.util import ensure_dir, write_file + + +class TestQuery(CiTestCase): + + with_logs = True + + args = namedtuple( + 'queryargs', + ('debug dump_all format instance_data list_keys user_data vendor_data' + ' varname')) + + def setUp(self): + super(TestQuery, self).setUp() + self.tmp = self.tmp_dir() + self.instance_data = self.tmp_path('instance-data', dir=self.tmp) + + def test_handle_args_error_on_missing_param(self): + """Error when missing required parameters and print usage.""" + args = self.args( + debug=False, dump_all=False, format=None, instance_data=None, + list_keys=False, user_data=None, vendor_data=None, varname=None) + with mock.patch('sys.stderr', new_callable=StringIO) as m_stderr: + with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout: + self.assertEqual(1, query.handle_args('anyname', args)) + expected_error = ( + 'ERROR: Expected one of the options: --all, --format, --list-keys' + ' or varname\n') + self.assertIn(expected_error, self.logs.getvalue()) + self.assertIn('usage: query', m_stdout.getvalue()) + self.assertIn(expected_error, m_stderr.getvalue()) + + def test_handle_args_error_on_missing_instance_data(self): + """When instance_data file path does not exist, log an error.""" + absent_fn = self.tmp_path('absent', dir=self.tmp) + args = self.args( + debug=False, dump_all=True, format=None, instance_data=absent_fn, + list_keys=False, user_data='ud', vendor_data='vd', varname=None) + with mock.patch('sys.stderr', new_callable=StringIO) as m_stderr: + self.assertEqual(1, query.handle_args('anyname', args)) + self.assertIn( + 'ERROR: Missing instance-data.json file: %s' % absent_fn, + self.logs.getvalue()) + self.assertIn( + 'ERROR: Missing instance-data.json file: %s' % absent_fn, + m_stderr.getvalue()) + + def test_handle_args_defaults_instance_data(self): + """When no instance_data argument, default to configured run_dir.""" + args = self.args( + debug=False, dump_all=True, format=None, instance_data=None, + list_keys=False, user_data=None, vendor_data=None, varname=None) + run_dir = self.tmp_path('run_dir', dir=self.tmp) + ensure_dir(run_dir) + paths = Paths({'run_dir': run_dir}) + self.add_patch('cloudinit.cmd.query.read_cfg_paths', 'm_paths') + self.m_paths.return_value = paths + with mock.patch('sys.stderr', new_callable=StringIO) as m_stderr: + self.assertEqual(1, query.handle_args('anyname', args)) + json_file = os.path.join(run_dir, INSTANCE_JSON_FILE) + self.assertIn( + 'ERROR: Missing instance-data.json file: %s' % json_file, + self.logs.getvalue()) + self.assertIn( + 'ERROR: Missing instance-data.json file: %s' % json_file, + m_stderr.getvalue()) + + def test_handle_args_dumps_all_instance_data(self): + """When --all is specified query will dump all instance data vars.""" + write_file(self.instance_data, '{"my-var": "it worked"}') + args = self.args( + debug=False, dump_all=True, format=None, + instance_data=self.instance_data, list_keys=False, + user_data='ud', vendor_data='vd', varname=None) + with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout: + self.assertEqual(0, query.handle_args('anyname', args)) + self.assertEqual( + '{\n "my_var": "it worked",\n "userdata": "<%s> file:ud",\n' + ' "vendordata": "<%s> 
file:vd"\n}\n' % (
+                REDACT_SENSITIVE_VALUE, REDACT_SENSITIVE_VALUE),
+            m_stdout.getvalue())
+
+    def test_handle_args_returns_top_level_varname(self):
+        """When the argument varname is passed, report its value."""
+        write_file(self.instance_data, '{"my-var": "it worked"}')
+        args = self.args(
+            debug=False, dump_all=True, format=None,
+            instance_data=self.instance_data, list_keys=False,
+            user_data='ud', vendor_data='vd', varname='my_var')
+        with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout:
+            self.assertEqual(0, query.handle_args('anyname', args))
+        self.assertEqual('it worked\n', m_stdout.getvalue())
+
+    def test_handle_args_returns_nested_varname(self):
+        """When varname is a dot-delimited path, report the nested value."""
+        write_file(self.instance_data,
+                   '{"v1": {"key-2": "value-2"}, "my-var": "it worked"}')
+        args = self.args(
+            debug=False, dump_all=False, format=None,
+            instance_data=self.instance_data, user_data='ud', vendor_data='vd',
+            list_keys=False, varname='v1.key_2')
+        with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout:
+            self.assertEqual(0, query.handle_args('anyname', args))
+        self.assertEqual('value-2\n', m_stdout.getvalue())
+
+    def test_handle_args_returns_standardized_vars_to_top_level_aliases(self):
+        """Any standardized vars under v# are promoted as top-level aliases."""
+        write_file(
+            self.instance_data,
+            '{"v1": {"v1_1": "val1.1"}, "v2": {"v2_2": "val2.2"},'
+            ' "top": "gun"}')
+        expected = dedent("""\
+            {
+             "top": "gun",
+             "userdata": "<redacted for non-root user> file:ud",
+             "v1": {
+              "v1_1": "val1.1"
+             },
+             "v1_1": "val1.1",
+             "v2": {
+              "v2_2": "val2.2"
+             },
+             "v2_2": "val2.2",
+             "vendordata": "<redacted for non-root user> file:vd"
+            }
+            """)
+        args = self.args(
+            debug=False, dump_all=True, format=None,
+            instance_data=self.instance_data, user_data='ud', vendor_data='vd',
+            list_keys=False, varname=None)
+        with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout:
+            self.assertEqual(0, query.handle_args('anyname', args))
+        self.assertEqual(expected, m_stdout.getvalue())
+
+    def test_handle_args_list_keys_sorts_top_level_keys_when_no_varname(self):
+        """Sort all top-level keys when only --list-keys provided."""
+        write_file(
+            self.instance_data,
+            '{"v1": {"v1_1": "val1.1"}, "v2": {"v2_2": "val2.2"},'
+            ' "top": "gun"}')
+        expected = 'top\nuserdata\nv1\nv1_1\nv2\nv2_2\nvendordata\n'
+        args = self.args(
+            debug=False, dump_all=False, format=None,
+            instance_data=self.instance_data, list_keys=True, user_data='ud',
+            vendor_data='vd', varname=None)
+        with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout:
+            self.assertEqual(0, query.handle_args('anyname', args))
+        self.assertEqual(expected, m_stdout.getvalue())
+
"val1.2"}, "v2": ' + + '{"v2_2": "val2.2"}, "top": "gun"}') + expected_error = "ERROR: --list-keys provided but 'top' is not a dict" + args = self.args( + debug=False, dump_all=False, format=None, + instance_data=self.instance_data, list_keys=True, user_data='ud', + vendor_data='vd', varname='top') + with mock.patch('sys.stderr', new_callable=StringIO) as m_stderr: + with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout: + self.assertEqual(1, query.handle_args('anyname', args)) + self.assertEqual('', m_stdout.getvalue()) + self.assertIn(expected_error, m_stderr.getvalue()) + +# vi: ts=4 expandtab diff --git a/cloudinit/helpers.py b/cloudinit/helpers.py index 3cc1fb19..dcd2645e 100644 --- a/cloudinit/helpers.py +++ b/cloudinit/helpers.py @@ -239,6 +239,10 @@ class ConfigMerger(object): if cc_fn and os.path.isfile(cc_fn): try: i_cfgs.append(util.read_conf(cc_fn)) + except PermissionError: + LOG.debug( + 'Skipped loading cloud-config from %s due to' + ' non-root.', cc_fn) except Exception: util.logexc(LOG, 'Failed loading of cloud-config from %s', cc_fn) diff --git a/cloudinit/sources/__init__.py b/cloudinit/sources/__init__.py index a775f1a8..730e8174 100644 --- a/cloudinit/sources/__init__.py +++ b/cloudinit/sources/__init__.py @@ -38,8 +38,12 @@ DEP_FILESYSTEM = "FILESYSTEM" DEP_NETWORK = "NETWORK" DS_PREFIX = 'DataSource' -# File in which instance meta-data, user-data and vendor-data is written +# File in which public available instance meta-data is written +# security-sensitive key values are redacted from this world-readable file INSTANCE_JSON_FILE = 'instance-data.json' +# security-sensitive key values are present in this root-readable file +INSTANCE_JSON_SENSITIVE_FILE = 'instance-data-sensitive.json' +REDACT_SENSITIVE_VALUE = 'redacted for non-root user' # Key which can be provide a cloud's official product name to cloud-init METADATA_CLOUD_NAME_KEY = 'cloud-name' @@ -58,7 +62,7 @@ class InvalidMetaDataException(Exception): pass -def process_instance_metadata(metadata, key_path=''): +def process_instance_metadata(metadata, key_path='', sensitive_keys=()): """Process all instance metadata cleaning it up for persisting as json. Strip ci-b64 prefix and catalog any 'base64_encoded_keys' as a list @@ -67,22 +71,46 @@ def process_instance_metadata(metadata, key_path=''): """ md_copy = copy.deepcopy(metadata) md_copy['base64_encoded_keys'] = [] + md_copy['sensitive_keys'] = [] for key, val in metadata.items(): if key_path: sub_key_path = key_path + '/' + key else: sub_key_path = key + if key in sensitive_keys or sub_key_path in sensitive_keys: + md_copy['sensitive_keys'].append(sub_key_path) if isinstance(val, str) and val.startswith('ci-b64:'): md_copy['base64_encoded_keys'].append(sub_key_path) md_copy[key] = val.replace('ci-b64:', '') if isinstance(val, dict): - return_val = process_instance_metadata(val, sub_key_path) + return_val = process_instance_metadata( + val, sub_key_path, sensitive_keys) md_copy['base64_encoded_keys'].extend( return_val.pop('base64_encoded_keys')) + md_copy['sensitive_keys'].extend( + return_val.pop('sensitive_keys')) md_copy[key] = return_val return md_copy +def redact_sensitive_keys(metadata, redact_value=REDACT_SENSITIVE_VALUE): + """Redact any sensitive keys from to provided metadata dictionary. + + Replace any keys values listed in 'sensitive_keys' with redact_value. 
+ """ + if not metadata.get('sensitive_keys', []): + return metadata + md_copy = copy.deepcopy(metadata) + for key_path in metadata.get('sensitive_keys'): + path_parts = key_path.split('/') + obj = md_copy + for path in path_parts: + if isinstance(obj[path], dict) and path != path_parts[-1]: + obj = obj[path] + obj[path] = redact_value + return md_copy + + URLParams = namedtuple( 'URLParms', ['max_wait_seconds', 'timeout_seconds', 'num_retries']) @@ -127,6 +155,10 @@ class DataSource(object): _dirty_cache = False + # N-tuple of keypaths or keynames redact from instance-data.json for + # non-root users + sensitive_metadata_keys = ('security-credentials',) + def __init__(self, sys_cfg, distro, paths, ud_proc=None): self.sys_cfg = sys_cfg self.distro = distro @@ -152,12 +184,24 @@ class DataSource(object): def _get_standardized_metadata(self): """Return a dictionary of standardized metadata keys.""" - return {'v1': { - 'local-hostname': self.get_hostname(), - 'instance-id': self.get_instance_id(), - 'cloud-name': self.cloud_name, - 'region': self.region, - 'availability-zone': self.availability_zone}} + local_hostname = self.get_hostname() + instance_id = self.get_instance_id() + availability_zone = self.availability_zone + cloud_name = self.cloud_name + # When adding new standard keys prefer underscore-delimited instead + # of hyphen-delimted to support simple variable references in jinja + # templates. + return { + 'v1': { + 'availability-zone': availability_zone, + 'availability_zone': availability_zone, + 'cloud-name': cloud_name, + 'cloud_name': cloud_name, + 'instance-id': instance_id, + 'instance_id': instance_id, + 'local-hostname': local_hostname, + 'local_hostname': local_hostname, + 'region': self.region}} def clear_cached_attrs(self, attr_defaults=()): """Reset any cached metadata attributes to datasource defaults. @@ -200,9 +244,7 @@ class DataSource(object): """ instance_data = { 'ds': { - 'meta_data': self.metadata, - 'user_data': self.get_userdata_raw(), - 'vendor_data': self.get_vendordata_raw()}} + 'meta_data': self.metadata}} if hasattr(self, 'network_json'): network_json = getattr(self, 'network_json') if network_json != UNSET: @@ -217,7 +259,9 @@ class DataSource(object): # Process content base64encoding unserializable values content = util.json_dumps(instance_data) # Strip base64: prefix and set base64_encoded_keys list. - processed_data = process_instance_metadata(json.loads(content)) + processed_data = process_instance_metadata( + json.loads(content), + sensitive_keys=self.sensitive_metadata_keys) except TypeError as e: LOG.warning('Error persisting instance-data.json: %s', str(e)) return False @@ -225,7 +269,11 @@ class DataSource(object): LOG.warning('Error persisting instance-data.json: %s', str(e)) return False json_file = os.path.join(self.paths.run_dir, INSTANCE_JSON_FILE) - write_json(json_file, processed_data, mode=0o600) + write_json(json_file, processed_data) # World readable + json_sensitive_file = os.path.join(self.paths.run_dir, + INSTANCE_JSON_SENSITIVE_FILE) + write_json(json_sensitive_file, + redact_sensitive_keys(processed_data), mode=0o600) return True def _get_data(self): diff --git a/cloudinit/sources/tests/test_init.py b/cloudinit/sources/tests/test_init.py index 8299af23..6b965750 100644 --- a/cloudinit/sources/tests/test_init.py +++ b/cloudinit/sources/tests/test_init.py @@ -1,5 +1,6 @@ # This file is part of cloud-init. See LICENSE file for license information. 
+import copy import inspect import os import six @@ -9,7 +10,8 @@ from cloudinit.event import EventType from cloudinit.helpers import Paths from cloudinit import importer from cloudinit.sources import ( - INSTANCE_JSON_FILE, DataSource, UNSET) + INSTANCE_JSON_FILE, INSTANCE_JSON_SENSITIVE_FILE, REDACT_SENSITIVE_VALUE, + UNSET, DataSource, redact_sensitive_keys) from cloudinit.tests.helpers import CiTestCase, skipIf, mock from cloudinit.user_data import UserDataProcessor from cloudinit import util @@ -20,20 +22,24 @@ class DataSourceTestSubclassNet(DataSource): dsname = 'MyTestSubclass' url_max_wait = 55 - def __init__(self, sys_cfg, distro, paths, custom_userdata=None, - get_data_retval=True): + def __init__(self, sys_cfg, distro, paths, custom_metadata=None, + custom_userdata=None, get_data_retval=True): super(DataSourceTestSubclassNet, self).__init__( sys_cfg, distro, paths) self._custom_userdata = custom_userdata + self._custom_metadata = custom_metadata self._get_data_retval = get_data_retval def _get_cloud_name(self): return 'SubclassCloudName' def _get_data(self): - self.metadata = {'availability_zone': 'myaz', - 'local-hostname': 'test-subclass-hostname', - 'region': 'myregion'} + if self._custom_metadata: + self.metadata = self._custom_metadata + else: + self.metadata = {'availability_zone': 'myaz', + 'local-hostname': 'test-subclass-hostname', + 'region': 'myregion'} if self._custom_userdata: self.userdata_raw = self._custom_userdata else: @@ -278,7 +284,7 @@ class TestDataSource(CiTestCase): os.path.exists(json_file), 'Found unexpected file %s' % json_file) def test_get_data_writes_json_instance_data_on_success(self): - """get_data writes INSTANCE_JSON_FILE to run_dir as readonly root.""" + """get_data writes INSTANCE_JSON_FILE to run_dir as world readable.""" tmp = self.tmp_dir() datasource = DataSourceTestSubclassNet( self.sys_cfg, self.distro, Paths({'run_dir': tmp})) @@ -287,40 +293,90 @@ class TestDataSource(CiTestCase): content = util.load_file(json_file) expected = { 'base64_encoded_keys': [], + 'sensitive_keys': [], 'v1': { 'availability-zone': 'myaz', + 'availability_zone': 'myaz', 'cloud-name': 'subclasscloudname', + 'cloud_name': 'subclasscloudname', 'instance-id': 'iid-datasource', + 'instance_id': 'iid-datasource', 'local-hostname': 'test-subclass-hostname', + 'local_hostname': 'test-subclass-hostname', 'region': 'myregion'}, 'ds': { 'meta_data': {'availability_zone': 'myaz', 'local-hostname': 'test-subclass-hostname', - 'region': 'myregion'}, - 'user_data': 'userdata_raw', - 'vendor_data': 'vendordata_raw'}} - self.maxDiff = None + 'region': 'myregion'}}} self.assertEqual(expected, util.load_json(content)) file_stat = os.stat(json_file) + self.assertEqual(0o644, stat.S_IMODE(file_stat.st_mode)) + self.assertEqual(expected, util.load_json(content)) + + def test_get_data_writes_json_instance_data_sensitive(self): + """get_data writes INSTANCE_JSON_SENSITIVE_FILE as readonly root.""" + tmp = self.tmp_dir() + datasource = DataSourceTestSubclassNet( + self.sys_cfg, self.distro, Paths({'run_dir': tmp}), + custom_metadata={ + 'availability_zone': 'myaz', + 'local-hostname': 'test-subclass-hostname', + 'region': 'myregion', + 'some': {'security-credentials': { + 'cred1': 'sekret', 'cred2': 'othersekret'}}}) + self.assertEqual( + ('security-credentials',), datasource.sensitive_metadata_keys) + datasource.get_data() + json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp) + sensitive_json_file = self.tmp_path(INSTANCE_JSON_SENSITIVE_FILE, tmp) + redacted = 
util.load_json(util.load_file(json_file)) + self.assertEqual( + {'cred1': 'sekret', 'cred2': 'othersekret'}, + redacted['ds']['meta_data']['some']['security-credentials']) + content = util.load_file(sensitive_json_file) + expected = { + 'base64_encoded_keys': [], + 'sensitive_keys': ['ds/meta_data/some/security-credentials'], + 'v1': { + 'availability-zone': 'myaz', + 'availability_zone': 'myaz', + 'cloud-name': 'subclasscloudname', + 'cloud_name': 'subclasscloudname', + 'instance-id': 'iid-datasource', + 'instance_id': 'iid-datasource', + 'local-hostname': 'test-subclass-hostname', + 'local_hostname': 'test-subclass-hostname', + 'region': 'myregion'}, + 'ds': { + 'meta_data': { + 'availability_zone': 'myaz', + 'local-hostname': 'test-subclass-hostname', + 'region': 'myregion', + 'some': {'security-credentials': REDACT_SENSITIVE_VALUE}}} + } + self.maxDiff = None + self.assertEqual(expected, util.load_json(content)) + file_stat = os.stat(sensitive_json_file) self.assertEqual(0o600, stat.S_IMODE(file_stat.st_mode)) + self.assertEqual(expected, util.load_json(content)) def test_get_data_handles_redacted_unserializable_content(self): """get_data warns unserializable content in INSTANCE_JSON_FILE.""" tmp = self.tmp_dir() datasource = DataSourceTestSubclassNet( self.sys_cfg, self.distro, Paths({'run_dir': tmp}), - custom_userdata={'key1': 'val1', 'key2': {'key2.1': self.paths}}) + custom_metadata={'key1': 'val1', 'key2': {'key2.1': self.paths}}) datasource.get_data() json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp) content = util.load_file(json_file) - expected_userdata = { + expected_metadata = { 'key1': 'val1', 'key2': { 'key2.1': "Warning: redacted unserializable type "}} instance_json = util.load_json(content) self.assertEqual( - expected_userdata, instance_json['ds']['user_data']) + expected_metadata, instance_json['ds']['meta_data']) def test_persist_instance_data_writes_ec2_metadata_when_set(self): """When ec2_metadata class attribute is set, persist to json.""" @@ -361,17 +417,17 @@ class TestDataSource(CiTestCase): tmp = self.tmp_dir() datasource = DataSourceTestSubclassNet( self.sys_cfg, self.distro, Paths({'run_dir': tmp}), - custom_userdata={'key1': 'val1', 'key2': {'key2.1': b'\x123'}}) + custom_metadata={'key1': 'val1', 'key2': {'key2.1': b'\x123'}}) self.assertTrue(datasource.get_data()) json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp) content = util.load_file(json_file) instance_json = util.load_json(content) - self.assertEqual( - ['ds/user_data/key2/key2.1'], + self.assertItemsEqual( + ['ds/meta_data/key2/key2.1'], instance_json['base64_encoded_keys']) self.assertEqual( {'key1': 'val1', 'key2': {'key2.1': 'EjM='}}, - instance_json['ds']['user_data']) + instance_json['ds']['meta_data']) @skipIf(not six.PY2, "json serialization on <= py2.7 handles bytes") def test_get_data_handles_bytes_values(self): @@ -379,7 +435,7 @@ class TestDataSource(CiTestCase): tmp = self.tmp_dir() datasource = DataSourceTestSubclassNet( self.sys_cfg, self.distro, Paths({'run_dir': tmp}), - custom_userdata={'key1': 'val1', 'key2': {'key2.1': b'\x123'}}) + custom_metadata={'key1': 'val1', 'key2': {'key2.1': b'\x123'}}) self.assertTrue(datasource.get_data()) json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp) content = util.load_file(json_file) @@ -387,7 +443,7 @@ class TestDataSource(CiTestCase): self.assertEqual([], instance_json['base64_encoded_keys']) self.assertEqual( {'key1': 'val1', 'key2': {'key2.1': '\x123'}}, - instance_json['ds']['user_data']) + instance_json['ds']['meta_data']) @skipIf(not 
six.PY2, "Only python2 hits UnicodeDecodeErrors on non-utf8") def test_non_utf8_encoding_logs_warning(self): @@ -395,7 +451,7 @@ class TestDataSource(CiTestCase): tmp = self.tmp_dir() datasource = DataSourceTestSubclassNet( self.sys_cfg, self.distro, Paths({'run_dir': tmp}), - custom_userdata={'key1': 'val1', 'key2': {'key2.1': b'ab\xaadef'}}) + custom_metadata={'key1': 'val1', 'key2': {'key2.1': b'ab\xaadef'}}) self.assertTrue(datasource.get_data()) json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp) self.assertFalse(os.path.exists(json_file)) @@ -509,4 +565,36 @@ class TestDataSource(CiTestCase): self.logs.getvalue()) +class TestRedactSensitiveData(CiTestCase): + + def test_redact_sensitive_data_noop_when_no_sensitive_keys_present(self): + """When sensitive_keys is absent or empty from metadata do nothing.""" + md = {'my': 'data'} + self.assertEqual( + md, redact_sensitive_keys(md, redact_value='redacted')) + md['sensitive_keys'] = [] + self.assertEqual( + md, redact_sensitive_keys(md, redact_value='redacted')) + + def test_redact_sensitive_data_redacts_exact_match_name(self): + """Only exact matched sensitive_keys are redacted from metadata.""" + md = {'sensitive_keys': ['md/secure'], + 'md': {'secure': 's3kr1t', 'insecure': 'publik'}} + secure_md = copy.deepcopy(md) + secure_md['md']['secure'] = 'redacted' + self.assertEqual( + secure_md, + redact_sensitive_keys(md, redact_value='redacted')) + + def test_redact_sensitive_data_does_redacts_with_default_string(self): + """When redact_value is absent, REDACT_SENSITIVE_VALUE is used.""" + md = {'sensitive_keys': ['md/secure'], + 'md': {'secure': 's3kr1t', 'insecure': 'publik'}} + secure_md = copy.deepcopy(md) + secure_md['md']['secure'] = 'redacted for non-root user' + self.assertEqual( + secure_md, + redact_sensitive_keys(md)) + + # vi: ts=4 expandtab diff --git a/doc/rtd/index.rst b/doc/rtd/index.rst index de67f361..20a99a30 100644 --- a/doc/rtd/index.rst +++ b/doc/rtd/index.rst @@ -31,6 +31,7 @@ initialization of a cloud instance. topics/capabilities.rst topics/availability.rst topics/format.rst + topics/instancedata.rst topics/dir_layout.rst topics/examples.rst topics/boot.rst diff --git a/doc/rtd/topics/capabilities.rst b/doc/rtd/topics/capabilities.rst index 2d8e2538..0d8b8947 100644 --- a/doc/rtd/topics/capabilities.rst +++ b/doc/rtd/topics/capabilities.rst @@ -18,7 +18,7 @@ User configurability User-data can be given by the user at instance launch time. See :ref:`user_data_formats` for acceptable user-data content. - + This is done via the ``--user-data`` or ``--user-data-file`` argument to ec2-run-instances for example. @@ -53,10 +53,9 @@ system: % cloud-init --help usage: cloud-init [-h] [--version] [--file FILES] - [--debug] [--force] - {init,modules,single,dhclient-hook,features,analyze,devel,collect-logs,clean,status} - ... + {init,modules,single,query,dhclient-hook,features,analyze,devel,collect-logs,clean,status} + ... 
 optional arguments:
   -h, --help            show this help message and exit
@@ -68,17 +67,19 @@ system:
                         your own risk)
 
 Subcommands:
-    {init,modules,single,dhclient-hook,features,analyze,devel,collect-logs,clean,status}
+    {init,modules,single,query,dhclient-hook,features,analyze,devel,collect-logs,clean,status}
         init                initializes cloud-init and performs initial modules
         modules             activates modules using a given configuration key
         single              run a single module
+        query               Query instance metadata from the command line
         dhclient-hook       run the dhclient hookto record network info
         features            list defined features
         analyze             Devel tool: Analyze cloud-init logs and data
         devel               Run development tools
         collect-logs        Collect and tar all cloud-init debug info
-        clean               Remove logs and artifacts so cloud-init can re-run.
-        status              Report cloud-init status or wait on completion.
+        clean               Remove logs and artifacts so cloud-init can re-run
+        status              Report cloud-init status or wait on completion
+
 CLI Subcommand details
 ======================
@@ -104,8 +105,8 @@ cloud-init status
 Report whether cloud-init is running, done, disabled or errored. Exits
 non-zero if an error is detected in cloud-init.
 
- * **--long**: Detailed status information.
- * **--wait**: Block until cloud-init completes.
+* **--long**: Detailed status information.
+* **--wait**: Block until cloud-init completes.
 
 .. code-block:: shell-session
 
@@ -143,6 +144,68 @@ Logs collected are:
    * journalctl output
    * /var/lib/cloud/instance/user-data.txt
 
+.. _cli_query:
+
+cloud-init query
+------------------
+Query standardized cloud instance metadata crawled by cloud-init and stored
+in ``/run/cloud-init/instance-data.json``. This is a convenience command-line
+interface to reference any cached configuration metadata that cloud-init
+crawls when booting the instance. See :ref:`instance_metadata` for more info.
+
+* **--all**: Dump all available instance data as json which can be queried.
+* **--instance-data**: Optional path to a different instance-data.json file to
+  source for queries.
+* **--list-keys**: List available query keys from cached instance data.
+
+.. code-block:: shell-session
+
+  # List all top-level query keys available (includes standardized aliases)
+  % cloud-init query --list-keys
+  availability_zone
+  base64_encoded_keys
+  cloud_name
+  ds
+  instance_id
+  local_hostname
+  region
+  v1
+
+* **<varname>**: A dot-delimited variable path into the instance-data.json
+  object.
+
+.. code-block:: shell-session
+
+  # Query cloud-init standardized metadata on any cloud
+  % cloud-init query v1.cloud_name
+  aws  # or openstack, azure, gce etc.
+
+  # Any standardized instance-data under a <v#> key is aliased as a top-level
+  # key for convenience.
+  % cloud-init query cloud_name
+  aws  # or openstack, azure, gce etc.
+
+  # Query datasource-specific metadata on EC2
+  % cloud-init query ds.meta_data.public_ipv4
+
+* **--format** A string that will use jinja-template syntax to render a
+  string, replacing any instance-data variables referenced between double
+  curly braces.
+
+.. code-block:: shell-session
+
+  # Generate a custom hostname fqdn based on instance-id, cloud and region
+  % cloud-init query --format 'custom-{{instance_id}}.{{region}}.{{v1.cloud_name}}.com'
+  custom-i-0e91f69987f37ec74.us-east-2.aws.com
+
+
+.. note::
+  The standardized instance data keys under **v#** are guaranteed not to change
+  behavior or format. If using top-level convenience aliases for any
+  standardized instance data keys, the most recent value (highest **v#**) of
+  that key name is what is reported as the top-level value. So these aliases
+  act as a 'latest'.
+
+
.. 
_cli_analyze: cloud-init analyze @@ -150,10 +213,10 @@ cloud-init analyze Get detailed reports of where cloud-init spends most of its time. See :ref:`boot_time_analysis` for more info. - * **blame** Report ordered by most costly operations. - * **dump** Machine-readable JSON dump of all cloud-init tracked events. - * **show** show time-ordered report of the cost of operations during each - boot stage. +* **blame** Report ordered by most costly operations. +* **dump** Machine-readable JSON dump of all cloud-init tracked events. +* **show** show time-ordered report of the cost of operations during each + boot stage. .. _cli_devel: @@ -182,8 +245,8 @@ cloud-init clean Remove cloud-init artifacts from /var/lib/cloud and optionally reboot the machine to so cloud-init re-runs all stages as it did on first boot. - * **--logs**: Optionally remove /var/log/cloud-init*log files. - * **--reboot**: Reboot the system after removing artifacts. +* **--logs**: Optionally remove /var/log/cloud-init*log files. +* **--reboot**: Reboot the system after removing artifacts. .. _cli_init: @@ -195,7 +258,7 @@ Can be run on the commandline, but is generally gated to run only once due to semaphores in **/var/lib/cloud/instance/sem/** and **/var/lib/cloud/sem**. - * **--local**: Run *init-local* stage instead of *init*. +* **--local**: Run *init-local* stage instead of *init*. .. _cli_modules: @@ -210,8 +273,8 @@ declared to run in various boot stages in the file commandline, but each module is gated to run only once due to semaphores in ``/var/lib/cloud/``. - * **--mode (init|config|final)**: Run *modules:init*, *modules:config* or - *modules:final* cloud-init stages. See :ref:`boot_stages` for more info. +* **--mode (init|config|final)**: Run *modules:init*, *modules:config* or + *modules:final* cloud-init stages. See :ref:`boot_stages` for more info. .. _cli_single: @@ -221,9 +284,9 @@ Attempt to run a single named cloud config module. The following example re-runs the cc_set_hostname module ignoring the module default frequency of once-per-instance: - * **--name**: The cloud-config module name to run - * **--frequency**: Optionally override the declared module frequency - with one of (always|once-per-instance|once) +* **--name**: The cloud-config module name to run +* **--frequency**: Optionally override the declared module frequency + with one of (always|once-per-instance|once) .. code-block:: shell-session diff --git a/doc/rtd/topics/datasources.rst b/doc/rtd/topics/datasources.rst index 14432e65..e34f145c 100644 --- a/doc/rtd/topics/datasources.rst +++ b/doc/rtd/topics/datasources.rst @@ -17,146 +17,10 @@ own way) internally a datasource abstract class was created to allow for a single way to access the different cloud systems methods to provide this data through the typical usage of subclasses. - -.. _instance_metadata: - -instance-data -------------- -For reference, cloud-init stores all the metadata, vendordata and userdata -provided by a cloud in a json blob at ``/run/cloud-init/instance-data.json``. -While the json contains datasource-specific keys and names, cloud-init will -maintain a minimal set of standardized keys that will remain stable on any -cloud. Standardized instance-data keys will be present under a "v1" key. -Any datasource metadata cloud-init consumes will all be present under the -"ds" key. - -Below is an instance-data.json example from an OpenStack instance: - -.. 
sourcecode:: json - - { - "base64-encoded-keys": [ - "ds/meta-data/random_seed", - "ds/user-data" - ], - "ds": { - "ec2_metadata": { - "ami-id": "ami-0000032f", - "ami-launch-index": "0", - "ami-manifest-path": "FIXME", - "block-device-mapping": { - "ami": "vda", - "ephemeral0": "/dev/vdb", - "root": "/dev/vda" - }, - "hostname": "xenial-test.novalocal", - "instance-action": "none", - "instance-id": "i-0006e030", - "instance-type": "m1.small", - "local-hostname": "xenial-test.novalocal", - "local-ipv4": "10.5.0.6", - "placement": { - "availability-zone": "None" - }, - "public-hostname": "xenial-test.novalocal", - "public-ipv4": "10.245.162.145", - "reservation-id": "r-fxm623oa", - "security-groups": "default" - }, - "meta-data": { - "availability_zone": null, - "devices": [], - "hostname": "xenial-test.novalocal", - "instance-id": "3e39d278-0644-4728-9479-678f9212d8f0", - "launch_index": 0, - "local-hostname": "xenial-test.novalocal", - "name": "xenial-test", - "project_id": "e0eb2d2538814...", - "random_seed": "A6yPN...", - "uuid": "3e39d278-0644-4728-9479-678f92..." - }, - "network_json": { - "links": [ - { - "ethernet_mac_address": "fa:16:3e:7d:74:9b", - "id": "tap9ca524d5-6e", - "mtu": 8958, - "type": "ovs", - "vif_id": "9ca524d5-6e5a-4809-936a-6901..." - } - ], - "networks": [ - { - "id": "network0", - "link": "tap9ca524d5-6e", - "network_id": "c6adfc18-9753-42eb-b3ea-18b57e6b837f", - "type": "ipv4_dhcp" - } - ], - "services": [ - { - "address": "10.10.160.2", - "type": "dns" - } - ] - }, - "user-data": "I2Nsb3VkLWNvbmZpZ...", - "vendor-data": null - }, - "v1": { - "availability-zone": null, - "cloud-name": "openstack", - "instance-id": "3e39d278-0644-4728-9479-678f9212d8f0", - "local-hostname": "xenial-test", - "region": null - } - } - - -As of cloud-init v. 18.4, any values present in -``/run/cloud-init/instance-data.json`` can be used in cloud-init user data -scripts or cloud config data. This allows consumers to use cloud-init's -vendor-neutral, standardized metadata keys as well as datasource-specific -content for any scripts or cloud-config modules they are using. - -To use instance-data.json values in scripts and **#config-config** files the -user-data will need to contain the following header as the first line **## template: jinja**. Cloud-init will source all variables defined in -``/run/cloud-init/instance-data.json`` and allow scripts or cloud-config files -to reference those paths. Below are two examples:: - - * Cloud config calling home with the ec2 public hostname and avaliability-zone - ``` - ## template: jinja - #cloud-config - runcmd: - - echo 'EC2 public hostname allocated to instance: {{ ds.meta_data.public_hostname }}' > /tmp/instance_metadata - - echo 'EC2 avaiability zone: {{ v1.availability_zone }}' >> /tmp/instance_metadata - - curl -X POST -d '{"hostname": "{{ds.meta_data.public_hostname }}", "availability-zone": "{{ v1.availability_zone }}"}' https://example.com.com - ``` - - * Custom user script performing different operations based on region - ``` - ## template: jinja - #!/bin/bash - {% if v1.region == 'us-east-2' -%} - echo 'Installing custom proxies for {{ v1.region }} - sudo apt-get install my-xtra-fast-stack - {%- endif %} - ... - - ``` - -.. note:: - Trying to reference jinja variables that don't exist in - instance-data.json will result in warnings in ``/var/log/cloud-init.log`` - and the following string in your rendered user-data: - ``CI_MISSING_JINJA_VAR/``. - -.. 
note::
-    To save time designing your user-data for a specific cloud's
-    instance-data.json, use the 'render' cloud-init command on an
-    instance booted on your favorite cloud. See :ref:`cli_devel` for more
-    information.
+Any metadata processed by cloud-init's datasources is persisted as
+``/run/cloud-init/instance-data.json``. Cloud-init provides tooling
+to quickly introspect some of that data. See :ref:`instance_metadata` for
+more information.


 Datasource API
@@ -196,14 +60,14 @@ The current interface that a datasource object must provide is the following:
     # or does not exist)
     def device_name_to_device(self, name)

-    # gets the locale string this instance should be applying 
+    # gets the locale string this instance should be applying
     # which typically used to adjust the instances locale settings files
     def get_locale(self)

     @property
     def availability_zone(self)

-    # gets the instance id that was assigned to this instance by the 
+    # gets the instance id that was assigned to this instance by the
     # cloud provider or when said instance id does not exist in the backing
     # metadata this will return 'iid-datasource'
     def get_instance_id(self)
diff --git a/doc/rtd/topics/instancedata.rst b/doc/rtd/topics/instancedata.rst
new file mode 100644
index 00000000..634e1807
--- /dev/null
+++ b/doc/rtd/topics/instancedata.rst
@@ -0,0 +1,297 @@
+.. _instance_metadata:
+
+*****************
+Instance Metadata
+*****************
+
+What is instance data?
+======================
+
+Instance data is the collection of all configuration data that cloud-init
+processes to configure the instance. This configuration typically
+comes from any number of sources:
+
+* cloud-provided metadata services (aka metadata)
+* custom config-drive attached to the instance
+* cloud-config seed files in the booted cloud image or distribution
+* vendordata provided from files or cloud metadata services
+* userdata provided at instance creation
+
+Each cloud provider presents unique configuration metadata in different
+formats to the instance. Cloud-init provides a cache of any crawled metadata
+as well as a versioned set of standardized instance data keys which it makes
+available on all platforms.
+
+Cloud-init produces a simple json object in
+``/run/cloud-init/instance-data.json`` which is a standardized and versioned
+representation of the metadata it consumes during initial boot. The intent is
+to provide the following benefits to users or scripts on any system deployed
+with cloud-init:
+
+* simple static object to query to obtain an instance's metadata
+* speed: avoid costly network transactions for metadata that is already
+  cached on the filesystem
+* reduce need to recrawl metadata services for static metadata that is
+  already cached
+* leverage cloud-init's best practices for crawling cloud-metadata services
+* avoid rolling unique metadata crawlers on each cloud platform to get
+  metadata configuration values
+
+Cloud-init stores any instance data processed in the following files:
+
+* ``/run/cloud-init/instance-data.json``: world-readable json containing
+  standardized keys, sensitive keys redacted
+* ``/run/cloud-init/instance-data-sensitive.json``: root-readable unredacted
+  json blob
+* ``/var/lib/cloud/instance/user-data.txt``: root-readable sensitive raw
+  userdata
+* ``/var/lib/cloud/instance/vendor-data.txt``: root-readable sensitive raw
+  vendordata
+
+Cloud-init redacts any security-sensitive content from instance-data.json and
+stores ``/run/cloud-init/instance-data.json`` as a world-readable json file.
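+
+As an illustration of this redaction behavior (a minimal sketch mirroring the
+``TestRedactSensitiveData`` unit tests added in this series, not necessarily
+the exact helper cloud-init ships), redacting ``sensitive_keys`` paths can be
+expressed as:
+
+.. code-block:: python
+
+   import copy
+
+   # Default replacement string; matches the value the unit tests expect.
+   REDACT_SENSITIVE_VALUE = 'redacted for non-root user'
+
+   def redact_sensitive_keys(metadata, redact_value=REDACT_SENSITIVE_VALUE):
+       """Return a deep copy of metadata with sensitive_keys paths redacted.
+
+       Key paths are forward-slash delimited: 'md/secure' redacts
+       metadata['md']['secure'].
+       """
+       if not metadata.get('sensitive_keys'):
+           return metadata  # no sensitive keys declared; nothing to do
+       redacted = copy.deepcopy(metadata)
+       for key_path in metadata['sensitive_keys']:
+           parts = key_path.split('/')
+           obj = redacted
+           for part in parts[:-1]:
+               obj = obj.get(part, {})  # walk down to the parent dict
+           if isinstance(obj, dict) and parts[-1] in obj:
+               obj[parts[-1]] = redact_value
+       return redacted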
+
+Because user-data and vendor-data can contain passwords, both of these files
+are readable only by *root*. The *root* user can also read
+``/run/cloud-init/instance-data-sensitive.json``, which is all instance data
+from instance-data.json as well as unredacted sensitive content.
+
+
+Format of instance-data.json
+============================
+
+The instance-data.json and instance-data-sensitive.json files are well-formed
+JSON and record the set of keys and values for any metadata processed by
+cloud-init. Cloud-init standardizes the format for this content so that it
+can be generalized across different cloud platforms.
+
+There are four basic top-level keys:
+
+* **base64_encoded_keys**: A list of forward-slash delimited key paths into
+  the instance-data.json object whose value is base64-encoded for json
+  compatibility. Values at these paths should be decoded to get the original
+  value.
+
+* **sensitive_keys**: A list of forward-slash delimited key paths into
+  the instance-data.json object whose value is considered by the datasource as
+  'security sensitive'. Only the keys listed here will be redacted from
+  instance-data.json for non-root users.
+
+* **ds**: Datasource-specific metadata crawled for the specific cloud
+  platform. It should closely represent the structure of the cloud metadata
+  crawled. The structure of content and details provided are entirely
+  cloud-dependent. Mileage will vary depending on what the cloud exposes.
+  The content exposed under the 'ds' key is currently **experimental** and
+  expected to change slightly in the upcoming cloud-init release.
+
+* **v1**: Standardized cloud-init metadata keys; these keys are guaranteed to
+  exist on all cloud platforms. They will also retain their current behavior
+  and format and will be carried forward even if cloud-init introduces a new
+  version of standardized keys with **v2**.
+
+The standardized keys present:
+
++----------------------+-----------------------------------------------+---------------------------+
+| Key path             | Description                                   | Examples                  |
++======================+===============================================+===========================+
+| v1.cloud_name        | The name of the cloud provided by metadata    | aws, openstack, azure,    |
+|                      | key 'cloud-name' or the cloud-init datasource | configdrive, nocloud,     |
+|                      | name which was discovered.                    | ovf, etc.                 |
++----------------------+-----------------------------------------------+---------------------------+
+| v1.instance_id       | Unique instance_id allocated by the cloud     | i-<hash>                  |
++----------------------+-----------------------------------------------+---------------------------+
+| v1.local_hostname    | The internal or local hostname of the system  | ip-10-41-41-70,           |
+|                      |                                               | <hostname>                |
++----------------------+-----------------------------------------------+---------------------------+
+| v1.region            | The physical region/datacenter in which the   | us-east-2                 |
+|                      | instance is deployed                          |                           |
++----------------------+-----------------------------------------------+---------------------------+
+| v1.availability_zone | The physical availability zone in which the   | us-east-2b, nova, null    |
+|                      | instance is deployed                          |                           |
++----------------------+-----------------------------------------------+---------------------------+
+
+
+Below is an example of ``/run/cloud-init/instance-data.json`` on an EC2
+instance:
+
+..
sourcecode:: json + + { + "base64_encoded_keys": [], + "sensitive_keys": [], + "ds": { + "meta_data": { + "ami-id": "ami-014e1416b628b0cbf", + "ami-launch-index": "0", + "ami-manifest-path": "(unknown)", + "block-device-mapping": { + "ami": "/dev/sda1", + "ephemeral0": "sdb", + "ephemeral1": "sdc", + "root": "/dev/sda1" + }, + "hostname": "ip-10-41-41-70.us-east-2.compute.internal", + "instance-action": "none", + "instance-id": "i-04fa31cfc55aa7976", + "instance-type": "t2.micro", + "local-hostname": "ip-10-41-41-70.us-east-2.compute.internal", + "local-ipv4": "10.41.41.70", + "mac": "06:b6:92:dd:9d:24", + "metrics": { + "vhostmd": "" + }, + "network": { + "interfaces": { + "macs": { + "06:b6:92:dd:9d:24": { + "device-number": "0", + "interface-id": "eni-08c0c9fdb99b6e6f4", + "ipv4-associations": { + "18.224.22.43": "10.41.41.70" + }, + "local-hostname": "ip-10-41-41-70.us-east-2.compute.internal", + "local-ipv4s": "10.41.41.70", + "mac": "06:b6:92:dd:9d:24", + "owner-id": "437526006925", + "public-hostname": "ec2-18-224-22-43.us-east-2.compute.amazonaws.com", + "public-ipv4s": "18.224.22.43", + "security-group-ids": "sg-828247e9", + "security-groups": "Cloud-init integration test secgroup", + "subnet-id": "subnet-282f3053", + "subnet-ipv4-cidr-block": "10.41.41.0/24", + "subnet-ipv6-cidr-blocks": "2600:1f16:b80:ad00::/64", + "vpc-id": "vpc-252ef24d", + "vpc-ipv4-cidr-block": "10.41.0.0/16", + "vpc-ipv4-cidr-blocks": "10.41.0.0/16", + "vpc-ipv6-cidr-blocks": "2600:1f16:b80:ad00::/56" + } + } + } + }, + "placement": { + "availability-zone": "us-east-2b" + }, + "profile": "default-hvm", + "public-hostname": "ec2-18-224-22-43.us-east-2.compute.amazonaws.com", + "public-ipv4": "18.224.22.43", + "public-keys": { + "cloud-init-integration": [ + "ssh-rsa + AAAAB3NzaC1yc2EAAAADAQABAAABAQDSL7uWGj8cgWyIOaspgKdVy0cKJ+UTjfv7jBOjG2H/GN8bJVXy72XAvnhM0dUM+CCs8FOf0YlPX+Frvz2hKInrmRhZVwRSL129PasD12MlI3l44u6IwS1o/W86Q+tkQYEljtqDOo0a+cOsaZkvUNzUyEXUwz/lmYa6G4hMKZH4NBj7nbAAF96wsMCoyNwbWryBnDYUr6wMbjRR1J9Pw7Xh7WRC73wy4Va2YuOgbD3V/5ZrFPLbWZW/7TFXVrql04QVbyei4aiFR5n//GvoqwQDNe58LmbzX/xvxyKJYdny2zXmdAhMxbrpFQsfpkJ9E/H5w0yOdSvnWbUoG5xNGoOB + cloud-init-integration" + ] + }, + "reservation-id": "r-06ab75e9346f54333", + "security-groups": "Cloud-init integration test secgroup", + "services": { + "domain": "amazonaws.com", + "partition": "aws" + } + } + }, + "v1": { + "availability-zone": "us-east-2b", + "availability_zone": "us-east-2b", + "cloud-name": "aws", + "cloud_name": "aws", + "instance-id": "i-04fa31cfc55aa7976", + "instance_id": "i-04fa31cfc55aa7976", + "local-hostname": "ip-10-41-41-70", + "local_hostname": "ip-10-41-41-70", + "region": "us-east-2" + } + } + + +Using instance-data +=================== + +As of cloud-init v. 18.4, any variables present in +``/run/cloud-init/instance-data.json`` can be used in: + +* User-data scripts +* Cloud config data +* Command line interface via **cloud-init query** or + **cloud-init devel render** + +Many clouds allow users to provide user-data to an instance at +the time the instance is launched. Cloud-init supports a number of +:ref:`user_data_formats`. + +Both user-data scripts and **#cloud-config** data support jinja template +rendering. +When the first line of the provided user-data begins with, +**## template: jinja** cloud-init will use jinja to render that file. +Any instance-data-sensitive.json variables are surfaced as dot-delimited +jinja template variables because cloud-config modules are run as 'root' +user. 
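+
+Conceptually, the header detection and rendering work as sketched below. This
+is illustrative only (the ``render_user_data`` helper here is hypothetical
+and the ``jinja2`` package is an assumed dependency); cloud-init performs
+the real rendering internally:
+
+.. code-block:: python
+
+   import json
+
+   from jinja2 import Template  # assumption: jinja2 is installed
+
+   JINJA_HEADER = '## template: jinja'
+
+   def render_user_data(user_data, instance_data_path):
+       """Render user-data with jinja when it opens with the jinja header."""
+       lines = user_data.splitlines()
+       if not lines or lines[0].strip() != JINJA_HEADER:
+           return user_data  # not a jinja template; pass through unchanged
+       with open(instance_data_path) as stream:
+           variables = json.load(stream)
+       # Drop the header and render the remainder so references like
+       # {{ v1.region }} resolve against the crawled instance data.
+       return Template('\n'.join(lines[1:])).render(**variables)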
+
+
+Below are some examples of providing these types of user-data:
+
+* Cloud config calling home with the ec2 public hostname and availability-zone
+
+.. code-block:: shell-session
+
+  ## template: jinja
+  #cloud-config
+  runcmd:
+      - echo 'EC2 public hostname allocated to instance: {{
+        ds.meta_data.public_hostname }}' > /tmp/instance_metadata
+      - echo 'EC2 availability zone: {{ v1.availability_zone }}' >>
+        /tmp/instance_metadata
+      - curl -X POST -d '{"hostname": "{{ds.meta_data.public_hostname }}",
+        "availability-zone": "{{ v1.availability_zone }}"}'
+        https://example.com
+
+* Custom user-data script performing different operations based on region
+
+.. code-block:: shell-session
+
+  ## template: jinja
+  #!/bin/bash
+  {% if v1.region == 'us-east-2' -%}
+  echo 'Installing custom proxies for {{ v1.region }}'
+  sudo apt-get install my-xtra-fast-stack
+  {%- endif %}
+  ...
+
+.. note::
+  Trying to reference jinja variables that don't exist in
+  instance-data.json will result in warnings in ``/var/log/cloud-init.log``
+  and the following string in your rendered user-data:
+  ``CI_MISSING_JINJA_VAR/<varname>``.
+
+Cloud-init also surfaces a command line tool **cloud-init query** which can
+assist developers or scripts with obtaining instance metadata easily. See
+:ref:`cli_query` for more information.
+
+To cut down on keystrokes on the command line, cloud-init also provides
+top-level key aliases for any standardized ``v#`` keys present. The preceding
+``v1`` is not required of ``v1.var_name``; these aliases will represent the
+value of the highest versioned standard key. For example, the ``cloud_name``
+value will be ``v2.cloud_name`` if both ``v1`` and ``v2`` keys are present in
+instance-data.json.
+
+The **query** command also publishes ``userdata`` and ``vendordata`` keys to
+the root user which will contain the decoded user and vendor data provided to
+this instance. Non-root users referencing userdata or vendordata keys will
+see only redacted values.
+
+.. code-block:: shell-session
+
+  # List all top-level instance-data keys available
+  % cloud-init query --list-keys
+
+  # Find your EC2 ami-id
+  % cloud-init query ds.meta_data.ami_id
+
+  # Format your cloud_name and region using jinja template syntax
+  % cloud-init query --format 'cloud: {{ v1.cloud_name }} myregion: {{
+    v1.region }}'
+
+.. note::
+  To save time designing a user-data template for a specific cloud's
+  instance-data.json, use the 'render' cloud-init command on an
+  instance booted on your favorite cloud. See :ref:`cli_devel` for more
+  information.
+
+.. vi: textwidth=78
diff --git a/integration-requirements.txt b/integration-requirements.txt
index f80cb942..880d9886 100644
--- a/integration-requirements.txt
+++ b/integration-requirements.txt
@@ -5,16 +5,17 @@
 # the packages/pkg-deps.json file as well.
# +unittest2 # ec2 backend boto3==1.5.9 # ssh communication paramiko==2.4.1 + # lxd backend # 04/03/2018: enables use of lxd 3.0 git+https://github.com/lxc/pylxd.git@4b8ab1802f9aee4eb29cf7b119dae0aa47150779 - # finds latest image information git+https://git.launchpad.net/simplestreams diff --git a/tests/cloud_tests/testcases/base.py b/tests/cloud_tests/testcases/base.py index 27458271..c5457968 100644 --- a/tests/cloud_tests/testcases/base.py +++ b/tests/cloud_tests/testcases/base.py @@ -5,15 +5,15 @@ import crypt import json import re -import unittest +import unittest2 from cloudinit import util as c_util -SkipTest = unittest.SkipTest +SkipTest = unittest2.SkipTest -class CloudTestCase(unittest.TestCase): +class CloudTestCase(unittest2.TestCase): """Base test class for verifiers.""" # data gets populated in get_suite.setUpClass @@ -167,8 +167,9 @@ class CloudTestCase(unittest.TestCase): 'Skipping instance-data.json test.' ' OS: %s not bionic or newer' % self.os_name) instance_data = json.loads(out) - self.assertEqual( - ['ds/user_data'], instance_data['base64_encoded_keys']) + self.assertItemsEqual( + [], + instance_data['base64_encoded_keys']) ds = instance_data.get('ds', {}) v1_data = instance_data.get('v1', {}) metadata = ds.get('meta-data', {}) @@ -187,10 +188,10 @@ class CloudTestCase(unittest.TestCase): metadata.get('placement', {}).get('availability-zone'), 'Could not determine EC2 Availability zone placement') self.assertIsNotNone( - v1_data['availability-zone'], 'expected ec2 availability-zone') - self.assertEqual('aws', v1_data['cloud-name']) - self.assertIn('i-', v1_data['instance-id']) - self.assertIn('ip-', v1_data['local-hostname']) + v1_data['availability_zone'], 'expected ec2 availability_zone') + self.assertEqual('aws', v1_data['cloud_name']) + self.assertIn('i-', v1_data['instance_id']) + self.assertIn('ip-', v1_data['local_hostname']) self.assertIsNotNone(v1_data['region'], 'expected ec2 region') def test_instance_data_json_lxd(self): @@ -213,16 +214,14 @@ class CloudTestCase(unittest.TestCase): ' OS: %s not bionic or newer' % self.os_name) instance_data = json.loads(out) v1_data = instance_data.get('v1', {}) - self.assertEqual( - ['ds/user_data', 'ds/vendor_data'], - sorted(instance_data['base64_encoded_keys'])) - self.assertEqual('nocloud', v1_data['cloud-name']) + self.assertItemsEqual([], sorted(instance_data['base64_encoded_keys'])) + self.assertEqual('nocloud', v1_data['cloud_name']) self.assertIsNone( - v1_data['availability-zone'], - 'found unexpected lxd availability-zone %s' % - v1_data['availability-zone']) - self.assertIn('cloud-test', v1_data['instance-id']) - self.assertIn('cloud-test', v1_data['local-hostname']) + v1_data['availability_zone'], + 'found unexpected lxd availability_zone %s' % + v1_data['availability_zone']) + self.assertIn('cloud-test', v1_data['instance_id']) + self.assertIn('cloud-test', v1_data['local_hostname']) self.assertIsNone( v1_data['region'], 'found unexpected lxd region %s' % v1_data['region']) @@ -248,18 +247,17 @@ class CloudTestCase(unittest.TestCase): ' OS: %s not bionic or newer' % self.os_name) instance_data = json.loads(out) v1_data = instance_data.get('v1', {}) - self.assertEqual( - ['ds/user_data'], instance_data['base64_encoded_keys']) - self.assertEqual('nocloud', v1_data['cloud-name']) + self.assertItemsEqual([], instance_data['base64_encoded_keys']) + self.assertEqual('nocloud', v1_data['cloud_name']) self.assertIsNone( - v1_data['availability-zone'], - 'found unexpected kvm availability-zone %s' % - 
v1_data['availability-zone']) + v1_data['availability_zone'], + 'found unexpected kvm availability_zone %s' % + v1_data['availability_zone']) self.assertIsNotNone( re.match(r'[\da-f]{8}(-[\da-f]{4}){3}-[\da-f]{12}', - v1_data['instance-id']), - 'kvm instance-id is not a UUID: %s' % v1_data['instance-id']) - self.assertIn('ubuntu', v1_data['local-hostname']) + v1_data['instance_id']), + 'kvm instance_id is not a UUID: %s' % v1_data['instance_id']) + self.assertIn('ubuntu', v1_data['local_hostname']) self.assertIsNone( v1_data['region'], 'found unexpected lxd region %s' % v1_data['region']) -- cgit v1.2.3