From 2320c3de2712c2f320b0d8af4aa129219cc2ad04 Mon Sep 17 00:00:00 2001 From: Andy Liu Date: Fri, 24 Aug 2018 22:25:37 +0000 Subject: logging: Add logging config type hyperv for reporting via Azure KVP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Linux guests can provide information to Hyper-V hosts via KVP. KVP allows the guests to provide any string key-value-pairs back to the host's registry. On linux, kvp communication pools are presented as pool files in /var/lib/hyperv/.kvp_pool_#. The following reporting configuration can enable this kvp reporting in addition to default logging if the pool files exist: reporting:     logging:         type: log     telemetry:         type: hyperv --- cloudinit/cmd/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'cloudinit/cmd/main.py') diff --git a/cloudinit/cmd/main.py b/cloudinit/cmd/main.py index d6ba90f4..c0edee18 100644 --- a/cloudinit/cmd/main.py +++ b/cloudinit/cmd/main.py @@ -315,7 +315,7 @@ def main_init(name, args): existing = "trust" init.purge_cache() - # Delete the non-net file as well + # Delete the no-net file as well util.del_file(os.path.join(path_helper.get_cpath("data"), "no-net")) # Stage 5 @@ -339,7 +339,7 @@ def main_init(name, args): " Likely bad things to come!")) if not args.force: init.apply_network_config(bring_up=not args.local) - LOG.debug("[%s] Exiting without datasource in local mode", mode) + LOG.debug("[%s] Exiting without datasource", mode) if mode == sources.DSMODE_LOCAL: return (None, []) else: -- cgit v1.2.3 From 43e51a04515686a15c410d1a16dd5ff06fd1afd4 Mon Sep 17 00:00:00 2001 From: Ryan Harper Date: Fri, 31 Aug 2018 21:47:18 +0000 Subject: hyperv_reporting_handler: simplify threaded publisher Switch the implementation to a daemon thread which uses a blocking get from the Queue. No additional locking or flag checking is needed since the Queue itself handles acquiring the lock as needed. 
cloud-init only has a single producer (the main thread calling publish) and the consumer will read all events in the queue and write them out. Using the daemon mode of the thread handles flushing the queue on main exit in python3; in python2.7 we handle the EOFError that results when the publish thread's call to get() fails indicating the main thread has exited. The result is that the handler is no longer spawning a thread on each publish event but rather creates a single thread when we start up the reporter and we remove any additional use of separate locks and flags as we only have a single Queue object and we're only calling queue.put() from main thread and queue.get() from consuming thread. --- cloudinit/cmd/main.py | 4 +++- cloudinit/reporting/__init__.py | 6 +++++ cloudinit/reporting/handlers.py | 49 +++++++++++++++++------------------------ 3 files changed, 29 insertions(+), 30 deletions(-) (limited to 'cloudinit/cmd/main.py') diff --git a/cloudinit/cmd/main.py b/cloudinit/cmd/main.py index c0edee18..4ea4fe7f 100644 --- a/cloudinit/cmd/main.py +++ b/cloudinit/cmd/main.py @@ -877,9 +877,11 @@ def main(sysv_args=None): rname, rdesc, reporting_enabled=report_on) with args.reporter: - return util.log_time( + retval = util.log_time( logfunc=LOG.debug, msg="cloud-init mode '%s'" % name, get_uptime=True, func=functor, args=(name, args)) + reporting.flush_events() + return retval if __name__ == '__main__': diff --git a/cloudinit/reporting/__init__.py b/cloudinit/reporting/__init__.py index e047767e..ed5c7038 100644 --- a/cloudinit/reporting/__init__.py +++ b/cloudinit/reporting/__init__.py @@ -37,6 +37,12 @@ def update_configuration(config): instantiated_handler_registry.register_item(handler_name, instance) +def flush_events(): + for _, handler in instantiated_handler_registry.registered_items.items(): + if hasattr(handler, 'flush'): + handler.flush() + + instantiated_handler_registry = DictRegistry() update_configuration(DEFAULT_CONFIG) diff --git 
a/cloudinit/reporting/handlers.py b/cloudinit/reporting/handlers.py index 4b4bb396..6d23558e 100644 --- a/cloudinit/reporting/handlers.py +++ b/cloudinit/reporting/handlers.py @@ -16,10 +16,8 @@ from cloudinit import (url_helper, util) from datetime import datetime if six.PY2: - import multiprocessing.queues as queue from multiprocessing.queues import JoinableQueue as JQueue else: - import queue from queue import Queue as JQueue LOG = logging.getLogger(__name__) @@ -41,6 +39,10 @@ class ReportingHandler(object): def publish_event(self, event): """Publish an event.""" + def flush(self): + """Ensure ReportingHandler has published all events""" + pass + class LogHandler(ReportingHandler): """Publishes events to the cloud-init log at the ``DEBUG`` log level.""" @@ -134,15 +136,16 @@ class HyperVKvpReportingHandler(ReportingHandler): super(HyperVKvpReportingHandler, self).__init__() self._kvp_file_path = kvp_file_path self._event_types = event_types - self.running = False - self.queue_lock = threading.Lock() - self.running_lock = threading.Lock() self.q = JQueue() self.kvp_file = None self.incarnation_no = self._get_incarnation_no() self.event_key_prefix = u"{0}|{1}".format(self.EVENT_PREFIX, self.incarnation_no) self._current_offset = 0 + self.publish_thread = threading.Thread( + target=self._publish_event_routine) + self.publish_thread.daemon = True + self.publish_thread.start() def _get_incarnation_no(self): """ @@ -276,10 +279,8 @@ class HyperVKvpReportingHandler(ReportingHandler): def _publish_event_routine(self): while True: - event = None try: - # acquire the lock. 
- event = self.q.get_nowait() + event = self.q.get(block=True) need_append = True try: if not os.path.exists(self._kvp_file_path): @@ -302,41 +303,31 @@ class HyperVKvpReportingHandler(ReportingHandler): if int(match_groups[0]) < self.incarnation_no: need_append = False self._update_kvp_item(encoded_data) - break + continue if need_append: self._append_kvp_item(encoded_data) except IOError as e: LOG.warning( "failed posting event to kvp: %s e:%s", event.as_string(), e) - self.running = False - break finally: self.q.task_done() - except queue.Empty: - with self.queue_lock: - # double check the queue is empty - if self.q.empty(): - self.running = False - break - - def trigger_publish_event(self): - if not self.running: - with self.running_lock: - if not self.running: - self.running = True - thread = threading.Thread( - target=self._publish_event_routine) - thread.start() + + # when main process exits, q.get() will through EOFError + # indicating we should exit this thread. + except EOFError: + return # since the saving to the kvp pool can be a time costing task # if the kvp pool already contains a chunk of data, # so defer it to another thread. def publish_event(self, event): if (not self._event_types or event.event_type in self._event_types): - with self.queue_lock: - self.q.put(event) - self.trigger_publish_event() + self.q.put(event) + + def flush(self): + LOG.debug('HyperVReportingHandler flushing remaining events') + self.q.join() available_handlers = DictRegistry() -- cgit v1.2.3 From c7555762f3a30190ce7726b4d013bc3e83c7e4b6 Mon Sep 17 00:00:00 2001 From: Chad Smith Date: Tue, 11 Sep 2018 17:31:46 +0000 Subject: user-data: jinja template to render instance-data.json in cloud-config Allow users to provide '## template: jinja' as the first line of their #cloud-config or custom script user-data parts. When this header exists, the cloud-config or script will be rendered as a jinja template. 
All instance metadata keys and values present in /run/cloud-init/instance-data.json will be available as jinja variables for the template. This means any cloud-config module or script can reference any standardized instance data in templates and scripts. Additionally, any standardized instance-data.json keys scoped below a '<v#>' key will be promoted as a top-level key for ease of reference in templates. This means that '{{ local_hostname }}' is the same as using the latest '{{ v#.local_hostname }}'. Since instance-data is written to /run/cloud-init/instance-data.json, make sure it is persisted across reboots when the cached datasource object is reloaded. LP: #1791781 --- bash_completion/cloud-init | 2 + cloudinit/cmd/devel/__init__.py | 25 ++ cloudinit/cmd/devel/parser.py | 5 +- cloudinit/cmd/devel/render.py | 90 ++++++ cloudinit/cmd/devel/tests/test_render.py | 101 +++++++ cloudinit/cmd/main.py | 16 +- cloudinit/handlers/__init__.py | 11 +- cloudinit/handlers/boot_hook.py | 12 +- cloudinit/handlers/cloud_config.py | 15 +- cloudinit/handlers/jinja_template.py | 137 +++++++++ cloudinit/handlers/shell_script.py | 9 +- cloudinit/handlers/upstart_job.py | 9 +- cloudinit/helpers.py | 4 + cloudinit/log.py | 12 +- cloudinit/sources/__init__.py | 47 ++- cloudinit/sources/tests/test_init.py | 75 ++++- cloudinit/stages.py | 22 +- cloudinit/templater.py | 28 +- cloudinit/tests/helpers.py | 9 + doc/rtd/topics/capabilities.rst | 15 +- doc/rtd/topics/datasources.rst | 47 +++ doc/rtd/topics/format.rst | 21 +- tests/cloud_tests/testcases/base.py | 8 +- tests/unittests/test_builtin_handlers.py | 324 +++++++++++++++++++-- .../test_handler/test_handler_etc_hosts.py | 1 + tests/unittests/test_handler/test_handler_ntp.py | 1 + tests/unittests/test_templating.py | 23 ++ 27 files changed, 959 insertions(+), 110 deletions(-) create mode 100755 cloudinit/cmd/devel/render.py create mode 100644 cloudinit/cmd/devel/tests/test_render.py create mode 100644 cloudinit/handlers/jinja_template.py 
(limited to 'cloudinit/cmd/main.py') diff --git a/bash_completion/cloud-init b/bash_completion/cloud-init index f38164b0..b3a5ced3 100644 --- a/bash_completion/cloud-init +++ b/bash_completion/cloud-init @@ -62,6 +62,8 @@ _cloudinit_complete() net-convert) COMPREPLY=($(compgen -W "--help --network-data --kind --directory --output-kind" -- $cur_word)) ;; + render) + COMPREPLY=($(compgen -W "--help --instance-data --debug" -- $cur_word)) schema) COMPREPLY=($(compgen -W "--help --config-file --doc --annotate" -- $cur_word)) ;; diff --git a/cloudinit/cmd/devel/__init__.py b/cloudinit/cmd/devel/__init__.py index e69de29b..3ae28b69 100644 --- a/cloudinit/cmd/devel/__init__.py +++ b/cloudinit/cmd/devel/__init__.py @@ -0,0 +1,25 @@ +# This file is part of cloud-init. See LICENSE file for license information. + +"""Common cloud-init devel commandline utility functions.""" + + +import logging + +from cloudinit import log +from cloudinit.stages import Init + + +def addLogHandlerCLI(logger, log_level): + """Add a commandline logging handler to emit messages to stderr.""" + formatter = logging.Formatter('%(levelname)s: %(message)s') + log.setupBasicLogging(log_level, formatter=formatter) + return logger + + +def read_cfg_paths(): + """Return a Paths object based on the system configuration on disk.""" + init = Init(ds_deps=[]) + init.read_cfg() + return init.paths + +# vi: ts=4 expandtab diff --git a/cloudinit/cmd/devel/parser.py b/cloudinit/cmd/devel/parser.py index 40a4b019..99a234ce 100644 --- a/cloudinit/cmd/devel/parser.py +++ b/cloudinit/cmd/devel/parser.py @@ -8,6 +8,7 @@ import argparse from cloudinit.config import schema from . import net_convert +from . 
import render def get_parser(parser=None): @@ -22,7 +23,9 @@ def get_parser(parser=None): ('schema', 'Validate cloud-config files for document schema', schema.get_parser, schema.handle_schema_args), (net_convert.NAME, net_convert.__doc__, - net_convert.get_parser, net_convert.handle_args) + net_convert.get_parser, net_convert.handle_args), + (render.NAME, render.__doc__, + render.get_parser, render.handle_args) ] for (subcmd, helpmsg, get_parser, handler) in subcmds: parser = subparsers.add_parser(subcmd, help=helpmsg) diff --git a/cloudinit/cmd/devel/render.py b/cloudinit/cmd/devel/render.py new file mode 100755 index 00000000..e85933db --- /dev/null +++ b/cloudinit/cmd/devel/render.py @@ -0,0 +1,90 @@ +# This file is part of cloud-init. See LICENSE file for license information. + +"""Debug jinja template rendering of user-data.""" + +import argparse +import os +import sys + +from cloudinit.handlers.jinja_template import render_jinja_payload_from_file +from cloudinit import log +from cloudinit.sources import INSTANCE_JSON_FILE +from cloudinit import util +from . import addLogHandlerCLI, read_cfg_paths + +NAME = 'render' +DEFAULT_INSTANCE_DATA = '/run/cloud-init/instance-data.json' + +LOG = log.getLogger(NAME) + + +def get_parser(parser=None): + """Build or extend and arg parser for jinja render utility. + + @param parser: Optional existing ArgumentParser instance representing the + subcommand which will be extended to support the args of this utility. + + @returns: ArgumentParser with proper argument configuration. + """ + if not parser: + parser = argparse.ArgumentParser(prog=NAME, description=__doc__) + parser.add_argument( + 'user_data', type=str, help='Path to the user-data file to render') + parser.add_argument( + '-i', '--instance-data', type=str, + help=('Optional path to instance-data.json file. 
Defaults to' + ' /run/cloud-init/instance-data.json')) + parser.add_argument('-d', '--debug', action='store_true', default=False, + help='Add verbose messages during template render') + return parser + + +def handle_args(name, args): + """Render the provided user-data template file using instance-data values. + + Also setup CLI log handlers to report to stderr since this is a development + utility which should be run by a human on the CLI. + + @return 0 on success, 1 on failure. + """ + addLogHandlerCLI(LOG, log.DEBUG if args.debug else log.WARNING) + if not args.instance_data: + paths = read_cfg_paths() + instance_data_fn = os.path.join( + paths.run_dir, INSTANCE_JSON_FILE) + else: + instance_data_fn = args.instance_data + try: + with open(instance_data_fn) as stream: + instance_data = stream.read() + instance_data = util.load_json(instance_data) + except IOError: + LOG.error('Missing instance-data.json file: %s', instance_data_fn) + return 1 + try: + with open(args.user_data) as stream: + user_data = stream.read() + except IOError: + LOG.error('Missing user-data file: %s', args.user_data) + return 1 + rendered_payload = render_jinja_payload_from_file( + payload=user_data, payload_fn=args.user_data, + instance_data_file=instance_data_fn, + debug=True if args.debug else False) + if not rendered_payload: + LOG.error('Unable to render user-data file: %s', args.user_data) + return 1 + sys.stdout.write(rendered_payload) + return 0 + + +def main(): + args = get_parser().parse_args() + return(handle_args(NAME, args)) + + +if __name__ == '__main__': + sys.exit(main()) + + +# vi: ts=4 expandtab diff --git a/cloudinit/cmd/devel/tests/test_render.py b/cloudinit/cmd/devel/tests/test_render.py new file mode 100644 index 00000000..fc5d2c0d --- /dev/null +++ b/cloudinit/cmd/devel/tests/test_render.py @@ -0,0 +1,101 @@ +# This file is part of cloud-init. See LICENSE file for license information. 
+ +from six import StringIO +import os + +from collections import namedtuple +from cloudinit.cmd.devel import render +from cloudinit.helpers import Paths +from cloudinit.sources import INSTANCE_JSON_FILE +from cloudinit.tests.helpers import CiTestCase, mock, skipUnlessJinja +from cloudinit.util import ensure_dir, write_file + + +class TestRender(CiTestCase): + + with_logs = True + + args = namedtuple('renderargs', 'user_data instance_data debug') + + def setUp(self): + super(TestRender, self).setUp() + self.tmp = self.tmp_dir() + + def test_handle_args_error_on_missing_user_data(self): + """When user_data file path does not exist, log an error.""" + absent_file = self.tmp_path('user-data', dir=self.tmp) + instance_data = self.tmp_path('instance-data', dir=self.tmp) + write_file(instance_data, '{}') + args = self.args( + user_data=absent_file, instance_data=instance_data, debug=False) + with mock.patch('sys.stderr', new_callable=StringIO): + self.assertEqual(1, render.handle_args('anyname', args)) + self.assertIn( + 'Missing user-data file: %s' % absent_file, + self.logs.getvalue()) + + def test_handle_args_error_on_missing_instance_data(self): + """When instance_data file path does not exist, log an error.""" + user_data = self.tmp_path('user-data', dir=self.tmp) + absent_file = self.tmp_path('instance-data', dir=self.tmp) + args = self.args( + user_data=user_data, instance_data=absent_file, debug=False) + with mock.patch('sys.stderr', new_callable=StringIO): + self.assertEqual(1, render.handle_args('anyname', args)) + self.assertIn( + 'Missing instance-data.json file: %s' % absent_file, + self.logs.getvalue()) + + def test_handle_args_defaults_instance_data(self): + """When no instance_data argument, default to configured run_dir.""" + user_data = self.tmp_path('user-data', dir=self.tmp) + run_dir = self.tmp_path('run_dir', dir=self.tmp) + ensure_dir(run_dir) + paths = Paths({'run_dir': run_dir}) + self.add_patch('cloudinit.cmd.devel.render.read_cfg_paths', 
'm_paths') + self.m_paths.return_value = paths + args = self.args( + user_data=user_data, instance_data=None, debug=False) + with mock.patch('sys.stderr', new_callable=StringIO): + self.assertEqual(1, render.handle_args('anyname', args)) + json_file = os.path.join(run_dir, INSTANCE_JSON_FILE) + self.assertIn( + 'Missing instance-data.json file: %s' % json_file, + self.logs.getvalue()) + + @skipUnlessJinja() + def test_handle_args_renders_instance_data_vars_in_template(self): + """If user_data file is a jinja template render instance-data vars.""" + user_data = self.tmp_path('user-data', dir=self.tmp) + write_file(user_data, '##template: jinja\nrendering: {{ my_var }}') + instance_data = self.tmp_path('instance-data', dir=self.tmp) + write_file(instance_data, '{"my-var": "jinja worked"}') + args = self.args( + user_data=user_data, instance_data=instance_data, debug=True) + with mock.patch('sys.stderr', new_callable=StringIO) as m_console_err: + with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout: + self.assertEqual(0, render.handle_args('anyname', args)) + self.assertIn( + 'DEBUG: Converted jinja variables\n{', self.logs.getvalue()) + self.assertIn( + 'DEBUG: Converted jinja variables\n{', m_console_err.getvalue()) + self.assertEqual('rendering: jinja worked', m_stdout.getvalue()) + + @skipUnlessJinja() + def test_handle_args_warns_and_gives_up_on_invalid_jinja_operation(self): + """If user_data file has invalid jinja operations log warnings.""" + user_data = self.tmp_path('user-data', dir=self.tmp) + write_file(user_data, '##template: jinja\nrendering: {{ my-var }}') + instance_data = self.tmp_path('instance-data', dir=self.tmp) + write_file(instance_data, '{"my-var": "jinja worked"}') + args = self.args( + user_data=user_data, instance_data=instance_data, debug=True) + with mock.patch('sys.stderr', new_callable=StringIO): + self.assertEqual(1, render.handle_args('anyname', args)) + self.assertIn( + 'WARNING: Ignoring jinja template for %s: Undefined 
jinja' + ' variable: "my-var". Jinja tried subtraction. Perhaps you meant' + ' "my_var"?' % user_data, + self.logs.getvalue()) + +# vi: ts=4 expandtab diff --git a/cloudinit/cmd/main.py b/cloudinit/cmd/main.py index 4ea4fe7f..0eee583c 100644 --- a/cloudinit/cmd/main.py +++ b/cloudinit/cmd/main.py @@ -348,6 +348,7 @@ def main_init(name, args): LOG.debug("[%s] barreling on in force mode without datasource", mode) + _maybe_persist_instance_data(init) # Stage 6 iid = init.instancify() LOG.debug("[%s] %s will now be targeting instance id: %s. new=%s", @@ -490,6 +491,7 @@ def main_modules(action_name, args): print_exc(msg) if not args.force: return [(msg)] + _maybe_persist_instance_data(init) # Stage 3 mods = stages.Modules(init, extract_fns(args), reporter=args.reporter) # Stage 4 @@ -541,6 +543,7 @@ def main_single(name, args): " likely bad things to come!")) if not args.force: return 1 + _maybe_persist_instance_data(init) # Stage 3 mods = stages.Modules(init, extract_fns(args), reporter=args.reporter) mod_args = args.module_args @@ -688,6 +691,15 @@ def status_wrapper(name, args, data_d=None, link_d=None): return len(v1[mode]['errors']) +def _maybe_persist_instance_data(init): + """Write instance-data.json file if absent and datasource is restored.""" + if init.ds_restored: + instance_data_file = os.path.join( + init.paths.run_dir, sources.INSTANCE_JSON_FILE) + if not os.path.exists(instance_data_file): + init.datasource.persist_instance_data() + + def _maybe_set_hostname(init, stage, retry_stage): """Call set-hostname if metadata, vendordata or userdata provides it. 
@@ -887,6 +899,8 @@ def main(sysv_args=None): if __name__ == '__main__': if 'TZ' not in os.environ: os.environ['TZ'] = ":/etc/localtime" - main(sys.argv) + return_value = main(sys.argv) + if return_value: + sys.exit(return_value) # vi: ts=4 expandtab diff --git a/cloudinit/handlers/__init__.py b/cloudinit/handlers/__init__.py index c3576c04..0db75af9 100644 --- a/cloudinit/handlers/__init__.py +++ b/cloudinit/handlers/__init__.py @@ -41,7 +41,7 @@ PART_HANDLER_FN_TMPL = 'part-handler-%03d' # For parts without filenames PART_FN_TPL = 'part-%03d' -# Different file beginnings to there content type +# Different file beginnings to their content type INCLUSION_TYPES_MAP = { '#include': 'text/x-include-url', '#include-once': 'text/x-include-once-url', @@ -52,6 +52,7 @@ INCLUSION_TYPES_MAP = { '#cloud-boothook': 'text/cloud-boothook', '#cloud-config-archive': 'text/cloud-config-archive', '#cloud-config-jsonp': 'text/cloud-config-jsonp', + '## template: jinja': 'text/jinja2', } # Sorted longest first @@ -69,9 +70,13 @@ class Handler(object): def __repr__(self): return "%s: [%s]" % (type_utils.obj_name(self), self.list_types()) - @abc.abstractmethod def list_types(self): - raise NotImplementedError() + # Each subclass must define the supported content prefixes it handles. + if not hasattr(self, 'prefixes'): + raise NotImplementedError('Missing prefixes subclass attribute') + else: + return [INCLUSION_TYPES_MAP[prefix] + for prefix in getattr(self, 'prefixes')] @abc.abstractmethod def handle_part(self, *args, **kwargs): diff --git a/cloudinit/handlers/boot_hook.py b/cloudinit/handlers/boot_hook.py index 057b4dbc..dca50a49 100644 --- a/cloudinit/handlers/boot_hook.py +++ b/cloudinit/handlers/boot_hook.py @@ -17,10 +17,13 @@ from cloudinit import util from cloudinit.settings import (PER_ALWAYS) LOG = logging.getLogger(__name__) -BOOTHOOK_PREFIX = "#cloud-boothook" class BootHookPartHandler(handlers.Handler): + + # The content prefixes this handler understands. 
+ prefixes = ['#cloud-boothook'] + def __init__(self, paths, datasource, **_kwargs): handlers.Handler.__init__(self, PER_ALWAYS) self.boothook_dir = paths.get_ipath("boothooks") @@ -28,16 +31,11 @@ class BootHookPartHandler(handlers.Handler): if datasource: self.instance_id = datasource.get_instance_id() - def list_types(self): - return [ - handlers.type_from_starts_with(BOOTHOOK_PREFIX), - ] - def _write_part(self, payload, filename): filename = util.clean_filename(filename) filepath = os.path.join(self.boothook_dir, filename) contents = util.strip_prefix_suffix(util.dos2unix(payload), - prefix=BOOTHOOK_PREFIX) + prefix=self.prefixes[0]) util.write_file(filepath, contents.lstrip(), 0o700) return filepath diff --git a/cloudinit/handlers/cloud_config.py b/cloudinit/handlers/cloud_config.py index 178a5b9b..99bf0e61 100644 --- a/cloudinit/handlers/cloud_config.py +++ b/cloudinit/handlers/cloud_config.py @@ -42,14 +42,12 @@ DEF_MERGERS = mergers.string_extract_mergers('dict(replace)+list()+str()') CLOUD_PREFIX = "#cloud-config" JSONP_PREFIX = "#cloud-config-jsonp" -# The file header -> content types this module will handle. -CC_TYPES = { - JSONP_PREFIX: handlers.type_from_starts_with(JSONP_PREFIX), - CLOUD_PREFIX: handlers.type_from_starts_with(CLOUD_PREFIX), -} - class CloudConfigPartHandler(handlers.Handler): + + # The content prefixes this handler understands. + prefixes = [CLOUD_PREFIX, JSONP_PREFIX] + def __init__(self, paths, **_kwargs): handlers.Handler.__init__(self, PER_ALWAYS, version=3) self.cloud_buf = None @@ -58,9 +56,6 @@ class CloudConfigPartHandler(handlers.Handler): self.cloud_fn = paths.get_ipath(_kwargs["cloud_config_path"]) self.file_names = [] - def list_types(self): - return list(CC_TYPES.values()) - def _write_cloud_config(self): if not self.cloud_fn: return @@ -138,7 +133,7 @@ class CloudConfigPartHandler(handlers.Handler): # First time through, merge with an empty dict... 
if self.cloud_buf is None or not self.file_names: self.cloud_buf = {} - if ctype == CC_TYPES[JSONP_PREFIX]: + if ctype == handlers.INCLUSION_TYPES_MAP[JSONP_PREFIX]: self._merge_patch(payload) else: self._merge_part(payload, headers) diff --git a/cloudinit/handlers/jinja_template.py b/cloudinit/handlers/jinja_template.py new file mode 100644 index 00000000..3fa4097e --- /dev/null +++ b/cloudinit/handlers/jinja_template.py @@ -0,0 +1,137 @@ +# This file is part of cloud-init. See LICENSE file for license information. + +import os +import re + +try: + from jinja2.exceptions import UndefinedError as JUndefinedError +except ImportError: + # No jinja2 dependency + JUndefinedError = Exception + +from cloudinit import handlers +from cloudinit import log as logging +from cloudinit.sources import INSTANCE_JSON_FILE +from cloudinit.templater import render_string, MISSING_JINJA_PREFIX +from cloudinit.util import b64d, load_file, load_json, json_dumps + +from cloudinit.settings import PER_ALWAYS + +LOG = logging.getLogger(__name__) + + +class JinjaTemplatePartHandler(handlers.Handler): + + prefixes = ['## template: jinja'] + + def __init__(self, paths, **_kwargs): + handlers.Handler.__init__(self, PER_ALWAYS, version=3) + self.paths = paths + self.sub_handlers = {} + for handler in _kwargs.get('sub_handlers', []): + for ctype in handler.list_types(): + self.sub_handlers[ctype] = handler + + def handle_part(self, data, ctype, filename, payload, frequency, headers): + if ctype in handlers.CONTENT_SIGNALS: + return + jinja_json_file = os.path.join(self.paths.run_dir, INSTANCE_JSON_FILE) + rendered_payload = render_jinja_payload_from_file( + payload, filename, jinja_json_file) + if not rendered_payload: + return + subtype = handlers.type_from_starts_with(rendered_payload) + sub_handler = self.sub_handlers.get(subtype) + if not sub_handler: + LOG.warning( + 'Ignoring jinja template for %s. 
Could not find supported' + ' sub-handler for type %s', filename, subtype) + return + if sub_handler.handler_version == 3: + sub_handler.handle_part( + data, ctype, filename, rendered_payload, frequency, headers) + elif sub_handler.handler_version == 2: + sub_handler.handle_part( + data, ctype, filename, rendered_payload, frequency) + + +def render_jinja_payload_from_file( + payload, payload_fn, instance_data_file, debug=False): + """Render a jinja template payload sourcing variables from jinja_vars_path. + + @param payload: String of jinja template content. Should begin with + ## template: jinja\n. + @param payload_fn: String representing the filename from which the payload + was read used in error reporting. Generally in part-handling this is + 'part-##'. + @param instance_data_file: A path to a json file containing variables that + will be used as jinja template variables. + + @return: A string of jinja-rendered content with the jinja header removed. + Returns None on error. + """ + instance_data = {} + rendered_payload = None + if not os.path.exists(instance_data_file): + raise RuntimeError( + 'Cannot render jinja template vars. 
Instance data not yet' + ' present at %s' % instance_data_file) + instance_data = load_json(load_file(instance_data_file)) + rendered_payload = render_jinja_payload( + payload, payload_fn, instance_data, debug) + if not rendered_payload: + return None + return rendered_payload + + +def render_jinja_payload(payload, payload_fn, instance_data, debug=False): + instance_jinja_vars = convert_jinja_instance_data( + instance_data, + decode_paths=instance_data.get('base64-encoded-keys', [])) + if debug: + LOG.debug('Converted jinja variables\n%s', + json_dumps(instance_jinja_vars)) + try: + rendered_payload = render_string(payload, instance_jinja_vars) + except (TypeError, JUndefinedError) as e: + LOG.warning( + 'Ignoring jinja template for %s: %s', payload_fn, str(e)) + return None + warnings = [ + "'%s'" % var.replace(MISSING_JINJA_PREFIX, '') + for var in re.findall( + r'%s[^\s]+' % MISSING_JINJA_PREFIX, rendered_payload)] + if warnings: + LOG.warning( + "Could not render jinja template variables in file '%s': %s", + payload_fn, ', '.join(warnings)) + return rendered_payload + + +def convert_jinja_instance_data(data, prefix='', sep='/', decode_paths=()): + """Process instance-data.json dict for use in jinja templates. + + Replace hyphens with underscores for jinja templates and decode any + base64_encoded_keys. 
+ """ + result = {} + decode_paths = [path.replace('-', '_') for path in decode_paths] + for key, value in sorted(data.items()): + if '-' in key: + # Standardize keys for use in #cloud-config/shell templates + key = key.replace('-', '_') + key_path = '{0}{1}{2}'.format(prefix, sep, key) if prefix else key + if key_path in decode_paths: + value = b64d(value) + if isinstance(value, dict): + result[key] = convert_jinja_instance_data( + value, key_path, sep=sep, decode_paths=decode_paths) + if re.match(r'v\d+', key): + # Copy values to top-level aliases + for subkey, subvalue in result[key].items(): + result[subkey] = subvalue + else: + result[key] = value + return result + +# vi: ts=4 expandtab diff --git a/cloudinit/handlers/shell_script.py b/cloudinit/handlers/shell_script.py index e4945a23..214714bc 100644 --- a/cloudinit/handlers/shell_script.py +++ b/cloudinit/handlers/shell_script.py @@ -17,21 +17,18 @@ from cloudinit import util from cloudinit.settings import (PER_ALWAYS) LOG = logging.getLogger(__name__) -SHELL_PREFIX = "#!" 
class ShellScriptPartHandler(handlers.Handler): + + prefixes = ['#!'] + def __init__(self, paths, **_kwargs): handlers.Handler.__init__(self, PER_ALWAYS) self.script_dir = paths.get_ipath_cur('scripts') if 'script_path' in _kwargs: self.script_dir = paths.get_ipath_cur(_kwargs['script_path']) - def list_types(self): - return [ - handlers.type_from_starts_with(SHELL_PREFIX), - ] - def handle_part(self, data, ctype, filename, payload, frequency): if ctype in handlers.CONTENT_SIGNALS: # TODO(harlowja): maybe delete existing things here diff --git a/cloudinit/handlers/upstart_job.py b/cloudinit/handlers/upstart_job.py index dc338769..83fb0724 100644 --- a/cloudinit/handlers/upstart_job.py +++ b/cloudinit/handlers/upstart_job.py @@ -18,19 +18,16 @@ from cloudinit import util from cloudinit.settings import (PER_INSTANCE) LOG = logging.getLogger(__name__) -UPSTART_PREFIX = "#upstart-job" class UpstartJobPartHandler(handlers.Handler): + + prefixes = ['#upstart-job'] + def __init__(self, paths, **_kwargs): handlers.Handler.__init__(self, PER_INSTANCE) self.upstart_dir = paths.upstart_conf_d - def list_types(self): - return [ - handlers.type_from_starts_with(UPSTART_PREFIX), - ] - def handle_part(self, data, ctype, filename, payload, frequency): if ctype in handlers.CONTENT_SIGNALS: return diff --git a/cloudinit/helpers.py b/cloudinit/helpers.py index 1979cd96..3cc1fb19 100644 --- a/cloudinit/helpers.py +++ b/cloudinit/helpers.py @@ -449,4 +449,8 @@ class DefaultingConfigParser(RawConfigParser): contents = '\n'.join([header, contents, '']) return contents + +def identity(object): + return object + # vi: ts=4 expandtab diff --git a/cloudinit/log.py b/cloudinit/log.py index 1d75c9ff..5ae312ba 100644 --- a/cloudinit/log.py +++ b/cloudinit/log.py @@ -38,10 +38,18 @@ DEF_CON_FORMAT = '%(asctime)s - %(filename)s[%(levelname)s]: %(message)s' logging.Formatter.converter = time.gmtime -def setupBasicLogging(level=DEBUG): +def setupBasicLogging(level=DEBUG, formatter=None): + if not 
formatter: + formatter = logging.Formatter(DEF_CON_FORMAT) root = logging.getLogger() + for handler in root.handlers: + if hasattr(handler, 'stream') and hasattr(handler.stream, 'name'): + if handler.stream.name == '': + handler.setLevel(level) + return + # Didn't have an existing stderr handler; create a new handler console = logging.StreamHandler(sys.stderr) - console.setFormatter(logging.Formatter(DEF_CON_FORMAT)) + console.setFormatter(formatter) console.setLevel(level) root.addHandler(console) root.setLevel(level) diff --git a/cloudinit/sources/__init__.py b/cloudinit/sources/__init__.py index 41fde9ba..a775f1a8 100644 --- a/cloudinit/sources/__init__.py +++ b/cloudinit/sources/__init__.py @@ -58,22 +58,27 @@ class InvalidMetaDataException(Exception): pass -def process_base64_metadata(metadata, key_path=''): - """Strip ci-b64 prefix and return metadata with base64-encoded-keys set.""" +def process_instance_metadata(metadata, key_path=''): + """Process all instance metadata cleaning it up for persisting as json. + + Strip ci-b64 prefix and catalog any 'base64_encoded_keys' as a list + + @return Dict copy of processed metadata. 
+ """ md_copy = copy.deepcopy(metadata) - md_copy['base64-encoded-keys'] = [] + md_copy['base64_encoded_keys'] = [] for key, val in metadata.items(): if key_path: sub_key_path = key_path + '/' + key else: sub_key_path = key if isinstance(val, str) and val.startswith('ci-b64:'): - md_copy['base64-encoded-keys'].append(sub_key_path) + md_copy['base64_encoded_keys'].append(sub_key_path) md_copy[key] = val.replace('ci-b64:', '') if isinstance(val, dict): - return_val = process_base64_metadata(val, sub_key_path) - md_copy['base64-encoded-keys'].extend( - return_val.pop('base64-encoded-keys')) + return_val = process_instance_metadata(val, sub_key_path) + md_copy['base64_encoded_keys'].extend( + return_val.pop('base64_encoded_keys')) md_copy[key] = return_val return md_copy @@ -180,15 +185,24 @@ class DataSource(object): """ self._dirty_cache = True return_value = self._get_data() - json_file = os.path.join(self.paths.run_dir, INSTANCE_JSON_FILE) if not return_value: return return_value + self.persist_instance_data() + return return_value + + def persist_instance_data(self): + """Process and write INSTANCE_JSON_FILE with all instance metadata. + Replace any hyphens with underscores in key names for use in template + processing. + + @return True on successful write, False otherwise. + """ instance_data = { 'ds': { - 'meta-data': self.metadata, - 'user-data': self.get_userdata_raw(), - 'vendor-data': self.get_vendordata_raw()}} + 'meta_data': self.metadata, + 'user_data': self.get_userdata_raw(), + 'vendor_data': self.get_vendordata_raw()}} if hasattr(self, 'network_json'): network_json = getattr(self, 'network_json') if network_json != UNSET: @@ -202,16 +216,17 @@ class DataSource(object): try: # Process content base64encoding unserializable values content = util.json_dumps(instance_data) - # Strip base64: prefix and return base64-encoded-keys - processed_data = process_base64_metadata(json.loads(content)) + # Strip base64: prefix and set base64_encoded_keys list. 
+ processed_data = process_instance_metadata(json.loads(content)) except TypeError as e: LOG.warning('Error persisting instance-data.json: %s', str(e)) - return return_value + return False except UnicodeDecodeError as e: LOG.warning('Error persisting instance-data.json: %s', str(e)) - return return_value + return False + json_file = os.path.join(self.paths.run_dir, INSTANCE_JSON_FILE) write_json(json_file, processed_data, mode=0o600) - return return_value + return True def _get_data(self): """Walk metadata sources, process crawled data and save attributes.""" diff --git a/cloudinit/sources/tests/test_init.py b/cloudinit/sources/tests/test_init.py index 9e939c1e..8299af23 100644 --- a/cloudinit/sources/tests/test_init.py +++ b/cloudinit/sources/tests/test_init.py @@ -20,10 +20,12 @@ class DataSourceTestSubclassNet(DataSource): dsname = 'MyTestSubclass' url_max_wait = 55 - def __init__(self, sys_cfg, distro, paths, custom_userdata=None): + def __init__(self, sys_cfg, distro, paths, custom_userdata=None, + get_data_retval=True): super(DataSourceTestSubclassNet, self).__init__( sys_cfg, distro, paths) self._custom_userdata = custom_userdata + self._get_data_retval = get_data_retval def _get_cloud_name(self): return 'SubclassCloudName' @@ -37,7 +39,7 @@ class DataSourceTestSubclassNet(DataSource): else: self.userdata_raw = 'userdata_raw' self.vendordata_raw = 'vendordata_raw' - return True + return self._get_data_retval class InvalidDataSourceTestSubclassNet(DataSource): @@ -264,7 +266,18 @@ class TestDataSource(CiTestCase): self.assertEqual('fqdnhostname.domain.com', datasource.get_hostname(fqdn=True)) - def test_get_data_write_json_instance_data(self): + def test_get_data_does_not_write_instance_data_on_failure(self): + """get_data does not write INSTANCE_JSON_FILE on get_data False.""" + tmp = self.tmp_dir() + datasource = DataSourceTestSubclassNet( + self.sys_cfg, self.distro, Paths({'run_dir': tmp}), + get_data_retval=False) + 
self.assertFalse(datasource.get_data()) + json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp) + self.assertFalse( + os.path.exists(json_file), 'Found unexpected file %s' % json_file) + + def test_get_data_writes_json_instance_data_on_success(self): """get_data writes INSTANCE_JSON_FILE to run_dir as readonly root.""" tmp = self.tmp_dir() datasource = DataSourceTestSubclassNet( @@ -273,7 +286,7 @@ class TestDataSource(CiTestCase): json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp) content = util.load_file(json_file) expected = { - 'base64-encoded-keys': [], + 'base64_encoded_keys': [], 'v1': { 'availability-zone': 'myaz', 'cloud-name': 'subclasscloudname', @@ -281,11 +294,12 @@ class TestDataSource(CiTestCase): 'local-hostname': 'test-subclass-hostname', 'region': 'myregion'}, 'ds': { - 'meta-data': {'availability_zone': 'myaz', + 'meta_data': {'availability_zone': 'myaz', 'local-hostname': 'test-subclass-hostname', 'region': 'myregion'}, - 'user-data': 'userdata_raw', - 'vendor-data': 'vendordata_raw'}} + 'user_data': 'userdata_raw', + 'vendor_data': 'vendordata_raw'}} + self.maxDiff = None self.assertEqual(expected, util.load_json(content)) file_stat = os.stat(json_file) self.assertEqual(0o600, stat.S_IMODE(file_stat.st_mode)) @@ -296,7 +310,7 @@ class TestDataSource(CiTestCase): datasource = DataSourceTestSubclassNet( self.sys_cfg, self.distro, Paths({'run_dir': tmp}), custom_userdata={'key1': 'val1', 'key2': {'key2.1': self.paths}}) - self.assertTrue(datasource.get_data()) + datasource.get_data() json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp) content = util.load_file(json_file) expected_userdata = { @@ -306,7 +320,40 @@ class TestDataSource(CiTestCase): " 'cloudinit.helpers.Paths'>"}} instance_json = util.load_json(content) self.assertEqual( - expected_userdata, instance_json['ds']['user-data']) + expected_userdata, instance_json['ds']['user_data']) + + def test_persist_instance_data_writes_ec2_metadata_when_set(self): + """When ec2_metadata class attribute 
is set, persist to json.""" + tmp = self.tmp_dir() + datasource = DataSourceTestSubclassNet( + self.sys_cfg, self.distro, Paths({'run_dir': tmp})) + datasource.ec2_metadata = UNSET + datasource.get_data() + json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp) + instance_data = util.load_json(util.load_file(json_file)) + self.assertNotIn('ec2_metadata', instance_data['ds']) + datasource.ec2_metadata = {'ec2stuff': 'is good'} + datasource.persist_instance_data() + instance_data = util.load_json(util.load_file(json_file)) + self.assertEqual( + {'ec2stuff': 'is good'}, + instance_data['ds']['ec2_metadata']) + + def test_persist_instance_data_writes_network_json_when_set(self): + """When network_data.json class attribute is set, persist to json.""" + tmp = self.tmp_dir() + datasource = DataSourceTestSubclassNet( + self.sys_cfg, self.distro, Paths({'run_dir': tmp})) + datasource.get_data() + json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp) + instance_data = util.load_json(util.load_file(json_file)) + self.assertNotIn('network_json', instance_data['ds']) + datasource.network_json = {'network_json': 'is good'} + datasource.persist_instance_data() + instance_data = util.load_json(util.load_file(json_file)) + self.assertEqual( + {'network_json': 'is good'}, + instance_data['ds']['network_json']) @skipIf(not six.PY3, "json serialization on <= py2.7 handles bytes") def test_get_data_base64encodes_unserializable_bytes(self): @@ -320,11 +367,11 @@ class TestDataSource(CiTestCase): content = util.load_file(json_file) instance_json = util.load_json(content) self.assertEqual( - ['ds/user-data/key2/key2.1'], - instance_json['base64-encoded-keys']) + ['ds/user_data/key2/key2.1'], + instance_json['base64_encoded_keys']) self.assertEqual( {'key1': 'val1', 'key2': {'key2.1': 'EjM='}}, - instance_json['ds']['user-data']) + instance_json['ds']['user_data']) @skipIf(not six.PY2, "json serialization on <= py2.7 handles bytes") def test_get_data_handles_bytes_values(self): @@ -337,10 +384,10 
@@ class TestDataSource(CiTestCase): json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp) content = util.load_file(json_file) instance_json = util.load_json(content) - self.assertEqual([], instance_json['base64-encoded-keys']) + self.assertEqual([], instance_json['base64_encoded_keys']) self.assertEqual( {'key1': 'val1', 'key2': {'key2.1': '\x123'}}, - instance_json['ds']['user-data']) + instance_json['ds']['user_data']) @skipIf(not six.PY2, "Only python2 hits UnicodeDecodeErrors on non-utf8") def test_non_utf8_encoding_logs_warning(self): diff --git a/cloudinit/stages.py b/cloudinit/stages.py index 8874d405..ef5c6996 100644 --- a/cloudinit/stages.py +++ b/cloudinit/stages.py @@ -17,10 +17,11 @@ from cloudinit.settings import ( from cloudinit import handlers # Default handlers (used if not overridden) -from cloudinit.handlers import boot_hook as bh_part -from cloudinit.handlers import cloud_config as cc_part -from cloudinit.handlers import shell_script as ss_part -from cloudinit.handlers import upstart_job as up_part +from cloudinit.handlers.boot_hook import BootHookPartHandler +from cloudinit.handlers.cloud_config import CloudConfigPartHandler +from cloudinit.handlers.jinja_template import JinjaTemplatePartHandler +from cloudinit.handlers.shell_script import ShellScriptPartHandler +from cloudinit.handlers.upstart_job import UpstartJobPartHandler from cloudinit.event import EventType @@ -413,12 +414,17 @@ class Init(object): 'datasource': self.datasource, }) # TODO(harlowja) Hmmm, should we dynamically import these?? 
+ cloudconfig_handler = CloudConfigPartHandler(**opts) + shellscript_handler = ShellScriptPartHandler(**opts) def_handlers = [ - cc_part.CloudConfigPartHandler(**opts), - ss_part.ShellScriptPartHandler(**opts), - bh_part.BootHookPartHandler(**opts), - up_part.UpstartJobPartHandler(**opts), + cloudconfig_handler, + shellscript_handler, + BootHookPartHandler(**opts), + UpstartJobPartHandler(**opts), ] + opts.update( + {'sub_handlers': [cloudconfig_handler, shellscript_handler]}) + def_handlers.append(JinjaTemplatePartHandler(**opts)) return def_handlers def _default_userdata_handlers(self): diff --git a/cloudinit/templater.py b/cloudinit/templater.py index 7e7acb86..b668674b 100644 --- a/cloudinit/templater.py +++ b/cloudinit/templater.py @@ -13,6 +13,7 @@ import collections import re + try: from Cheetah.Template import Template as CTemplate CHEETAH_AVAILABLE = True @@ -20,23 +21,44 @@ except (ImportError, AttributeError): CHEETAH_AVAILABLE = False try: - import jinja2 + from jinja2.runtime import implements_to_string from jinja2 import Template as JTemplate + from jinja2 import DebugUndefined as JUndefined JINJA_AVAILABLE = True except (ImportError, AttributeError): + from cloudinit.helpers import identity + implements_to_string = identity JINJA_AVAILABLE = False + JUndefined = object from cloudinit import log as logging from cloudinit import type_utils as tu from cloudinit import util + LOG = logging.getLogger(__name__) TYPE_MATCHER = re.compile(r"##\s*template:(.*)", re.I) BASIC_MATCHER = re.compile(r'\$\{([A-Za-z0-9_.]+)\}|\$([A-Za-z0-9_.]+)') +MISSING_JINJA_PREFIX = u'CI_MISSING_JINJA_VAR/' + + +@implements_to_string # Needed for python2.7. 
Otherwise cached super.__str__ +class UndefinedJinjaVariable(JUndefined): + """Class used to represent any undefined jinja template varible.""" + + def __str__(self): + return u'%s%s' % (MISSING_JINJA_PREFIX, self._undefined_name) + + def __sub__(self, other): + other = str(other).replace(MISSING_JINJA_PREFIX, '') + raise TypeError( + 'Undefined jinja variable: "{this}-{other}". Jinja tried' + ' subtraction. Perhaps you meant "{this}_{other}"?'.format( + this=self._undefined_name, other=other)) def basic_render(content, params): - """This does simple replacement of bash variable like templates. + """This does sumple replacement of bash variable like templates. It identifies patterns like ${a} or $a and can also identify patterns like ${a.b} or $a.b which will look for a key 'b' in the dictionary rooted @@ -82,7 +104,7 @@ def detect_template(text): # keep_trailing_newline is in jinja2 2.7+, not 2.6 add = "\n" if content.endswith("\n") else "" return JTemplate(content, - undefined=jinja2.StrictUndefined, + undefined=UndefinedJinjaVariable, trim_blocks=True).render(**params) + add if text.find("\n") != -1: diff --git a/cloudinit/tests/helpers.py b/cloudinit/tests/helpers.py index 42f56c27..2eb7b0cd 100644 --- a/cloudinit/tests/helpers.py +++ b/cloudinit/tests/helpers.py @@ -32,6 +32,7 @@ from cloudinit import cloud from cloudinit import distros from cloudinit import helpers as ch from cloudinit.sources import DataSourceNone +from cloudinit.templater import JINJA_AVAILABLE from cloudinit import util _real_subp = util.subp @@ -518,6 +519,14 @@ def skipUnlessJsonSchema(): _missing_jsonschema_dep, "No python-jsonschema dependency present.") +def skipUnlessJinja(): + return skipIf(not JINJA_AVAILABLE, "No jinja dependency present.") + + +def skipIfJinja(): + return skipIf(JINJA_AVAILABLE, "Jinja dependency present.") + + # older versions of mock do not have the useful 'assert_not_called' if not hasattr(mock.Mock, 'assert_not_called'): def __mock_assert_not_called(mmock): 
diff --git a/doc/rtd/topics/capabilities.rst b/doc/rtd/topics/capabilities.rst index 3e2c9e31..2d8e2538 100644 --- a/doc/rtd/topics/capabilities.rst +++ b/doc/rtd/topics/capabilities.rst @@ -16,13 +16,15 @@ User configurability `Cloud-init`_ 's behavior can be configured via user-data. - User-data can be given by the user at instance launch time. + User-data can be given by the user at instance launch time. See + :ref:`user_data_formats` for acceptable user-data content. + This is done via the ``--user-data`` or ``--user-data-file`` argument to ec2-run-instances for example. -* Check your local clients documentation for how to provide a `user-data` - string or `user-data` file for usage by cloud-init on instance creation. +* Check your local client's documentation for how to provide a `user-data` + string or `user-data` file to cloud-init on instance creation. Feature detection @@ -166,6 +168,13 @@ likely be promoted to top-level subcommands when stable. validation is work in progress and supports a subset of cloud-config modules. + * ``cloud-init devel render``: Use cloud-init's jinja template render to + process **#cloud-config** or **custom-scripts**, injecting any variables + from ``/run/cloud-init/instance-data.json``. It accepts a user-data file + containing the jinja template header ``## template: jinja`` and renders + that content with any instance-data.json variables present. + + .. _cli_clean: cloud-init clean diff --git a/doc/rtd/topics/datasources.rst b/doc/rtd/topics/datasources.rst index 83034589..14432e65 100644 --- a/doc/rtd/topics/datasources.rst +++ b/doc/rtd/topics/datasources.rst @@ -18,6 +18,8 @@ single way to access the different cloud systems methods to provide this data through the typical usage of subclasses. +.. 
_instance_metadata: + instance-data ------------- For reference, cloud-init stores all the metadata, vendordata and userdata @@ -110,6 +112,51 @@ Below is an instance-data.json example from an OpenStack instance: } } + +As of cloud-init v. 18.4, any values present in +``/run/cloud-init/instance-data.json`` can be used in cloud-init user data +scripts or cloud config data. This allows consumers to use cloud-init's +vendor-neutral, standardized metadata keys as well as datasource-specific +content for any scripts or cloud-config modules they are using. + +To use instance-data.json values in scripts and **#cloud-config** files the +user-data will need to contain the following header as the first line **## template: jinja**. Cloud-init will source all variables defined in +``/run/cloud-init/instance-data.json`` and allow scripts or cloud-config files +to reference those paths. Below are two examples:: + + * Cloud config calling home with the ec2 public hostname and availability-zone + ``` + ## template: jinja + #cloud-config + runcmd: + - echo 'EC2 public hostname allocated to instance: {{ ds.meta_data.public_hostname }}' > /tmp/instance_metadata + - echo 'EC2 availability zone: {{ v1.availability_zone }}' >> /tmp/instance_metadata + - curl -X POST -d '{"hostname": "{{ds.meta_data.public_hostname }}", "availability-zone": "{{ v1.availability_zone }}"}' https://example.com + ``` + + * Custom user script performing different operations based on region + ``` + ## template: jinja + #!/bin/bash + {% if v1.region == 'us-east-2' -%} + echo 'Installing custom proxies for {{ v1.region }}' + sudo apt-get install my-xtra-fast-stack + {%- endif %} + ... + + ``` + +.. note:: + Trying to reference jinja variables that don't exist in + instance-data.json will result in warnings in ``/var/log/cloud-init.log`` + and the following string in your rendered user-data: + ``CI_MISSING_JINJA_VAR/``. + +.. 
note:: + To save time designing your user-data for a specific cloud's + instance-data.json, use the 'render' cloud-init command on an + instance booted on your favorite cloud. See :ref:`cli_devel` for more + information. Datasource API diff --git a/doc/rtd/topics/format.rst b/doc/rtd/topics/format.rst index 1b0ff366..15234d21 100644 --- a/doc/rtd/topics/format.rst +++ b/doc/rtd/topics/format.rst @@ -1,6 +1,8 @@ -******* -Formats -******* +.. _user_data_formats: + +***************** +User-Data Formats +***************** User data that will be acted upon by cloud-init must be in one of the following types. @@ -65,6 +67,11 @@ Typically used by those who just want to execute a shell script. Begins with: ``#!`` or ``Content-Type: text/x-shellscript`` when using a MIME archive. +.. note:: + New in cloud-init v. 18.4: User-data scripts can also render cloud instance + metadata variables using jinja templating. See + :ref:`instance_metadata` for more information. + Example ------- @@ -103,12 +110,18 @@ These things include: - certain ssh keys should be imported - *and many more...* -**Note:** The file must be valid yaml syntax. +.. note:: + This file must be valid yaml syntax. See the :ref:`yaml_examples` section for a commented set of examples of supported cloud config formats. Begins with: ``#cloud-config`` or ``Content-Type: text/cloud-config`` when using a MIME archive. +.. note:: + New in cloud-init v. 18.4: Cloud config data can also render cloud instance + metadata variables using jinja templating. See + :ref:`instance_metadata` for more information. 
+ Upstart Job =========== diff --git a/tests/cloud_tests/testcases/base.py b/tests/cloud_tests/testcases/base.py index 696db8dd..27458271 100644 --- a/tests/cloud_tests/testcases/base.py +++ b/tests/cloud_tests/testcases/base.py @@ -168,7 +168,7 @@ class CloudTestCase(unittest.TestCase): ' OS: %s not bionic or newer' % self.os_name) instance_data = json.loads(out) self.assertEqual( - ['ds/user-data'], instance_data['base64-encoded-keys']) + ['ds/user_data'], instance_data['base64_encoded_keys']) ds = instance_data.get('ds', {}) v1_data = instance_data.get('v1', {}) metadata = ds.get('meta-data', {}) @@ -214,8 +214,8 @@ class CloudTestCase(unittest.TestCase): instance_data = json.loads(out) v1_data = instance_data.get('v1', {}) self.assertEqual( - ['ds/user-data', 'ds/vendor-data'], - sorted(instance_data['base64-encoded-keys'])) + ['ds/user_data', 'ds/vendor_data'], + sorted(instance_data['base64_encoded_keys'])) self.assertEqual('nocloud', v1_data['cloud-name']) self.assertIsNone( v1_data['availability-zone'], @@ -249,7 +249,7 @@ class CloudTestCase(unittest.TestCase): instance_data = json.loads(out) v1_data = instance_data.get('v1', {}) self.assertEqual( - ['ds/user-data'], instance_data['base64-encoded-keys']) + ['ds/user_data'], instance_data['base64_encoded_keys']) self.assertEqual('nocloud', v1_data['cloud-name']) self.assertIsNone( v1_data['availability-zone'], diff --git a/tests/unittests/test_builtin_handlers.py b/tests/unittests/test_builtin_handlers.py index 9751ed95..abe820e1 100644 --- a/tests/unittests/test_builtin_handlers.py +++ b/tests/unittests/test_builtin_handlers.py @@ -2,27 +2,34 @@ """Tests of the built-in user data handlers.""" +import copy import os import shutil import tempfile +from textwrap import dedent -try: - from unittest import mock -except ImportError: - import mock -from cloudinit.tests import helpers as test_helpers +from cloudinit.tests.helpers import ( + FilesystemMockingTestCase, CiTestCase, mock, skipUnlessJinja) from 
cloudinit import handlers from cloudinit import helpers from cloudinit import util -from cloudinit.handlers import upstart_job +from cloudinit.handlers.cloud_config import CloudConfigPartHandler +from cloudinit.handlers.jinja_template import ( + JinjaTemplatePartHandler, convert_jinja_instance_data, + render_jinja_payload) +from cloudinit.handlers.shell_script import ShellScriptPartHandler +from cloudinit.handlers.upstart_job import UpstartJobPartHandler from cloudinit.settings import (PER_ALWAYS, PER_INSTANCE) -class TestBuiltins(test_helpers.FilesystemMockingTestCase): +class TestUpstartJobPartHandler(FilesystemMockingTestCase): + + mpath = 'cloudinit.handlers.upstart_job.' + def test_upstart_frequency_no_out(self): c_root = tempfile.mkdtemp() self.addCleanup(shutil.rmtree, c_root) @@ -32,14 +39,13 @@ class TestBuiltins(test_helpers.FilesystemMockingTestCase): 'cloud_dir': c_root, 'upstart_dir': up_root, }) - freq = PER_ALWAYS - h = upstart_job.UpstartJobPartHandler(paths) + h = UpstartJobPartHandler(paths) # No files should be written out when # the frequency is ! per-instance h.handle_part('', handlers.CONTENT_START, None, None, None) h.handle_part('blah', 'text/upstart-job', - 'test.conf', 'blah', freq) + 'test.conf', 'blah', frequency=PER_ALWAYS) h.handle_part('', handlers.CONTENT_END, None, None, None) self.assertEqual(0, len(os.listdir(up_root))) @@ -48,7 +54,6 @@ class TestBuiltins(test_helpers.FilesystemMockingTestCase): # files should be written out when frequency is ! 
per-instance new_root = tempfile.mkdtemp() self.addCleanup(shutil.rmtree, new_root) - freq = PER_INSTANCE self.patchOS(new_root) self.patchUtils(new_root) @@ -56,22 +61,297 @@ class TestBuiltins(test_helpers.FilesystemMockingTestCase): 'upstart_dir': "/etc/upstart", }) - upstart_job.SUITABLE_UPSTART = True util.ensure_dir("/run") util.ensure_dir("/etc/upstart") - with mock.patch.object(util, 'subp') as mockobj: - h = upstart_job.UpstartJobPartHandler(paths) - h.handle_part('', handlers.CONTENT_START, - None, None, None) - h.handle_part('blah', 'text/upstart-job', - 'test.conf', 'blah', freq) - h.handle_part('', handlers.CONTENT_END, - None, None, None) + with mock.patch(self.mpath + 'SUITABLE_UPSTART', return_value=True): + with mock.patch.object(util, 'subp') as m_subp: + h = UpstartJobPartHandler(paths) + h.handle_part('', handlers.CONTENT_START, + None, None, None) + h.handle_part('blah', 'text/upstart-job', + 'test.conf', 'blah', frequency=PER_INSTANCE) + h.handle_part('', handlers.CONTENT_END, + None, None, None) - self.assertEqual(len(os.listdir('/etc/upstart')), 1) + self.assertEqual(len(os.listdir('/etc/upstart')), 1) - mockobj.assert_called_once_with( + m_subp.assert_called_once_with( ['initctl', 'reload-configuration'], capture=False) + +class TestJinjaTemplatePartHandler(CiTestCase): + + with_logs = True + + mpath = 'cloudinit.handlers.jinja_template.' 
+ + def setUp(self): + super(TestJinjaTemplatePartHandler, self).setUp() + self.tmp = self.tmp_dir() + self.run_dir = os.path.join(self.tmp, 'run_dir') + util.ensure_dir(self.run_dir) + self.paths = helpers.Paths({ + 'cloud_dir': self.tmp, 'run_dir': self.run_dir}) + + def test_jinja_template_part_handler_defaults(self): + """On init, paths are saved and subhandler types are empty.""" + h = JinjaTemplatePartHandler(self.paths) + self.assertEqual(['## template: jinja'], h.prefixes) + self.assertEqual(3, h.handler_version) + self.assertEqual(self.paths, h.paths) + self.assertEqual({}, h.sub_handlers) + + def test_jinja_template_part_handler_looks_up_sub_handler_types(self): + """When sub_handlers are passed, init lists types of subhandlers.""" + script_handler = ShellScriptPartHandler(self.paths) + cloudconfig_handler = CloudConfigPartHandler(self.paths) + h = JinjaTemplatePartHandler( + self.paths, sub_handlers=[script_handler, cloudconfig_handler]) + self.assertItemsEqual( + ['text/cloud-config', 'text/cloud-config-jsonp', + 'text/x-shellscript'], + h.sub_handlers) + + def test_jinja_template_part_handler_looks_up_subhandler_types(self): + """When sub_handlers are passed, init lists types of subhandlers.""" + script_handler = ShellScriptPartHandler(self.paths) + cloudconfig_handler = CloudConfigPartHandler(self.paths) + h = JinjaTemplatePartHandler( + self.paths, sub_handlers=[script_handler, cloudconfig_handler]) + self.assertItemsEqual( + ['text/cloud-config', 'text/cloud-config-jsonp', + 'text/x-shellscript'], + h.sub_handlers) + + def test_jinja_template_handle_noop_on_content_signals(self): + """Perform no part handling when content type is CONTENT_SIGNALS.""" + script_handler = ShellScriptPartHandler(self.paths) + + h = JinjaTemplatePartHandler( + self.paths, sub_handlers=[script_handler]) + with mock.patch.object(script_handler, 'handle_part') as m_handle_part: + h.handle_part( + data='data', ctype=handlers.CONTENT_START, filename='part-1', + payload='## 
template: jinja\n#!/bin/bash\necho himom', + frequency='freq', headers='headers') + m_handle_part.assert_not_called() + + @skipUnlessJinja() + def test_jinja_template_handle_subhandler_v2_with_clean_payload(self): + """Call version 2 subhandler.handle_part with stripped payload.""" + script_handler = ShellScriptPartHandler(self.paths) + self.assertEqual(2, script_handler.handler_version) + + # Create required instance-data.json file + instance_json = os.path.join(self.run_dir, 'instance-data.json') + instance_data = {'topkey': 'echo himom'} + util.write_file(instance_json, util.json_dumps(instance_data)) + h = JinjaTemplatePartHandler( + self.paths, sub_handlers=[script_handler]) + with mock.patch.object(script_handler, 'handle_part') as m_part: + # ctype with leading '!' not in handlers.CONTENT_SIGNALS + h.handle_part( + data='data', ctype="!" + handlers.CONTENT_START, + filename='part01', + payload='## template: jinja \t \n#!/bin/bash\n{{ topkey }}', + frequency='freq', headers='headers') + m_part.assert_called_once_with( + 'data', '!__begin__', 'part01', '#!/bin/bash\necho himom', 'freq') + + @skipUnlessJinja() + def test_jinja_template_handle_subhandler_v3_with_clean_payload(self): + """Call version 3 subhandler.handle_part with stripped payload.""" + cloudcfg_handler = CloudConfigPartHandler(self.paths) + self.assertEqual(3, cloudcfg_handler.handler_version) + + # Create required instance-data.json file + instance_json = os.path.join(self.run_dir, 'instance-data.json') + instance_data = {'topkey': {'sub': 'runcmd: [echo hi]'}} + util.write_file(instance_json, util.json_dumps(instance_data)) + h = JinjaTemplatePartHandler( + self.paths, sub_handlers=[cloudcfg_handler]) + with mock.patch.object(cloudcfg_handler, 'handle_part') as m_part: + # ctype with leading '!' not in handlers.CONTENT_SIGNALS + h.handle_part( + data='data', ctype="!" 
+ handlers.CONTENT_END, + filename='part01', + payload='## template: jinja\n#cloud-config\n{{ topkey.sub }}', + frequency='freq', headers='headers') + m_part.assert_called_once_with( + 'data', '!__end__', 'part01', '#cloud-config\nruncmd: [echo hi]', + 'freq', 'headers') + + def test_jinja_template_handle_errors_on_missing_instance_data_json(self): + """If instance-data is absent, raise an error from handle_part.""" + script_handler = ShellScriptPartHandler(self.paths) + h = JinjaTemplatePartHandler( + self.paths, sub_handlers=[script_handler]) + with self.assertRaises(RuntimeError) as context_manager: + h.handle_part( + data='data', ctype="!" + handlers.CONTENT_START, + filename='part01', + payload='## template: jinja \n#!/bin/bash\necho himom', + frequency='freq', headers='headers') + script_file = os.path.join(script_handler.script_dir, 'part01') + self.assertEqual( + 'Cannot render jinja template vars. Instance data not yet present' + ' at {}/instance-data.json'.format( + self.run_dir), str(context_manager.exception)) + self.assertFalse( + os.path.exists(script_file), + 'Unexpected file created %s' % script_file) + + @skipUnlessJinja() + def test_jinja_template_handle_renders_jinja_content(self): + """When present, render jinja variables from instance-data.json.""" + script_handler = ShellScriptPartHandler(self.paths) + instance_json = os.path.join(self.run_dir, 'instance-data.json') + instance_data = {'topkey': {'subkey': 'echo himom'}} + util.write_file(instance_json, util.json_dumps(instance_data)) + h = JinjaTemplatePartHandler( + self.paths, sub_handlers=[script_handler]) + h.handle_part( + data='data', ctype="!" 
+ handlers.CONTENT_START, + filename='part01', + payload=( + '## template: jinja \n' + '#!/bin/bash\n' + '{{ topkey.subkey|default("nosubkey") }}'), + frequency='freq', headers='headers') + script_file = os.path.join(script_handler.script_dir, 'part01') + self.assertNotIn( + 'Instance data not yet present at {}/instance-data.json'.format( + self.run_dir), + self.logs.getvalue()) + self.assertEqual( + '#!/bin/bash\necho himom', util.load_file(script_file)) + + @skipUnlessJinja() + def test_jinja_template_handle_renders_jinja_content_missing_keys(self): + """When specified jinja variable is undefined, log a warning.""" + script_handler = ShellScriptPartHandler(self.paths) + instance_json = os.path.join(self.run_dir, 'instance-data.json') + instance_data = {'topkey': {'subkey': 'echo himom'}} + util.write_file(instance_json, util.json_dumps(instance_data)) + h = JinjaTemplatePartHandler( + self.paths, sub_handlers=[script_handler]) + h.handle_part( + data='data', ctype="!" + handlers.CONTENT_START, + filename='part01', + payload='## template: jinja \n#!/bin/bash\n{{ goodtry }}', + frequency='freq', headers='headers') + script_file = os.path.join(script_handler.script_dir, 'part01') + self.assertTrue( + os.path.exists(script_file), + 'Missing expected file %s' % script_file) + self.assertIn( + "WARNING: Could not render jinja template variables in file" + " 'part01': 'goodtry'\n", + self.logs.getvalue()) + + +class TestConvertJinjaInstanceData(CiTestCase): + + def test_convert_instance_data_hyphens_to_underscores(self): + """Replace hyphenated keys with underscores in instance-data.""" + data = {'hyphenated-key': 'hyphenated-val', + 'underscore_delim_key': 'underscore_delimited_val'} + expected_data = {'hyphenated_key': 'hyphenated-val', + 'underscore_delim_key': 'underscore_delimited_val'} + self.assertEqual( + expected_data, + convert_jinja_instance_data(data=data)) + + def test_convert_instance_data_promotes_versioned_keys_to_top_level(self): + """Any versioned keys 
are promoted as top-level keys + + This provides any cloud-init standardized keys up at a top-level to + allow ease of reference for users. Instead of v1.availability_zone, + the name availability_zone can be used in templates. + """ + data = {'ds': {'dskey1': 1, 'dskey2': 2}, + 'v1': {'v1key1': 'v1.1'}, + 'v2': {'v2key1': 'v2.1'}} + expected_data = copy.deepcopy(data) + expected_data.update({'v1key1': 'v1.1', 'v2key1': 'v2.1'}) + + converted_data = convert_jinja_instance_data(data=data) + self.assertItemsEqual( + ['ds', 'v1', 'v2', 'v1key1', 'v2key1'], converted_data.keys()) + self.assertEqual( + expected_data, + converted_data) + + def test_convert_instance_data_most_recent_version_of_promoted_keys(self): + """The most-recent versioned key value is promoted to top-level.""" + data = {'v1': {'key1': 'old v1 key1', 'key2': 'old v1 key2'}, + 'v2': {'key1': 'newer v2 key1', 'key3': 'newer v2 key3'}, + 'v3': {'key1': 'newest v3 key1'}} + expected_data = copy.deepcopy(data) + expected_data.update( + {'key1': 'newest v3 key1', 'key2': 'old v1 key2', + 'key3': 'newer v2 key3'}) + + converted_data = convert_jinja_instance_data(data=data) + self.assertEqual( + expected_data, + converted_data) + + def test_convert_instance_data_decodes_decode_paths(self): + """Any decode_paths provided are decoded by convert_instance_data.""" + data = {'key1': {'subkey1': 'aGkgbW9t'}, 'key2': 'aGkgZGFk'} + expected_data = copy.deepcopy(data) + expected_data['key1']['subkey1'] = 'hi mom' + + converted_data = convert_jinja_instance_data( + data=data, decode_paths=('key1/subkey1',)) + self.assertEqual( + expected_data, + converted_data) + + +class TestRenderJinjaPayload(CiTestCase): + + with_logs = True + + @skipUnlessJinja() + def test_render_jinja_payload_logs_jinja_vars_on_debug(self): + """When debug is True, log jinja variables available.""" + payload = ( + '## template: jinja\n#!/bin/sh\necho hi from {{ v1.hostname }}') + instance_data = {'v1': {'hostname': 'foo'}, 'instance-id': 'iid'} + 
expected_log = dedent("""\ + DEBUG: Converted jinja variables + { + "hostname": "foo", + "instance_id": "iid", + "v1": { + "hostname": "foo" + } + } + """) + self.assertEqual( + render_jinja_payload( + payload=payload, payload_fn='myfile', + instance_data=instance_data, debug=True), + '#!/bin/sh\necho hi from foo') + self.assertEqual(expected_log, self.logs.getvalue()) + + @skipUnlessJinja() + def test_render_jinja_payload_replaces_missing_variables_and_warns(self): + """Warn on missing jinja variables and replace the absent variable.""" + payload = ( + '## template: jinja\n#!/bin/sh\necho hi from {{ NOTHERE }}') + instance_data = {'v1': {'hostname': 'foo'}, 'instance-id': 'iid'} + self.assertEqual( + render_jinja_payload( + payload=payload, payload_fn='myfile', + instance_data=instance_data), + '#!/bin/sh\necho hi from CI_MISSING_JINJA_VAR/NOTHERE') + expected_log = ( + 'WARNING: Could not render jinja template variables in file' + " 'myfile': 'NOTHERE'") + self.assertIn(expected_log, self.logs.getvalue()) + # vi: ts=4 expandtab diff --git a/tests/unittests/test_handler/test_handler_etc_hosts.py b/tests/unittests/test_handler/test_handler_etc_hosts.py index ced05a8d..d854afcb 100644 --- a/tests/unittests/test_handler/test_handler_etc_hosts.py +++ b/tests/unittests/test_handler/test_handler_etc_hosts.py @@ -49,6 +49,7 @@ class TestHostsFile(t_help.FilesystemMockingTestCase): if '192.168.1.1\tblah.blah.us\tblah' not in contents: self.assertIsNone('Default etc/hosts content modified') + @t_help.skipUnlessJinja() def test_write_etc_hosts_suse_template(self): cfg = { 'manage_etc_hosts': 'template', diff --git a/tests/unittests/test_handler/test_handler_ntp.py b/tests/unittests/test_handler/test_handler_ntp.py index 6fe3659d..0f22e579 100644 --- a/tests/unittests/test_handler/test_handler_ntp.py +++ b/tests/unittests/test_handler/test_handler_ntp.py @@ -3,6 +3,7 @@ from cloudinit.config import cc_ntp from cloudinit.sources import DataSourceNone from cloudinit import 
(distros, helpers, cloud, util) + from cloudinit.tests.helpers import ( CiTestCase, FilesystemMockingTestCase, mock, skipUnlessJsonSchema) diff --git a/tests/unittests/test_templating.py b/tests/unittests/test_templating.py index 20c87efa..c36e6eb0 100644 --- a/tests/unittests/test_templating.py +++ b/tests/unittests/test_templating.py @@ -21,6 +21,9 @@ except ImportError: class TestTemplates(test_helpers.CiTestCase): + + with_logs = True + jinja_utf8 = b'It\xe2\x80\x99s not ascii, {{name}}\n' jinja_utf8_rbob = b'It\xe2\x80\x99s not ascii, bob\n'.decode('utf-8') @@ -124,6 +127,13 @@ $a,$b''' self.add_header("jinja", self.jinja_utf8), {"name": "bob"}), self.jinja_utf8_rbob) + def test_jinja_nonascii_render_undefined_variables_to_default_py3(self): + """Test py3 jinja render_to_string with undefined variable default.""" + self.assertEqual( + templater.render_string( + self.add_header("jinja", self.jinja_utf8), {}), + self.jinja_utf8_rbob.replace('bob', 'CI_MISSING_JINJA_VAR/name')) + def test_jinja_nonascii_render_to_file(self): """Test jinja render_to_file of a filename with non-ascii content.""" tmpl_fn = self.tmp_path("j-render-to-file.template") @@ -144,5 +154,18 @@ $a,$b''' result = templater.render_from_file(tmpl_fn, {"name": "bob"}) self.assertEqual(result, self.jinja_utf8_rbob) + @test_helpers.skipIfJinja() + def test_jinja_warns_on_missing_dep_and_uses_basic_renderer(self): + """Test jinja render_from_file will fallback to basic renderer.""" + tmpl_fn = self.tmp_path("j-render-from-file.template") + write_file(tmpl_fn, omode="wb", + content=self.add_header( + "jinja", self.jinja_utf8).encode('utf-8')) + result = templater.render_from_file(tmpl_fn, {"name": "bob"}) + self.assertEqual(result, self.jinja_utf8.decode()) + self.assertIn( + 'WARNING: Jinja not available as the selected renderer for desired' + ' template, reverting to the basic renderer.', + self.logs.getvalue()) # vi: ts=4 expandtab -- cgit v1.2.3 From fc4b966ba928b30b1c586407e752e0b51b1031e8 Mon 
Sep 17 00:00:00 2001 From: Chad Smith Date: Tue, 25 Sep 2018 21:59:16 +0000 Subject: cli: add cloud-init query subcommand to query instance metadata MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cloud-init caches any cloud metadata crawled during boot in the file /run/cloud-init/instance-data.json. Cloud-init also standardizes some of that metadata across all clouds. The command 'cloud-init query' surfaces a simple CLI to query or format any cached instance metadata so that scripts or end-users do not have to write tools to crawl metadata themselves. Since 'cloud-init query' is runnable by non-root users, redact any sensitive data from instance-data.json and provide a root-readable unredacted instance-data-sensitive.json. Datasources can now define a sensitive_metadata_keys tuple which will redact any matching keys which could contain passwords or credentials from instance-data.json. Also add the following standardized 'v1' instance-data.json keys:   - user_data: The base64encoded user-data provided at instance launch   - vendor_data: Any vendor_data provided to the instance at launch   - underscore_delimited versions of existing hyphenated keys:     instance_id, local_hostname, availability_zone, cloud_name --- bash_completion/cloud-init | 4 +- cloudinit/cmd/devel/render.py | 7 +- cloudinit/cmd/main.py | 10 ++ cloudinit/cmd/query.py | 155 ++++++++++++++++++ cloudinit/cmd/tests/test_query.py | 193 +++++++++++++++++++++++ cloudinit/helpers.py | 4 + cloudinit/sources/__init__.py | 76 +++++++-- cloudinit/sources/tests/test_init.py | 130 ++++++++++++--- doc/rtd/index.rst | 1 + doc/rtd/topics/capabilities.rst | 105 ++++++++++--- doc/rtd/topics/datasources.rst | 148 +---------------- doc/rtd/topics/instancedata.rst | 297 +++++++++++++++++++++++++++++++++++ integration-requirements.txt | 3 +- tests/cloud_tests/testcases/base.py | 52 +++--- 14 files changed, 952 insertions(+), 233 deletions(-) create mode 100644 
cloudinit/cmd/query.py create mode 100644 cloudinit/cmd/tests/test_query.py create mode 100644 doc/rtd/topics/instancedata.rst (limited to 'cloudinit/cmd/main.py') diff --git a/bash_completion/cloud-init b/bash_completion/cloud-init index 6d01bf3a..8c25032f 100644 --- a/bash_completion/cloud-init +++ b/bash_completion/cloud-init @@ -10,7 +10,7 @@ _cloudinit_complete() cur_word="${COMP_WORDS[COMP_CWORD]}" prev_word="${COMP_WORDS[COMP_CWORD-1]}" - subcmds="analyze clean collect-logs devel dhclient-hook features init modules single status" + subcmds="analyze clean collect-logs devel dhclient-hook features init modules query single status" base_params="--help --file --version --debug --force" case ${COMP_CWORD} in 1) @@ -40,6 +40,8 @@ _cloudinit_complete() COMPREPLY=($(compgen -W "--help --mode" -- $cur_word)) ;; + query) + COMPREPLY=($(compgen -W "--all --help --instance-data --list-keys --user-data --vendor-data --debug" -- $cur_word));; single) COMPREPLY=($(compgen -W "--help --name --frequency --report" -- $cur_word)) ;; diff --git a/cloudinit/cmd/devel/render.py b/cloudinit/cmd/devel/render.py index e85933db..2ba6b681 100755 --- a/cloudinit/cmd/devel/render.py +++ b/cloudinit/cmd/devel/render.py @@ -9,7 +9,6 @@ import sys from cloudinit.handlers.jinja_template import render_jinja_payload_from_file from cloudinit import log from cloudinit.sources import INSTANCE_JSON_FILE -from cloudinit import util from . 
import addLogHandlerCLI, read_cfg_paths NAME = 'render' @@ -54,11 +53,7 @@ def handle_args(name, args): paths.run_dir, INSTANCE_JSON_FILE) else: instance_data_fn = args.instance_data - try: - with open(instance_data_fn) as stream: - instance_data = stream.read() - instance_data = util.load_json(instance_data) - except IOError: + if not os.path.exists(instance_data_fn): LOG.error('Missing instance-data.json file: %s', instance_data_fn) return 1 try: diff --git a/cloudinit/cmd/main.py b/cloudinit/cmd/main.py index 0eee583c..5a437020 100644 --- a/cloudinit/cmd/main.py +++ b/cloudinit/cmd/main.py @@ -791,6 +791,10 @@ def main(sysv_args=None): ' pass to this module')) parser_single.set_defaults(action=('single', main_single)) + parser_query = subparsers.add_parser( + 'query', + help='Query standardized instance metadata from the command line.') + parser_dhclient = subparsers.add_parser('dhclient-hook', help=('run the dhclient hook' 'to record network info')) @@ -842,6 +846,12 @@ def main(sysv_args=None): clean_parser(parser_clean) parser_clean.set_defaults( action=('clean', handle_clean_args)) + elif sysv_args[0] == 'query': + from cloudinit.cmd.query import ( + get_parser as query_parser, handle_args as handle_query_args) + query_parser(parser_query) + parser_query.set_defaults( + action=('render', handle_query_args)) elif sysv_args[0] == 'status': from cloudinit.cmd.status import ( get_parser as status_parser, handle_status_args) diff --git a/cloudinit/cmd/query.py b/cloudinit/cmd/query.py new file mode 100644 index 00000000..7d2d4fe4 --- /dev/null +++ b/cloudinit/cmd/query.py @@ -0,0 +1,155 @@ +# This file is part of cloud-init. See LICENSE file for license information. 
+ +"""Query standardized instance metadata from the command line.""" + +import argparse +import os +import six +import sys + +from cloudinit.handlers.jinja_template import ( + convert_jinja_instance_data, render_jinja_payload) +from cloudinit.cmd.devel import addLogHandlerCLI, read_cfg_paths +from cloudinit import log +from cloudinit.sources import ( + INSTANCE_JSON_FILE, INSTANCE_JSON_SENSITIVE_FILE, REDACT_SENSITIVE_VALUE) +from cloudinit import util + +NAME = 'query' +LOG = log.getLogger(NAME) + + +def get_parser(parser=None): + """Build or extend an arg parser for query utility. + + @param parser: Optional existing ArgumentParser instance representing the + query subcommand which will be extended to support the args of + this utility. + + @returns: ArgumentParser with proper argument configuration. + """ + if not parser: + parser = argparse.ArgumentParser( + prog=NAME, description='Query cloud-init instance data') + parser.add_argument( + '-d', '--debug', action='store_true', default=False, + help='Add verbose messages during template render') + parser.add_argument( + '-i', '--instance-data', type=str, + help=('Path to instance-data.json file. Default is /run/cloud-init/%s' + % INSTANCE_JSON_FILE)) + parser.add_argument( + '-l', '--list-keys', action='store_true', default=False, + help=('List query keys available at the provided instance-data' + ' .')) + parser.add_argument( + '-u', '--user-data', type=str, + help=('Path to user-data file. Default is' + ' /var/lib/cloud/instance/user-data.txt')) + parser.add_argument( + '-v', '--vendor-data', type=str, + help=('Path to vendor-data file. Default is' + ' /var/lib/cloud/instance/vendor-data.txt')) + parser.add_argument( + 'varname', type=str, nargs='?', + help=('A dot-delimited instance data variable to query from' + ' instance-data query. 
For example: v2.local_hostname')) + parser.add_argument( + '-a', '--all', action='store_true', default=False, dest='dump_all', + help='Dump all available instance-data') + parser.add_argument( + '-f', '--format', type=str, dest='format', + help=('Optionally specify a custom output format string. Any' + ' instance-data variable can be specified between double-curly' + ' braces. For example -f "{{ v2.cloud_name }}"')) + return parser + + +def handle_args(name, args): + """Handle calls to 'cloud-init query' as a subcommand.""" + paths = None + addLogHandlerCLI(LOG, log.DEBUG if args.debug else log.WARNING) + if not any([args.list_keys, args.varname, args.format, args.dump_all]): + LOG.error( + 'Expected one of the options: --all, --format,' + ' --list-keys or varname') + get_parser().print_help() + return 1 + + uid = os.getuid() + if not all([args.instance_data, args.user_data, args.vendor_data]): + paths = read_cfg_paths() + if not args.instance_data: + if uid == 0: + default_json_fn = INSTANCE_JSON_SENSITIVE_FILE + else: + default_json_fn = INSTANCE_JSON_FILE # World readable + instance_data_fn = os.path.join(paths.run_dir, default_json_fn) + else: + instance_data_fn = args.instance_data + if not args.user_data: + user_data_fn = os.path.join(paths.instance_link, 'user-data.txt') + else: + user_data_fn = args.user_data + if not args.vendor_data: + vendor_data_fn = os.path.join(paths.instance_link, 'vendor-data.txt') + else: + vendor_data_fn = args.vendor_data + + try: + instance_json = util.load_file(instance_data_fn) + except IOError: + LOG.error('Missing instance-data.json file: %s', instance_data_fn) + return 1 + + instance_data = util.load_json(instance_json) + if uid != 0: + instance_data['userdata'] = ( + '<%s> file:%s' % (REDACT_SENSITIVE_VALUE, user_data_fn)) + instance_data['vendordata'] = ( + '<%s> file:%s' % (REDACT_SENSITIVE_VALUE, vendor_data_fn)) + else: + instance_data['userdata'] = util.load_file(user_data_fn) + instance_data['vendordata'] = 
util.load_file(vendor_data_fn) + if args.format: + payload = '## template: jinja\n{fmt}'.format(fmt=args.format) + rendered_payload = render_jinja_payload( + payload=payload, payload_fn='query commandline', + instance_data=instance_data, + debug=True if args.debug else False) + if rendered_payload: + print(rendered_payload) + return 0 + return 1 + + response = convert_jinja_instance_data(instance_data) + if args.varname: + try: + for var in args.varname.split('.'): + response = response[var] + except KeyError: + LOG.error('Undefined instance-data key %s', args.varname) + return 1 + if args.list_keys: + if not isinstance(response, dict): + LOG.error("--list-keys provided but '%s' is not a dict", var) + return 1 + response = '\n'.join(sorted(response.keys())) + elif args.list_keys: + response = '\n'.join(sorted(response.keys())) + if not isinstance(response, six.string_types): + response = util.json_dumps(response) + print(response) + return 0 + + +def main(): + """Tool to query specific instance-data values.""" + parser = get_parser() + sys.exit(handle_args(NAME, parser.parse_args())) + + +if __name__ == '__main__': + main() + +# vi: ts=4 expandtab diff --git a/cloudinit/cmd/tests/test_query.py b/cloudinit/cmd/tests/test_query.py new file mode 100644 index 00000000..fb87c6ab --- /dev/null +++ b/cloudinit/cmd/tests/test_query.py @@ -0,0 +1,193 @@ +# This file is part of cloud-init. See LICENSE file for license information. 
+ +from six import StringIO +from textwrap import dedent +import os + +from collections import namedtuple +from cloudinit.cmd import query +from cloudinit.helpers import Paths +from cloudinit.sources import REDACT_SENSITIVE_VALUE, INSTANCE_JSON_FILE +from cloudinit.tests.helpers import CiTestCase, mock +from cloudinit.util import ensure_dir, write_file + + +class TestQuery(CiTestCase): + + with_logs = True + + args = namedtuple( + 'queryargs', + ('debug dump_all format instance_data list_keys user_data vendor_data' + ' varname')) + + def setUp(self): + super(TestQuery, self).setUp() + self.tmp = self.tmp_dir() + self.instance_data = self.tmp_path('instance-data', dir=self.tmp) + + def test_handle_args_error_on_missing_param(self): + """Error when missing required parameters and print usage.""" + args = self.args( + debug=False, dump_all=False, format=None, instance_data=None, + list_keys=False, user_data=None, vendor_data=None, varname=None) + with mock.patch('sys.stderr', new_callable=StringIO) as m_stderr: + with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout: + self.assertEqual(1, query.handle_args('anyname', args)) + expected_error = ( + 'ERROR: Expected one of the options: --all, --format, --list-keys' + ' or varname\n') + self.assertIn(expected_error, self.logs.getvalue()) + self.assertIn('usage: query', m_stdout.getvalue()) + self.assertIn(expected_error, m_stderr.getvalue()) + + def test_handle_args_error_on_missing_instance_data(self): + """When instance_data file path does not exist, log an error.""" + absent_fn = self.tmp_path('absent', dir=self.tmp) + args = self.args( + debug=False, dump_all=True, format=None, instance_data=absent_fn, + list_keys=False, user_data='ud', vendor_data='vd', varname=None) + with mock.patch('sys.stderr', new_callable=StringIO) as m_stderr: + self.assertEqual(1, query.handle_args('anyname', args)) + self.assertIn( + 'ERROR: Missing instance-data.json file: %s' % absent_fn, + self.logs.getvalue()) + self.assertIn( 
+ 'ERROR: Missing instance-data.json file: %s' % absent_fn, + m_stderr.getvalue()) + + def test_handle_args_defaults_instance_data(self): + """When no instance_data argument, default to configured run_dir.""" + args = self.args( + debug=False, dump_all=True, format=None, instance_data=None, + list_keys=False, user_data=None, vendor_data=None, varname=None) + run_dir = self.tmp_path('run_dir', dir=self.tmp) + ensure_dir(run_dir) + paths = Paths({'run_dir': run_dir}) + self.add_patch('cloudinit.cmd.query.read_cfg_paths', 'm_paths') + self.m_paths.return_value = paths + with mock.patch('sys.stderr', new_callable=StringIO) as m_stderr: + self.assertEqual(1, query.handle_args('anyname', args)) + json_file = os.path.join(run_dir, INSTANCE_JSON_FILE) + self.assertIn( + 'ERROR: Missing instance-data.json file: %s' % json_file, + self.logs.getvalue()) + self.assertIn( + 'ERROR: Missing instance-data.json file: %s' % json_file, + m_stderr.getvalue()) + + def test_handle_args_dumps_all_instance_data(self): + """When --all is specified query will dump all instance data vars.""" + write_file(self.instance_data, '{"my-var": "it worked"}') + args = self.args( + debug=False, dump_all=True, format=None, + instance_data=self.instance_data, list_keys=False, + user_data='ud', vendor_data='vd', varname=None) + with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout: + self.assertEqual(0, query.handle_args('anyname', args)) + self.assertEqual( + '{\n "my_var": "it worked",\n "userdata": "<%s> file:ud",\n' + ' "vendordata": "<%s> file:vd"\n}\n' % ( + REDACT_SENSITIVE_VALUE, REDACT_SENSITIVE_VALUE), + m_stdout.getvalue()) + + def test_handle_args_returns_top_level_varname(self): + """When the argument varname is passed, report its value.""" + write_file(self.instance_data, '{"my-var": "it worked"}') + args = self.args( + debug=False, dump_all=True, format=None, + instance_data=self.instance_data, list_keys=False, + user_data='ud', vendor_data='vd', varname='my_var') + with 
mock.patch('sys.stdout', new_callable=StringIO) as m_stdout: + self.assertEqual(0, query.handle_args('anyname', args)) + self.assertEqual('it worked\n', m_stdout.getvalue()) + + def test_handle_args_returns_nested_varname(self): + """If user_data file is a jinja template render instance-data vars.""" + write_file(self.instance_data, + '{"v1": {"key-2": "value-2"}, "my-var": "it worked"}') + args = self.args( + debug=False, dump_all=False, format=None, + instance_data=self.instance_data, user_data='ud', vendor_data='vd', + list_keys=False, varname='v1.key_2') + with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout: + self.assertEqual(0, query.handle_args('anyname', args)) + self.assertEqual('value-2\n', m_stdout.getvalue()) + + def test_handle_args_returns_standardized_vars_to_top_level_aliases(self): + """Any standardized vars under v# are promoted as top-level aliases.""" + write_file( + self.instance_data, + '{"v1": {"v1_1": "val1.1"}, "v2": {"v2_2": "val2.2"},' + ' "top": "gun"}') + expected = dedent("""\ + { + "top": "gun", + "userdata": " file:ud", + "v1": { + "v1_1": "val1.1" + }, + "v1_1": "val1.1", + "v2": { + "v2_2": "val2.2" + }, + "v2_2": "val2.2", + "vendordata": " file:vd" + } + """) + args = self.args( + debug=False, dump_all=True, format=None, + instance_data=self.instance_data, user_data='ud', vendor_data='vd', + list_keys=False, varname=None) + with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout: + self.assertEqual(0, query.handle_args('anyname', args)) + self.assertEqual(expected, m_stdout.getvalue()) + + def test_handle_args_list_keys_sorts_top_level_keys_when_no_varname(self): + """Sort all top-level keys when only --list-keys provided.""" + write_file( + self.instance_data, + '{"v1": {"v1_1": "val1.1"}, "v2": {"v2_2": "val2.2"},' + ' "top": "gun"}') + expected = 'top\nuserdata\nv1\nv1_1\nv2\nv2_2\nvendordata\n' + args = self.args( + debug=False, dump_all=False, format=None, + instance_data=self.instance_data, 
list_keys=True, user_data='ud', + vendor_data='vd', varname=None) + with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout: + self.assertEqual(0, query.handle_args('anyname', args)) + self.assertEqual(expected, m_stdout.getvalue()) + + def test_handle_args_list_keys_sorts_nested_keys_when_varname(self): + """Sort all nested keys of varname object when --list-keys provided.""" + write_file( + self.instance_data, + '{"v1": {"v1_1": "val1.1", "v1_2": "val1.2"}, "v2":' + + ' {"v2_2": "val2.2"}, "top": "gun"}') + expected = 'v1_1\nv1_2\n' + args = self.args( + debug=False, dump_all=False, format=None, + instance_data=self.instance_data, list_keys=True, + user_data='ud', vendor_data='vd', varname='v1') + with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout: + self.assertEqual(0, query.handle_args('anyname', args)) + self.assertEqual(expected, m_stdout.getvalue()) + + def test_handle_args_list_keys_errors_when_varname_is_not_a_dict(self): + """Raise an error when --list-keys and varname specify a non-list.""" + write_file( + self.instance_data, + '{"v1": {"v1_1": "val1.1", "v1_2": "val1.2"}, "v2": ' + + '{"v2_2": "val2.2"}, "top": "gun"}') + expected_error = "ERROR: --list-keys provided but 'top' is not a dict" + args = self.args( + debug=False, dump_all=False, format=None, + instance_data=self.instance_data, list_keys=True, user_data='ud', + vendor_data='vd', varname='top') + with mock.patch('sys.stderr', new_callable=StringIO) as m_stderr: + with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout: + self.assertEqual(1, query.handle_args('anyname', args)) + self.assertEqual('', m_stdout.getvalue()) + self.assertIn(expected_error, m_stderr.getvalue()) + +# vi: ts=4 expandtab diff --git a/cloudinit/helpers.py b/cloudinit/helpers.py index 3cc1fb19..dcd2645e 100644 --- a/cloudinit/helpers.py +++ b/cloudinit/helpers.py @@ -239,6 +239,10 @@ class ConfigMerger(object): if cc_fn and os.path.isfile(cc_fn): try: i_cfgs.append(util.read_conf(cc_fn)) + 
except PermissionError: + LOG.debug( + 'Skipped loading cloud-config from %s due to' + ' non-root.', cc_fn) except Exception: util.logexc(LOG, 'Failed loading of cloud-config from %s', cc_fn) diff --git a/cloudinit/sources/__init__.py b/cloudinit/sources/__init__.py index a775f1a8..730e8174 100644 --- a/cloudinit/sources/__init__.py +++ b/cloudinit/sources/__init__.py @@ -38,8 +38,12 @@ DEP_FILESYSTEM = "FILESYSTEM" DEP_NETWORK = "NETWORK" DS_PREFIX = 'DataSource' -# File in which instance meta-data, user-data and vendor-data is written +# File in which public available instance meta-data is written +# security-sensitive key values are redacted from this world-readable file INSTANCE_JSON_FILE = 'instance-data.json' +# security-sensitive key values are present in this root-readable file +INSTANCE_JSON_SENSITIVE_FILE = 'instance-data-sensitive.json' +REDACT_SENSITIVE_VALUE = 'redacted for non-root user' # Key which can be provide a cloud's official product name to cloud-init METADATA_CLOUD_NAME_KEY = 'cloud-name' @@ -58,7 +62,7 @@ class InvalidMetaDataException(Exception): pass -def process_instance_metadata(metadata, key_path=''): +def process_instance_metadata(metadata, key_path='', sensitive_keys=()): """Process all instance metadata cleaning it up for persisting as json. 
Strip ci-b64 prefix and catalog any 'base64_encoded_keys' as a list @@ -67,22 +71,46 @@ def process_instance_metadata(metadata, key_path=''): """ md_copy = copy.deepcopy(metadata) md_copy['base64_encoded_keys'] = [] + md_copy['sensitive_keys'] = [] for key, val in metadata.items(): if key_path: sub_key_path = key_path + '/' + key else: sub_key_path = key + if key in sensitive_keys or sub_key_path in sensitive_keys: + md_copy['sensitive_keys'].append(sub_key_path) if isinstance(val, str) and val.startswith('ci-b64:'): md_copy['base64_encoded_keys'].append(sub_key_path) md_copy[key] = val.replace('ci-b64:', '') if isinstance(val, dict): - return_val = process_instance_metadata(val, sub_key_path) + return_val = process_instance_metadata( + val, sub_key_path, sensitive_keys) md_copy['base64_encoded_keys'].extend( return_val.pop('base64_encoded_keys')) + md_copy['sensitive_keys'].extend( + return_val.pop('sensitive_keys')) md_copy[key] = return_val return md_copy +def redact_sensitive_keys(metadata, redact_value=REDACT_SENSITIVE_VALUE): + """Redact any sensitive keys from the provided metadata dictionary. + + Replace any key values listed in 'sensitive_keys' with redact_value. 
+ """ + if not metadata.get('sensitive_keys', []): + return metadata + md_copy = copy.deepcopy(metadata) + for key_path in metadata.get('sensitive_keys'): + path_parts = key_path.split('/') + obj = md_copy + for path in path_parts: + if isinstance(obj[path], dict) and path != path_parts[-1]: + obj = obj[path] + obj[path] = redact_value + return md_copy + + URLParams = namedtuple( 'URLParms', ['max_wait_seconds', 'timeout_seconds', 'num_retries']) @@ -127,6 +155,10 @@ class DataSource(object): _dirty_cache = False + # N-tuple of keypaths or keynames to redact from instance-data.json for + # non-root users + sensitive_metadata_keys = ('security-credentials',) + def __init__(self, sys_cfg, distro, paths, ud_proc=None): self.sys_cfg = sys_cfg self.distro = distro @@ -152,12 +184,24 @@ class DataSource(object): def _get_standardized_metadata(self): """Return a dictionary of standardized metadata keys.""" - return {'v1': { - 'local-hostname': self.get_hostname(), - 'instance-id': self.get_instance_id(), - 'cloud-name': self.cloud_name, - 'region': self.region, - 'availability-zone': self.availability_zone}} + local_hostname = self.get_hostname() + instance_id = self.get_instance_id() + availability_zone = self.availability_zone + cloud_name = self.cloud_name + # When adding new standard keys prefer underscore-delimited instead + # of hyphen-delimited to support simple variable references in jinja + # templates. + return { + 'v1': { + 'availability-zone': availability_zone, + 'availability_zone': availability_zone, + 'cloud-name': cloud_name, + 'cloud_name': cloud_name, + 'instance-id': instance_id, + 'instance_id': instance_id, + 'local-hostname': local_hostname, + 'local_hostname': local_hostname, + 'region': self.region}} def clear_cached_attrs(self, attr_defaults=()): """Reset any cached metadata attributes to datasource defaults. 
@@ -200,9 +244,7 @@ class DataSource(object): """ instance_data = { 'ds': { - 'meta_data': self.metadata, - 'user_data': self.get_userdata_raw(), - 'vendor_data': self.get_vendordata_raw()}} + 'meta_data': self.metadata}} if hasattr(self, 'network_json'): network_json = getattr(self, 'network_json') if network_json != UNSET: @@ -217,7 +259,9 @@ class DataSource(object): # Process content base64encoding unserializable values content = util.json_dumps(instance_data) # Strip base64: prefix and set base64_encoded_keys list. - processed_data = process_instance_metadata(json.loads(content)) + processed_data = process_instance_metadata( + json.loads(content), + sensitive_keys=self.sensitive_metadata_keys) except TypeError as e: LOG.warning('Error persisting instance-data.json: %s', str(e)) return False @@ -225,7 +269,11 @@ class DataSource(object): LOG.warning('Error persisting instance-data.json: %s', str(e)) return False json_file = os.path.join(self.paths.run_dir, INSTANCE_JSON_FILE) - write_json(json_file, processed_data, mode=0o600) + write_json(json_file, processed_data) # World readable + json_sensitive_file = os.path.join(self.paths.run_dir, + INSTANCE_JSON_SENSITIVE_FILE) + write_json(json_sensitive_file, + redact_sensitive_keys(processed_data), mode=0o600) return True def _get_data(self): diff --git a/cloudinit/sources/tests/test_init.py b/cloudinit/sources/tests/test_init.py index 8299af23..6b965750 100644 --- a/cloudinit/sources/tests/test_init.py +++ b/cloudinit/sources/tests/test_init.py @@ -1,5 +1,6 @@ # This file is part of cloud-init. See LICENSE file for license information. 
+import copy import inspect import os import six @@ -9,7 +10,8 @@ from cloudinit.event import EventType from cloudinit.helpers import Paths from cloudinit import importer from cloudinit.sources import ( - INSTANCE_JSON_FILE, DataSource, UNSET) + INSTANCE_JSON_FILE, INSTANCE_JSON_SENSITIVE_FILE, REDACT_SENSITIVE_VALUE, + UNSET, DataSource, redact_sensitive_keys) from cloudinit.tests.helpers import CiTestCase, skipIf, mock from cloudinit.user_data import UserDataProcessor from cloudinit import util @@ -20,20 +22,24 @@ class DataSourceTestSubclassNet(DataSource): dsname = 'MyTestSubclass' url_max_wait = 55 - def __init__(self, sys_cfg, distro, paths, custom_userdata=None, - get_data_retval=True): + def __init__(self, sys_cfg, distro, paths, custom_metadata=None, + custom_userdata=None, get_data_retval=True): super(DataSourceTestSubclassNet, self).__init__( sys_cfg, distro, paths) self._custom_userdata = custom_userdata + self._custom_metadata = custom_metadata self._get_data_retval = get_data_retval def _get_cloud_name(self): return 'SubclassCloudName' def _get_data(self): - self.metadata = {'availability_zone': 'myaz', - 'local-hostname': 'test-subclass-hostname', - 'region': 'myregion'} + if self._custom_metadata: + self.metadata = self._custom_metadata + else: + self.metadata = {'availability_zone': 'myaz', + 'local-hostname': 'test-subclass-hostname', + 'region': 'myregion'} if self._custom_userdata: self.userdata_raw = self._custom_userdata else: @@ -278,7 +284,7 @@ class TestDataSource(CiTestCase): os.path.exists(json_file), 'Found unexpected file %s' % json_file) def test_get_data_writes_json_instance_data_on_success(self): - """get_data writes INSTANCE_JSON_FILE to run_dir as readonly root.""" + """get_data writes INSTANCE_JSON_FILE to run_dir as world readable.""" tmp = self.tmp_dir() datasource = DataSourceTestSubclassNet( self.sys_cfg, self.distro, Paths({'run_dir': tmp})) @@ -287,40 +293,90 @@ class TestDataSource(CiTestCase): content = 
util.load_file(json_file) expected = { 'base64_encoded_keys': [], + 'sensitive_keys': [], 'v1': { 'availability-zone': 'myaz', + 'availability_zone': 'myaz', 'cloud-name': 'subclasscloudname', + 'cloud_name': 'subclasscloudname', 'instance-id': 'iid-datasource', + 'instance_id': 'iid-datasource', 'local-hostname': 'test-subclass-hostname', + 'local_hostname': 'test-subclass-hostname', 'region': 'myregion'}, 'ds': { 'meta_data': {'availability_zone': 'myaz', 'local-hostname': 'test-subclass-hostname', - 'region': 'myregion'}, - 'user_data': 'userdata_raw', - 'vendor_data': 'vendordata_raw'}} - self.maxDiff = None + 'region': 'myregion'}}} self.assertEqual(expected, util.load_json(content)) file_stat = os.stat(json_file) + self.assertEqual(0o644, stat.S_IMODE(file_stat.st_mode)) + self.assertEqual(expected, util.load_json(content)) + + def test_get_data_writes_json_instance_data_sensitive(self): + """get_data writes INSTANCE_JSON_SENSITIVE_FILE as readonly root.""" + tmp = self.tmp_dir() + datasource = DataSourceTestSubclassNet( + self.sys_cfg, self.distro, Paths({'run_dir': tmp}), + custom_metadata={ + 'availability_zone': 'myaz', + 'local-hostname': 'test-subclass-hostname', + 'region': 'myregion', + 'some': {'security-credentials': { + 'cred1': 'sekret', 'cred2': 'othersekret'}}}) + self.assertEqual( + ('security-credentials',), datasource.sensitive_metadata_keys) + datasource.get_data() + json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp) + sensitive_json_file = self.tmp_path(INSTANCE_JSON_SENSITIVE_FILE, tmp) + redacted = util.load_json(util.load_file(json_file)) + self.assertEqual( + {'cred1': 'sekret', 'cred2': 'othersekret'}, + redacted['ds']['meta_data']['some']['security-credentials']) + content = util.load_file(sensitive_json_file) + expected = { + 'base64_encoded_keys': [], + 'sensitive_keys': ['ds/meta_data/some/security-credentials'], + 'v1': { + 'availability-zone': 'myaz', + 'availability_zone': 'myaz', + 'cloud-name': 'subclasscloudname', + 
'cloud_name': 'subclasscloudname', + 'instance-id': 'iid-datasource', + 'instance_id': 'iid-datasource', + 'local-hostname': 'test-subclass-hostname', + 'local_hostname': 'test-subclass-hostname', + 'region': 'myregion'}, + 'ds': { + 'meta_data': { + 'availability_zone': 'myaz', + 'local-hostname': 'test-subclass-hostname', + 'region': 'myregion', + 'some': {'security-credentials': REDACT_SENSITIVE_VALUE}}} + } + self.maxDiff = None + self.assertEqual(expected, util.load_json(content)) + file_stat = os.stat(sensitive_json_file) self.assertEqual(0o600, stat.S_IMODE(file_stat.st_mode)) + self.assertEqual(expected, util.load_json(content)) def test_get_data_handles_redacted_unserializable_content(self): """get_data warns unserializable content in INSTANCE_JSON_FILE.""" tmp = self.tmp_dir() datasource = DataSourceTestSubclassNet( self.sys_cfg, self.distro, Paths({'run_dir': tmp}), - custom_userdata={'key1': 'val1', 'key2': {'key2.1': self.paths}}) + custom_metadata={'key1': 'val1', 'key2': {'key2.1': self.paths}}) datasource.get_data() json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp) content = util.load_file(json_file) - expected_userdata = { + expected_metadata = { 'key1': 'val1', 'key2': { 'key2.1': "Warning: redacted unserializable type "}} instance_json = util.load_json(content) self.assertEqual( - expected_userdata, instance_json['ds']['user_data']) + expected_metadata, instance_json['ds']['meta_data']) def test_persist_instance_data_writes_ec2_metadata_when_set(self): """When ec2_metadata class attribute is set, persist to json.""" @@ -361,17 +417,17 @@ class TestDataSource(CiTestCase): tmp = self.tmp_dir() datasource = DataSourceTestSubclassNet( self.sys_cfg, self.distro, Paths({'run_dir': tmp}), - custom_userdata={'key1': 'val1', 'key2': {'key2.1': b'\x123'}}) + custom_metadata={'key1': 'val1', 'key2': {'key2.1': b'\x123'}}) self.assertTrue(datasource.get_data()) json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp) content = util.load_file(json_file) 
instance_json = util.load_json(content) - self.assertEqual( - ['ds/user_data/key2/key2.1'], + self.assertItemsEqual( + ['ds/meta_data/key2/key2.1'], instance_json['base64_encoded_keys']) self.assertEqual( {'key1': 'val1', 'key2': {'key2.1': 'EjM='}}, - instance_json['ds']['user_data']) + instance_json['ds']['meta_data']) @skipIf(not six.PY2, "json serialization on <= py2.7 handles bytes") def test_get_data_handles_bytes_values(self): @@ -379,7 +435,7 @@ class TestDataSource(CiTestCase): tmp = self.tmp_dir() datasource = DataSourceTestSubclassNet( self.sys_cfg, self.distro, Paths({'run_dir': tmp}), - custom_userdata={'key1': 'val1', 'key2': {'key2.1': b'\x123'}}) + custom_metadata={'key1': 'val1', 'key2': {'key2.1': b'\x123'}}) self.assertTrue(datasource.get_data()) json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp) content = util.load_file(json_file) @@ -387,7 +443,7 @@ class TestDataSource(CiTestCase): self.assertEqual([], instance_json['base64_encoded_keys']) self.assertEqual( {'key1': 'val1', 'key2': {'key2.1': '\x123'}}, - instance_json['ds']['user_data']) + instance_json['ds']['meta_data']) @skipIf(not six.PY2, "Only python2 hits UnicodeDecodeErrors on non-utf8") def test_non_utf8_encoding_logs_warning(self): @@ -395,7 +451,7 @@ class TestDataSource(CiTestCase): tmp = self.tmp_dir() datasource = DataSourceTestSubclassNet( self.sys_cfg, self.distro, Paths({'run_dir': tmp}), - custom_userdata={'key1': 'val1', 'key2': {'key2.1': b'ab\xaadef'}}) + custom_metadata={'key1': 'val1', 'key2': {'key2.1': b'ab\xaadef'}}) self.assertTrue(datasource.get_data()) json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp) self.assertFalse(os.path.exists(json_file)) @@ -509,4 +565,36 @@ class TestDataSource(CiTestCase): self.logs.getvalue()) +class TestRedactSensitiveData(CiTestCase): + + def test_redact_sensitive_data_noop_when_no_sensitive_keys_present(self): + """When sensitive_keys is absent or empty from metadata do nothing.""" + md = {'my': 'data'} + self.assertEqual( + md, 
redact_sensitive_keys(md, redact_value='redacted')) + md['sensitive_keys'] = [] + self.assertEqual( + md, redact_sensitive_keys(md, redact_value='redacted')) + + def test_redact_sensitive_data_redacts_exact_match_name(self): + """Only exact matched sensitive_keys are redacted from metadata.""" + md = {'sensitive_keys': ['md/secure'], + 'md': {'secure': 's3kr1t', 'insecure': 'publik'}} + secure_md = copy.deepcopy(md) + secure_md['md']['secure'] = 'redacted' + self.assertEqual( + secure_md, + redact_sensitive_keys(md, redact_value='redacted')) + + def test_redact_sensitive_data_does_redacts_with_default_string(self): + """When redact_value is absent, REDACT_SENSITIVE_VALUE is used.""" + md = {'sensitive_keys': ['md/secure'], + 'md': {'secure': 's3kr1t', 'insecure': 'publik'}} + secure_md = copy.deepcopy(md) + secure_md['md']['secure'] = 'redacted for non-root user' + self.assertEqual( + secure_md, + redact_sensitive_keys(md)) + + # vi: ts=4 expandtab diff --git a/doc/rtd/index.rst b/doc/rtd/index.rst index de67f361..20a99a30 100644 --- a/doc/rtd/index.rst +++ b/doc/rtd/index.rst @@ -31,6 +31,7 @@ initialization of a cloud instance. topics/capabilities.rst topics/availability.rst topics/format.rst + topics/instancedata.rst topics/dir_layout.rst topics/examples.rst topics/boot.rst diff --git a/doc/rtd/topics/capabilities.rst b/doc/rtd/topics/capabilities.rst index 2d8e2538..0d8b8947 100644 --- a/doc/rtd/topics/capabilities.rst +++ b/doc/rtd/topics/capabilities.rst @@ -18,7 +18,7 @@ User configurability User-data can be given by the user at instance launch time. See :ref:`user_data_formats` for acceptable user-data content. - + This is done via the ``--user-data`` or ``--user-data-file`` argument to ec2-run-instances for example. @@ -53,10 +53,9 @@ system: % cloud-init --help usage: cloud-init [-h] [--version] [--file FILES] - [--debug] [--force] - {init,modules,single,dhclient-hook,features,analyze,devel,collect-logs,clean,status} - ... 
+ {init,modules,single,query,dhclient-hook,features,analyze,devel,collect-logs,clean,status} + ... optional arguments: -h, --help show this help message and exit @@ -68,17 +67,19 @@ system: your own risk) Subcommands: - {init,modules,single,dhclient-hook,features,analyze,devel,collect-logs,clean,status} + {init,modules,single,query,dhclient-hook,features,analyze,devel,collect-logs,clean,status} init initializes cloud-init and performs initial modules modules activates modules using a given configuration key single run a single module + query Query instance metadata from the command line dhclient-hook run the dhclient hookto record network info features list defined features analyze Devel tool: Analyze cloud-init logs and data devel Run development tools collect-logs Collect and tar all cloud-init debug info - clean Remove logs and artifacts so cloud-init can re-run. - status Report cloud-init status or wait on completion. + clean Remove logs and artifacts so cloud-init can re-run + status Report cloud-init status or wait on completion + CLI Subcommand details ====================== @@ -104,8 +105,8 @@ cloud-init status Report whether cloud-init is running, done, disabled or errored. Exits non-zero if an error is detected in cloud-init. - * **--long**: Detailed status information. - * **--wait**: Block until cloud-init completes. +* **--long**: Detailed status information. +* **--wait**: Block until cloud-init completes. .. code-block:: shell-session @@ -143,6 +144,68 @@ Logs collected are: * journalctl output * /var/lib/cloud/instance/user-data.txt +.. _cli_query: + +cloud-init query +------------------ +Query standardized cloud instance metadata crawled by cloud-init and stored +in ``/run/cloud-init/instance-data.json``. This is a convenience command-line +interface to reference any cached configuration metadata that cloud-init +crawls when booting the instance. See :ref:`instance_metadata` for more info. 
+ +* **--all**: Dump all available instance data as json which can be queried. +* **--instance-data**: Optional path to a different instance-data.json file to + source for queries. +* **--list-keys**: List available query keys from cached instance data. + +.. code-block:: shell-session + + # List all top-level query keys available (includes standardized aliases) + % cloud-init query --list-keys + availability_zone + base64_encoded_keys + cloud_name + ds + instance_id + local_hostname + region + v1 + +* **<varname>**: A dot-delimited variable path into the instance-data.json + object. + +.. code-block:: shell-session + + # Query cloud-init standardized metadata on any cloud + % cloud-init query v1.cloud_name + aws # or openstack, azure, gce etc. + + # Any standardized instance-data under a <v#> key is aliased as a top-level + # key for convenience. + % cloud-init query cloud_name + aws # or openstack, azure, gce etc. + + # Query datasource-specific metadata on EC2 + % cloud-init query ds.meta_data.public_ipv4 + +* **--format**: A string that will use jinja-template syntax to render a string, + replacing any variables with their instance-data values. + +.. code-block:: shell-session + + # Generate a custom hostname fqdn based on instance-id, cloud and region + % cloud-init query --format 'custom-{{instance_id}}.{{region}}.{{v1.cloud_name}}.com' + custom-i-0e91f69987f37ec74.us-east-2.aws.com + + +.. note:: + The standardized instance data keys under **v#** are guaranteed not to change + behavior or format. If using top-level convenience aliases for any + standardized instance data keys, the most recent value (highest **v#**) of that key + name is what is reported as the top-level value. So these aliases act as a + 'latest'. + + .. _cli_analyze: cloud-init analyze @@ -150,10 +213,10 @@ cloud-init analyze Get detailed reports of where cloud-init spends most of its time. See :ref:`boot_time_analysis` for more info. - * **blame** Report ordered by most costly operations.
- * **dump** Machine-readable JSON dump of all cloud-init tracked events. - * **show** show time-ordered report of the cost of operations during each - boot stage. +* **blame** Report ordered by most costly operations. +* **dump** Machine-readable JSON dump of all cloud-init tracked events. +* **show** show time-ordered report of the cost of operations during each + boot stage. .. _cli_devel: @@ -182,8 +245,8 @@ cloud-init clean Remove cloud-init artifacts from /var/lib/cloud and optionally reboot the machine to so cloud-init re-runs all stages as it did on first boot. - * **--logs**: Optionally remove /var/log/cloud-init*log files. - * **--reboot**: Reboot the system after removing artifacts. +* **--logs**: Optionally remove /var/log/cloud-init*log files. +* **--reboot**: Reboot the system after removing artifacts. .. _cli_init: @@ -195,7 +258,7 @@ Can be run on the commandline, but is generally gated to run only once due to semaphores in **/var/lib/cloud/instance/sem/** and **/var/lib/cloud/sem**. - * **--local**: Run *init-local* stage instead of *init*. +* **--local**: Run *init-local* stage instead of *init*. .. _cli_modules: @@ -210,8 +273,8 @@ declared to run in various boot stages in the file commandline, but each module is gated to run only once due to semaphores in ``/var/lib/cloud/``. - * **--mode (init|config|final)**: Run *modules:init*, *modules:config* or - *modules:final* cloud-init stages. See :ref:`boot_stages` for more info. +* **--mode (init|config|final)**: Run *modules:init*, *modules:config* or + *modules:final* cloud-init stages. See :ref:`boot_stages` for more info. .. _cli_single: @@ -221,9 +284,9 @@ Attempt to run a single named cloud config module. 
The following example re-runs the cc_set_hostname module ignoring the module default frequency of once-per-instance: - * **--name**: The cloud-config module name to run - * **--frequency**: Optionally override the declared module frequency - with one of (always|once-per-instance|once) +* **--name**: The cloud-config module name to run +* **--frequency**: Optionally override the declared module frequency + with one of (always|once-per-instance|once) .. code-block:: shell-session diff --git a/doc/rtd/topics/datasources.rst b/doc/rtd/topics/datasources.rst index 14432e65..e34f145c 100644 --- a/doc/rtd/topics/datasources.rst +++ b/doc/rtd/topics/datasources.rst @@ -17,146 +17,10 @@ own way) internally a datasource abstract class was created to allow for a single way to access the different cloud systems methods to provide this data through the typical usage of subclasses. - -.. _instance_metadata: - -instance-data -------------- -For reference, cloud-init stores all the metadata, vendordata and userdata -provided by a cloud in a json blob at ``/run/cloud-init/instance-data.json``. -While the json contains datasource-specific keys and names, cloud-init will -maintain a minimal set of standardized keys that will remain stable on any -cloud. Standardized instance-data keys will be present under a "v1" key. -Any datasource metadata cloud-init consumes will all be present under the -"ds" key. - -Below is an instance-data.json example from an OpenStack instance: - -.. 
sourcecode:: json - - { - "base64-encoded-keys": [ - "ds/meta-data/random_seed", - "ds/user-data" - ], - "ds": { - "ec2_metadata": { - "ami-id": "ami-0000032f", - "ami-launch-index": "0", - "ami-manifest-path": "FIXME", - "block-device-mapping": { - "ami": "vda", - "ephemeral0": "/dev/vdb", - "root": "/dev/vda" - }, - "hostname": "xenial-test.novalocal", - "instance-action": "none", - "instance-id": "i-0006e030", - "instance-type": "m1.small", - "local-hostname": "xenial-test.novalocal", - "local-ipv4": "10.5.0.6", - "placement": { - "availability-zone": "None" - }, - "public-hostname": "xenial-test.novalocal", - "public-ipv4": "10.245.162.145", - "reservation-id": "r-fxm623oa", - "security-groups": "default" - }, - "meta-data": { - "availability_zone": null, - "devices": [], - "hostname": "xenial-test.novalocal", - "instance-id": "3e39d278-0644-4728-9479-678f9212d8f0", - "launch_index": 0, - "local-hostname": "xenial-test.novalocal", - "name": "xenial-test", - "project_id": "e0eb2d2538814...", - "random_seed": "A6yPN...", - "uuid": "3e39d278-0644-4728-9479-678f92..." - }, - "network_json": { - "links": [ - { - "ethernet_mac_address": "fa:16:3e:7d:74:9b", - "id": "tap9ca524d5-6e", - "mtu": 8958, - "type": "ovs", - "vif_id": "9ca524d5-6e5a-4809-936a-6901..." - } - ], - "networks": [ - { - "id": "network0", - "link": "tap9ca524d5-6e", - "network_id": "c6adfc18-9753-42eb-b3ea-18b57e6b837f", - "type": "ipv4_dhcp" - } - ], - "services": [ - { - "address": "10.10.160.2", - "type": "dns" - } - ] - }, - "user-data": "I2Nsb3VkLWNvbmZpZ...", - "vendor-data": null - }, - "v1": { - "availability-zone": null, - "cloud-name": "openstack", - "instance-id": "3e39d278-0644-4728-9479-678f9212d8f0", - "local-hostname": "xenial-test", - "region": null - } - } - - -As of cloud-init v. 18.4, any values present in -``/run/cloud-init/instance-data.json`` can be used in cloud-init user data -scripts or cloud config data. 
This allows consumers to use cloud-init's -vendor-neutral, standardized metadata keys as well as datasource-specific -content for any scripts or cloud-config modules they are using. - -To use instance-data.json values in scripts and **#config-config** files the -user-data will need to contain the following header as the first line **## template: jinja**. Cloud-init will source all variables defined in -``/run/cloud-init/instance-data.json`` and allow scripts or cloud-config files -to reference those paths. Below are two examples:: - - * Cloud config calling home with the ec2 public hostname and avaliability-zone - ``` - ## template: jinja - #cloud-config - runcmd: - - echo 'EC2 public hostname allocated to instance: {{ ds.meta_data.public_hostname }}' > /tmp/instance_metadata - - echo 'EC2 avaiability zone: {{ v1.availability_zone }}' >> /tmp/instance_metadata - - curl -X POST -d '{"hostname": "{{ds.meta_data.public_hostname }}", "availability-zone": "{{ v1.availability_zone }}"}' https://example.com.com - ``` - - * Custom user script performing different operations based on region - ``` - ## template: jinja - #!/bin/bash - {% if v1.region == 'us-east-2' -%} - echo 'Installing custom proxies for {{ v1.region }} - sudo apt-get install my-xtra-fast-stack - {%- endif %} - ... - - ``` - -.. note:: - Trying to reference jinja variables that don't exist in - instance-data.json will result in warnings in ``/var/log/cloud-init.log`` - and the following string in your rendered user-data: - ``CI_MISSING_JINJA_VAR/``. - -.. note:: - To save time designing your user-data for a specific cloud's - instance-data.json, use the 'render' cloud-init command on an - instance booted on your favorite cloud. See :ref:`cli_devel` for more - information. +Any metadata processed by cloud-init's datasources is persisted as +``/run/cloud-init/instance-data.json``. Cloud-init provides tooling +to quickly introspect some of that data. See :ref:`instance_metadata` for +more information.
Datasource API @@ -196,14 +60,14 @@ The current interface that a datasource object must provide is the following: # or does not exist) def device_name_to_device(self, name) - # gets the locale string this instance should be applying + # gets the locale string this instance should be applying # which typically used to adjust the instances locale settings files def get_locale(self) @property def availability_zone(self) - # gets the instance id that was assigned to this instance by the + # gets the instance id that was assigned to this instance by the # cloud provider or when said instance id does not exist in the backing # metadata this will return 'iid-datasource' def get_instance_id(self) diff --git a/doc/rtd/topics/instancedata.rst b/doc/rtd/topics/instancedata.rst new file mode 100644 index 00000000..634e1807 --- /dev/null +++ b/doc/rtd/topics/instancedata.rst @@ -0,0 +1,297 @@ +.. _instance_metadata: + +***************** +Instance Metadata +***************** + +What is instance data? +======================== + +Instance data is the collection of all configuration data that cloud-init +processes to configure the instance. This configuration typically +comes from any number of sources: + +* cloud-provided metadata services (aka metadata) +* custom config-drive attached to the instance +* cloud-config seed files in the booted cloud image or distribution +* vendordata provided from files or cloud metadata services +* userdata provided at instance creation + +Each cloud provider presents unique configuration metadata in different +formats to the instance. Cloud-init provides a cache of any crawled metadata +as well as a versioned set of standardized instance data keys which it makes +available on all platforms. + +Cloud-init produces a simple json object in +``/run/cloud-init/instance-data.json`` which provides a standardized and +versioned representation of the metadata it consumes during initial boot.
The +intent is to provide the following benefits to users or scripts on any system +deployed with cloud-init: + +* simple static object to query to obtain an instance's metadata +* speed: avoid costly network transactions for metadata that is already cached + on the filesystem +* reduce need to recrawl metadata services for static metadata that is already + cached +* leverage cloud-init's best practices for crawling cloud-metadata services +* avoid rolling unique metadata crawlers on each cloud platform to get + metadata configuration values + +Cloud-init stores any instance data processed in the following files: + +* ``/run/cloud-init/instance-data.json``: world-readable json containing + standardized keys, sensitive keys redacted +* ``/run/cloud-init/instance-data-sensitive.json``: root-readable unredacted + json blob +* ``/var/lib/cloud/instance/user-data.txt``: root-readable sensitive raw + userdata +* ``/var/lib/cloud/instance/vendor-data.txt``: root-readable sensitive raw + vendordata + +Cloud-init redacts any security sensitive content from instance-data.json and +stores ``/run/cloud-init/instance-data.json`` as a world-readable json file. +Because user-data and vendor-data can contain passwords both of these files +are readonly for *root* as well. The *root* user can also read +``/run/cloud-init/instance-data-sensitive.json`` which is all instance data +from instance-data.json as well as unredacted sensitive content. + + +Format of instance-data.json +============================ + +The instance-data.json and instance-data-sensitive.json files are well-formed +JSON and record the set of keys and values for any metadata processed by +cloud-init. Cloud-init standardizes the format for this content so that it +can be generalized across different cloud platforms. + +There are four basic top-level keys: + +* **base64_encoded_keys**: A list of forward-slash delimited key paths into + the instance-data.json object whose value is base64encoded for json + compatibility.
Values at these paths should be decoded to get the original + value. + +* **sensitive_keys**: A list of forward-slash delimited key paths into + the instance-data.json object whose value is considered by the datasource as + 'security sensitive'. Only the keys listed here will be redacted from + instance-data.json for non-root users. + +* **ds**: Datasource-specific metadata crawled for the specific cloud + platform. It should closely represent the structure of the cloud metadata + crawled. The structure of content and details provided are entirely + cloud-dependent. Mileage will vary depending on what the cloud exposes. + The content exposed under the 'ds' key is currently **experimental** and + expected to change slightly in the upcoming cloud-init release. + +* **v1**: Standardized cloud-init metadata keys, these keys are guaranteed to + exist on all cloud platforms. They will also retain their current behavior + and format and will be carried forward even if cloud-init introduces a new + version of standardized keys with **v2**. + +The standardized keys present: + ++----------------------+-----------------------------------------------+---------------------------+ +| Key path | Description | Examples | ++======================+===============================================+===========================+ +| v1.cloud_name | The name of the cloud provided by metadata | aws, openstack, azure, | +| | key 'cloud-name' or the cloud-init datasource | configdrive, nocloud, | +| | name which was discovered. | ovf, etc. 
| ++----------------------+-----------------------------------------------+---------------------------+ +| v1.instance_id | Unique instance_id allocated by the cloud | i- | ++----------------------+-----------------------------------------------+---------------------------+ +| v1.local_hostname | The internal or local hostname of the system | ip-10-41-41-70, | +| | | | ++----------------------+-----------------------------------------------+---------------------------+ +| v1.region | The physical region/datacenter in which the | us-east-2 | +| | instance is deployed | | ++----------------------+-----------------------------------------------+---------------------------+ +| v1.availability_zone | The physical availability zone in which the | us-east-2b, nova, null | +| | instance is deployed | | ++----------------------+-----------------------------------------------+---------------------------+ + + +Below is an example of ``/run/cloud-init/instance_data.json`` on an EC2 +instance: + +.. 
sourcecode:: json + + { + "base64_encoded_keys": [], + "sensitive_keys": [], + "ds": { + "meta_data": { + "ami-id": "ami-014e1416b628b0cbf", + "ami-launch-index": "0", + "ami-manifest-path": "(unknown)", + "block-device-mapping": { + "ami": "/dev/sda1", + "ephemeral0": "sdb", + "ephemeral1": "sdc", + "root": "/dev/sda1" + }, + "hostname": "ip-10-41-41-70.us-east-2.compute.internal", + "instance-action": "none", + "instance-id": "i-04fa31cfc55aa7976", + "instance-type": "t2.micro", + "local-hostname": "ip-10-41-41-70.us-east-2.compute.internal", + "local-ipv4": "10.41.41.70", + "mac": "06:b6:92:dd:9d:24", + "metrics": { + "vhostmd": "" + }, + "network": { + "interfaces": { + "macs": { + "06:b6:92:dd:9d:24": { + "device-number": "0", + "interface-id": "eni-08c0c9fdb99b6e6f4", + "ipv4-associations": { + "18.224.22.43": "10.41.41.70" + }, + "local-hostname": "ip-10-41-41-70.us-east-2.compute.internal", + "local-ipv4s": "10.41.41.70", + "mac": "06:b6:92:dd:9d:24", + "owner-id": "437526006925", + "public-hostname": "ec2-18-224-22-43.us-east-2.compute.amazonaws.com", + "public-ipv4s": "18.224.22.43", + "security-group-ids": "sg-828247e9", + "security-groups": "Cloud-init integration test secgroup", + "subnet-id": "subnet-282f3053", + "subnet-ipv4-cidr-block": "10.41.41.0/24", + "subnet-ipv6-cidr-blocks": "2600:1f16:b80:ad00::/64", + "vpc-id": "vpc-252ef24d", + "vpc-ipv4-cidr-block": "10.41.0.0/16", + "vpc-ipv4-cidr-blocks": "10.41.0.0/16", + "vpc-ipv6-cidr-blocks": "2600:1f16:b80:ad00::/56" + } + } + } + }, + "placement": { + "availability-zone": "us-east-2b" + }, + "profile": "default-hvm", + "public-hostname": "ec2-18-224-22-43.us-east-2.compute.amazonaws.com", + "public-ipv4": "18.224.22.43", + "public-keys": { + "cloud-init-integration": [ + "ssh-rsa + 
AAAAB3NzaC1yc2EAAAADAQABAAABAQDSL7uWGj8cgWyIOaspgKdVy0cKJ+UTjfv7jBOjG2H/GN8bJVXy72XAvnhM0dUM+CCs8FOf0YlPX+Frvz2hKInrmRhZVwRSL129PasD12MlI3l44u6IwS1o/W86Q+tkQYEljtqDOo0a+cOsaZkvUNzUyEXUwz/lmYa6G4hMKZH4NBj7nbAAF96wsMCoyNwbWryBnDYUr6wMbjRR1J9Pw7Xh7WRC73wy4Va2YuOgbD3V/5ZrFPLbWZW/7TFXVrql04QVbyei4aiFR5n//GvoqwQDNe58LmbzX/xvxyKJYdny2zXmdAhMxbrpFQsfpkJ9E/H5w0yOdSvnWbUoG5xNGoOB + cloud-init-integration" + ] + }, + "reservation-id": "r-06ab75e9346f54333", + "security-groups": "Cloud-init integration test secgroup", + "services": { + "domain": "amazonaws.com", + "partition": "aws" + } + } + }, + "v1": { + "availability-zone": "us-east-2b", + "availability_zone": "us-east-2b", + "cloud-name": "aws", + "cloud_name": "aws", + "instance-id": "i-04fa31cfc55aa7976", + "instance_id": "i-04fa31cfc55aa7976", + "local-hostname": "ip-10-41-41-70", + "local_hostname": "ip-10-41-41-70", + "region": "us-east-2" + } + } + + +Using instance-data +=================== + +As of cloud-init v. 18.4, any variables present in +``/run/cloud-init/instance-data.json`` can be used in: + +* User-data scripts +* Cloud config data +* Command line interface via **cloud-init query** or + **cloud-init devel render** + +Many clouds allow users to provide user-data to an instance at +the time the instance is launched. Cloud-init supports a number of +:ref:`user_data_formats`. + +Both user-data scripts and **#cloud-config** data support jinja template +rendering. +When the first line of the provided user-data begins with, +**## template: jinja** cloud-init will use jinja to render that file. +Any instance-data-sensitive.json variables are surfaced as dot-delimited +jinja template variables because cloud-config modules are run as 'root' +user. + + +Below are some examples of providing these types of user-data: + +* Cloud config calling home with the ec2 public hostname and avaliability-zone + +.. 
code-block:: shell-session + + ## template: jinja + #cloud-config + runcmd: + - echo 'EC2 public hostname allocated to instance: {{ + ds.meta_data.public_hostname }}' > /tmp/instance_metadata + - echo 'EC2 availability zone: {{ v1.availability_zone }}' >> + /tmp/instance_metadata + - curl -X POST -d '{"hostname": "{{ds.meta_data.public_hostname }}", + "availability-zone": "{{ v1.availability_zone }}"}' + https://example.com + +* Custom user-data script performing different operations based on region + +.. code-block:: shell-session + + ## template: jinja + #!/bin/bash + {% if v1.region == 'us-east-2' -%} + echo 'Installing custom proxies for {{ v1.region }}' + sudo apt-get install my-xtra-fast-stack + {%- endif %} + ... + +.. note:: + Trying to reference jinja variables that don't exist in + instance-data.json will result in warnings in ``/var/log/cloud-init.log`` + and the following string in your rendered user-data: + ``CI_MISSING_JINJA_VAR/<your_varname>``. + +Cloud-init also surfaces a command-line tool **cloud-init query** which can +assist developers or scripts with obtaining instance metadata easily. See +:ref:`cli_query` for more information. + +To cut down on keystrokes on the command line, cloud-init also provides +top-level key aliases for any standardized ``v#`` keys present. The preceding +``v1`` is not required of ``v1.var_name``. These aliases will represent the +value of the highest versioned standard key. For example, ``cloud_name`` +value will be ``v2.cloud_name`` if both ``v1`` and ``v2`` keys are present in +instance-data.json. +The **query** command also publishes ``userdata`` and ``vendordata`` keys to +the root user which will contain the decoded user and vendor data provided to +this instance. Non-root users referencing userdata or vendordata keys will +see only redacted values. + +..
code-block:: shell-session + + # List all top-level instance-data keys available + % cloud-init query --list-keys + + # Find your EC2 ami-id + % cloud-init query ds.meta_data.ami_id + + # Format your cloud_name and region using jinja template syntax + % cloud-init query --format 'cloud: {{ v1.cloud_name }} myregion: {{ + % v1.region }}' + +.. note:: + To save time designing a user-data template for a specific cloud's + instance-data.json, use the 'render' cloud-init command on an + instance booted on your favorite cloud. See :ref:`cli_devel` for more + information. + +.. vi: textwidth=78 diff --git a/integration-requirements.txt b/integration-requirements.txt index f80cb942..880d9886 100644 --- a/integration-requirements.txt +++ b/integration-requirements.txt @@ -5,16 +5,17 @@ # the packages/pkg-deps.json file as well. # +unittest2 # ec2 backend boto3==1.5.9 # ssh communication paramiko==2.4.1 + # lxd backend # 04/03/2018: enables use of lxd 3.0 git+https://github.com/lxc/pylxd.git@4b8ab1802f9aee4eb29cf7b119dae0aa47150779 - # finds latest image information git+https://git.launchpad.net/simplestreams diff --git a/tests/cloud_tests/testcases/base.py b/tests/cloud_tests/testcases/base.py index 27458271..c5457968 100644 --- a/tests/cloud_tests/testcases/base.py +++ b/tests/cloud_tests/testcases/base.py @@ -5,15 +5,15 @@ import crypt import json import re -import unittest +import unittest2 from cloudinit import util as c_util -SkipTest = unittest.SkipTest +SkipTest = unittest2.SkipTest -class CloudTestCase(unittest.TestCase): +class CloudTestCase(unittest2.TestCase): """Base test class for verifiers.""" # data gets populated in get_suite.setUpClass @@ -167,8 +167,9 @@ class CloudTestCase(unittest.TestCase): 'Skipping instance-data.json test.'
' OS: %s not bionic or newer' % self.os_name) instance_data = json.loads(out) - self.assertEqual( - ['ds/user_data'], instance_data['base64_encoded_keys']) + self.assertItemsEqual( + [], + instance_data['base64_encoded_keys']) ds = instance_data.get('ds', {}) v1_data = instance_data.get('v1', {}) metadata = ds.get('meta-data', {}) @@ -187,10 +188,10 @@ class CloudTestCase(unittest.TestCase): metadata.get('placement', {}).get('availability-zone'), 'Could not determine EC2 Availability zone placement') self.assertIsNotNone( - v1_data['availability-zone'], 'expected ec2 availability-zone') - self.assertEqual('aws', v1_data['cloud-name']) - self.assertIn('i-', v1_data['instance-id']) - self.assertIn('ip-', v1_data['local-hostname']) + v1_data['availability_zone'], 'expected ec2 availability_zone') + self.assertEqual('aws', v1_data['cloud_name']) + self.assertIn('i-', v1_data['instance_id']) + self.assertIn('ip-', v1_data['local_hostname']) self.assertIsNotNone(v1_data['region'], 'expected ec2 region') def test_instance_data_json_lxd(self): @@ -213,16 +214,14 @@ class CloudTestCase(unittest.TestCase): ' OS: %s not bionic or newer' % self.os_name) instance_data = json.loads(out) v1_data = instance_data.get('v1', {}) - self.assertEqual( - ['ds/user_data', 'ds/vendor_data'], - sorted(instance_data['base64_encoded_keys'])) - self.assertEqual('nocloud', v1_data['cloud-name']) + self.assertItemsEqual([], sorted(instance_data['base64_encoded_keys'])) + self.assertEqual('nocloud', v1_data['cloud_name']) self.assertIsNone( - v1_data['availability-zone'], - 'found unexpected lxd availability-zone %s' % - v1_data['availability-zone']) - self.assertIn('cloud-test', v1_data['instance-id']) - self.assertIn('cloud-test', v1_data['local-hostname']) + v1_data['availability_zone'], + 'found unexpected lxd availability_zone %s' % + v1_data['availability_zone']) + self.assertIn('cloud-test', v1_data['instance_id']) + self.assertIn('cloud-test', v1_data['local_hostname']) self.assertIsNone( 
v1_data['region'], 'found unexpected lxd region %s' % v1_data['region']) @@ -248,18 +247,17 @@ class CloudTestCase(unittest.TestCase): ' OS: %s not bionic or newer' % self.os_name) instance_data = json.loads(out) v1_data = instance_data.get('v1', {}) - self.assertEqual( - ['ds/user_data'], instance_data['base64_encoded_keys']) - self.assertEqual('nocloud', v1_data['cloud-name']) + self.assertItemsEqual([], instance_data['base64_encoded_keys']) + self.assertEqual('nocloud', v1_data['cloud_name']) self.assertIsNone( - v1_data['availability-zone'], - 'found unexpected kvm availability-zone %s' % - v1_data['availability-zone']) + v1_data['availability_zone'], + 'found unexpected kvm availability_zone %s' % + v1_data['availability_zone']) self.assertIsNotNone( re.match(r'[\da-f]{8}(-[\da-f]{4}){3}-[\da-f]{12}', - v1_data['instance-id']), - 'kvm instance-id is not a UUID: %s' % v1_data['instance-id']) - self.assertIn('ubuntu', v1_data['local-hostname']) + v1_data['instance_id']), + 'kvm instance_id is not a UUID: %s' % v1_data['instance_id']) + self.assertIn('ubuntu', v1_data['local_hostname']) self.assertIsNone( v1_data['region'], 'found unexpected lxd region %s' % v1_data['region']) -- cgit v1.2.3