From 0fe96a44cde48cc688afe75beb8fd126c8892b8c Mon Sep 17 00:00:00 2001 From: Chad Smith Date: Thu, 2 Dec 2021 21:25:43 -0700 Subject: jinja: provide and document jinja-safe key aliases in instance-data (SC-622) (#1123) Allow #cloud-config and cloud-init query to use underscore-delimited "jinja-safe" key aliases for any instance-data.json keys containing jinja operator characters. This provides a means to use Jinja's dot-notation instead of square brackets and quoting to reference "unsafe" obtain attribute names. Support for these aliased keys is available to both #cloud-config user-data and `cloud-init query`. For example #cloud-config alias access can look like: {{ ds.config.user_network_config }} - instead of - {{ ds.config["user.network-config"] }} --- cloudinit/cmd/query.py | 134 +++++++++++++++++++++++++++-------- cloudinit/cmd/tests/test_query.py | 71 ++++++++++++++++--- cloudinit/handlers/jinja_template.py | 48 ++++++++++--- cloudinit/sources/DataSourceLXD.py | 26 ++++--- cloudinit/sources/tests/test_lxd.py | 71 +++++++++---------- 5 files changed, 250 insertions(+), 100 deletions(-) (limited to 'cloudinit') diff --git a/cloudinit/cmd/query.py b/cloudinit/cmd/query.py index 07db9552..e53cd855 100644 --- a/cloudinit/cmd/query.py +++ b/cloudinit/cmd/query.py @@ -19,7 +19,10 @@ import os import sys from cloudinit.handlers.jinja_template import ( - convert_jinja_instance_data, render_jinja_payload) + convert_jinja_instance_data, + get_jinja_variable_alias, + render_jinja_payload +) from cloudinit.cmd.devel import addLogHandlerCLI, read_cfg_paths from cloudinit import log from cloudinit.sources import ( @@ -93,22 +96,24 @@ def load_userdata(ud_file_path): return util.decomp_gzip(bdata, quiet=False, decode=True) -def handle_args(name, args): - """Handle calls to 'cloud-init query' as a subcommand.""" - paths = None - addLogHandlerCLI(LOG, log.DEBUG if args.debug else log.WARNING) - if not any([args.list_keys, args.varname, args.format, args.dump_all]): - LOG.error( - 'Expected one of the options: --all, --format,' - ' --list-keys or varname') - get_parser().print_help() - return 1 +def _read_instance_data(instance_data, user_data, vendor_data) -> dict: + """Return a dict of merged instance-data, vendordata and userdata. + The dict will contain supplemental userdata and vendordata keys sourced + from default user-data and vendor-data files. + + Non-root users will have redacted INSTANCE_JSON_FILE content and redacted + vendordata and userdata values. + + :raise: IOError/OSError on absence of instance-data.json file or invalid + access perms. + """ + paths = None uid = os.getuid() - if not all([args.instance_data, args.user_data, args.vendor_data]): + if not all([instance_data, user_data, vendor_data]): paths = read_cfg_paths() - if args.instance_data: - instance_data_fn = args.instance_data + if instance_data: + instance_data_fn = instance_data else: redacted_data_fn = os.path.join(paths.run_dir, INSTANCE_JSON_FILE) if uid == 0: @@ -124,12 +129,12 @@ def handle_args(name, args): instance_data_fn = redacted_data_fn else: instance_data_fn = redacted_data_fn - if args.user_data: - user_data_fn = args.user_data + if user_data: + user_data_fn = user_data else: user_data_fn = os.path.join(paths.instance_link, 'user-data.txt') - if args.vendor_data: - vendor_data_fn = args.vendor_data + if vendor_data: + vendor_data_fn = vendor_data else: vendor_data_fn = os.path.join(paths.instance_link, 'vendor-data.txt') @@ -140,7 +145,7 @@ def handle_args(name, args): LOG.error("No read permission on '%s'. Try sudo", instance_data_fn) else: LOG.error('Missing instance-data file: %s', instance_data_fn) - return 1 + raise instance_data = util.load_json(instance_json) if uid != 0: @@ -151,6 +156,65 @@ def handle_args(name, args): else: instance_data['userdata'] = load_userdata(user_data_fn) instance_data['vendordata'] = load_userdata(vendor_data_fn) + return instance_data + + +def _find_instance_data_leaf_by_varname_path( + jinja_vars_without_aliases: dict, jinja_vars_with_aliases: dict, + varname: str, list_keys: bool +): + """Return the value of the dot-delimited varname path in instance-data + + Split a dot-delimited jinja variable name path into components, walk the + path components into the instance_data and look up a matching jinja + variable name or cloud-init's underscore-delimited key aliases. + + :raises: ValueError when varname represents an invalid key name or path or + if list-keys is provided by varname isn't a dict object. + """ + walked_key_path = "" + response = jinja_vars_without_aliases + for key_path_part in varname.split('.'): + try: + # Walk key path using complete aliases dict, yet response + # should only contain jinja_without_aliases + jinja_vars_with_aliases = jinja_vars_with_aliases[key_path_part] + except KeyError as e: + if walked_key_path: + msg = "instance-data '{key_path}' has no '{leaf}'".format( + leaf=key_path_part, key_path=walked_key_path + ) + else: + msg = "Undefined instance-data key '{}'".format(varname) + raise ValueError(msg) from e + if key_path_part in response: + response = response[key_path_part] + else: # We are an underscore_delimited key alias + for key in response: + if get_jinja_variable_alias(key) == key_path_part: + response = response[key] + break + if walked_key_path: + walked_key_path += "." + walked_key_path += key_path_part + return response + + +def handle_args(name, args): + """Handle calls to 'cloud-init query' as a subcommand.""" + addLogHandlerCLI(LOG, log.DEBUG if args.debug else log.WARNING) + if not any([args.list_keys, args.varname, args.format, args.dump_all]): + LOG.error( + 'Expected one of the options: --all, --format,' + ' --list-keys or varname') + get_parser().print_help() + return 1 + try: + instance_data = _read_instance_data( + args.instance_data, args.user_data, args.vendor_data + ) + except (IOError, OSError): + return 1 if args.format: payload = '## template: jinja\n{fmt}'.format(fmt=args.format) rendered_payload = render_jinja_payload( @@ -162,20 +226,32 @@ def handle_args(name, args): return 0 return 1 + # If not rendering a structured format above, query output will be either: + # - JSON dump of all instance-data/jinja variables + # - JSON dump of a value at an dict path into the instance-data dict. + # - a list of keys for a specific dict path into the instance-data dict. response = convert_jinja_instance_data(instance_data) if args.varname: + jinja_vars_with_aliases = convert_jinja_instance_data( + instance_data, include_key_aliases=True + ) try: - for var in args.varname.split('.'): - response = response[var] - except KeyError: - LOG.error('Undefined instance-data key %s', args.varname) + response = _find_instance_data_leaf_by_varname_path( + jinja_vars_without_aliases=response, + jinja_vars_with_aliases=jinja_vars_with_aliases, + varname=args.varname, + list_keys=args.list_keys + ) + except (KeyError, ValueError) as e: + LOG.error(e) + return 1 + if args.list_keys: + if not isinstance(response, dict): + LOG.error( + "--list-keys provided but '%s' is not a dict", + args.varname + ) return 1 - if args.list_keys: - if not isinstance(response, dict): - LOG.error("--list-keys provided but '%s' is not a dict", var) - return 1 - response = '\n'.join(sorted(response.keys())) - elif args.list_keys: response = '\n'.join(sorted(response.keys())) if not isinstance(response, str): response = util.json_dumps(response) diff --git a/cloudinit/cmd/tests/test_query.py b/cloudinit/cmd/tests/test_query.py index c258d321..d96c3945 100644 --- a/cloudinit/cmd/tests/test_query.py +++ b/cloudinit/cmd/tests/test_query.py @@ -75,6 +75,40 @@ class TestQuery: assert 'usage: query' in out assert 1 == m_cli_log.call_count + @pytest.mark.parametrize( + "inst_data,varname,expected_error", ( + ( + '{"v1": {"key-2": "value-2"}}', + 'v1.absent_leaf', + "instance-data 'v1' has no 'absent_leaf'\n" + ), + ( + '{"v1": {"key-2": "value-2"}}', + 'absent_key', + "Undefined instance-data key 'absent_key'\n" + ), + ) + ) + def test_handle_args_error_on_invalid_vaname_paths( + self, inst_data, varname, expected_error, caplog, tmpdir + ): + """Error when varname is not a valid instance-data variable path.""" + instance_data = tmpdir.join('instance-data') + instance_data.write(inst_data) + args = self.args( + debug=False, dump_all=False, format=None, + instance_data=instance_data.strpath, + list_keys=False, user_data=None, vendor_data=None, varname=varname + ) + paths, _, _, _ = self._setup_paths(tmpdir) + with mock.patch('cloudinit.cmd.query.read_cfg_paths') as m_paths: + m_paths.return_value = paths + with mock.patch( + "cloudinit.cmd.query.addLogHandlerCLI", return_value="" + ): + assert 1 == query.handle_args('anyname', args) + assert expected_error in caplog.text + def test_handle_args_error_on_missing_instance_data(self, caplog, tmpdir): """When instance_data file path does not exist, log an error.""" absent_fn = tmpdir.join('absent') @@ -166,7 +200,7 @@ class TestQuery: assert 0 == query.handle_args('anyname', args) out, _err = capsys.readouterr() cmd_output = json.loads(out) - assert "it worked" == cmd_output['my_var'] + assert "it worked" == cmd_output['my-var'] if ud_expected == "ci-b64:": ud_expected = "ci-b64:{}".format(b64e(ud_src)) if vd_expected == "ci-b64:": @@ -193,8 +227,8 @@ class TestQuery: m_getuid.return_value = 0 assert 0 == query.handle_args('anyname', args) expected = ( - '{\n "my_var": "it worked",\n "userdata": "ud",\n ' - '"vendordata": "vd"\n}\n' + '{\n "my-var": "it worked",\n ' + '"userdata": "ud",\n "vendordata": "vd"\n}\n' ) out, _err = capsys.readouterr() assert expected == out @@ -211,7 +245,7 @@ class TestQuery: m_getuid.return_value = 100 assert 0 == query.handle_args('anyname', args) expected = ( - '{\n "my_var": "it worked",\n "userdata": "<%s> file:ud",\n' + '{\n "my-var": "it worked",\n "userdata": "<%s> file:ud",\n' ' "vendordata": "<%s> file:vd"\n}\n' % ( REDACT_SENSITIVE_VALUE, REDACT_SENSITIVE_VALUE ) @@ -233,21 +267,38 @@ class TestQuery: out, _err = capsys.readouterr() assert 'it worked\n' == out - def test_handle_args_returns_nested_varname(self, capsys, tmpdir): + @pytest.mark.parametrize( + 'inst_data,varname,expected', + ( + ( + '{"v1": {"key-2": "value-2"}, "my-var": "it worked"}', + 'v1.key_2', + 'value-2\n' + ), + # Assert no jinja underscore-delimited aliases are reported on CLI + ( + '{"v1": {"something-hyphenated": {"no.underscores":"x",' + ' "no-alias": "y"}}, "my-var": "it worked"}', + 'v1.something_hyphenated', + '{\n "no-alias": "y",\n "no.underscores": "x"\n}\n' + ), + ) + ) + def test_handle_args_returns_nested_varname( + self, inst_data, varname, expected, capsys, tmpdir + ): """If user_data file is a jinja template render instance-data vars.""" instance_data = tmpdir.join('instance-data') - instance_data.write( - '{"v1": {"key-2": "value-2"}, "my-var": "it worked"}' - ) + instance_data.write(inst_data) args = self.args( debug=False, dump_all=False, format=None, instance_data=instance_data.strpath, user_data='ud', - vendor_data='vd', list_keys=False, varname='v1.key_2') + vendor_data='vd', list_keys=False, varname=varname) with mock.patch('os.getuid') as m_getuid: m_getuid.return_value = 100 assert 0 == query.handle_args('anyname', args) out, _err = capsys.readouterr() - assert 'value-2\n' == out + assert expected == out def test_handle_args_returns_standardized_vars_to_top_level_aliases( self, capsys, tmpdir diff --git a/cloudinit/handlers/jinja_template.py b/cloudinit/handlers/jinja_template.py index 5033abbb..de88a5ea 100644 --- a/cloudinit/handlers/jinja_template.py +++ b/cloudinit/handlers/jinja_template.py @@ -1,14 +1,18 @@ # This file is part of cloud-init. See LICENSE file for license information. +import copy from errno import EACCES import os import re +from typing import Optional try: from jinja2.exceptions import UndefinedError as JUndefinedError + from jinja2.lexer import operator_re except ImportError: # No jinja2 dependency JUndefinedError = Exception + operator_re = re.compile(r'[-.]') from cloudinit import handlers from cloudinit import log as logging @@ -97,7 +101,9 @@ def render_jinja_payload_from_file( def render_jinja_payload(payload, payload_fn, instance_data, debug=False): instance_jinja_vars = convert_jinja_instance_data( instance_data, - decode_paths=instance_data.get('base64-encoded-keys', [])) + decode_paths=instance_data.get('base64-encoded-keys', []), + include_key_aliases=True + ) if debug: LOG.debug('Converted jinja variables\n%s', json_dumps(instance_jinja_vars)) @@ -118,7 +124,30 @@ def render_jinja_payload(payload, payload_fn, instance_data, debug=False): return rendered_payload -def convert_jinja_instance_data(data, prefix='', sep='/', decode_paths=()): +def get_jinja_variable_alias(orig_name: str) -> Optional[str]: + """Return a jinja variable alias, replacing any operators with underscores. + + Provide underscore-delimited key aliases to simplify dot-notation + attribute references for keys which contain operators "." or "-". + This provides for simpler short-hand jinja attribute notation + allowing one to avoid quoting keys which contain operators. + {{ ds.v1_0.config.user_network_config }} instead of + {{ ds['v1.0'].config["user.network-config"] }}. + + :param orig_name: String representing a jinja variable name to scrub/alias. + + :return: A string with any jinja operators replaced if needed. Otherwise, + none if no alias required. + """ + alias_name = re.sub(operator_re, '_', orig_name) + if alias_name != orig_name: + return alias_name + return None + + +def convert_jinja_instance_data( + data, prefix='', sep='/', decode_paths=(), include_key_aliases=False +): """Process instance-data.json dict for use in jinja templates. Replace hyphens with underscores for jinja templates and decode any @@ -127,21 +156,24 @@ def convert_jinja_instance_data(data, prefix='', sep='/', decode_paths=()): result = {} decode_paths = [path.replace('-', '_') for path in decode_paths] for key, value in sorted(data.items()): - if '-' in key: - # Standardize keys for use in #cloud-config/shell templates - key = key.replace('-', '_') key_path = '{0}{1}{2}'.format(prefix, sep, key) if prefix else key if key_path in decode_paths: value = b64d(value) if isinstance(value, dict): result[key] = convert_jinja_instance_data( - value, key_path, sep=sep, decode_paths=decode_paths) - if re.match(r'v\d+', key): + value, key_path, sep=sep, decode_paths=decode_paths, + include_key_aliases=include_key_aliases + ) + if re.match(r'v\d+$', key): # Copy values to top-level aliases for subkey, subvalue in result[key].items(): - result[subkey] = subvalue + result[subkey] = copy.deepcopy(subvalue) else: result[key] = value + if include_key_aliases: + alias_name = get_jinja_variable_alias(key) + if alias_name: + result[alias_name] = copy.deepcopy(result[key]) return result # vi: ts=4 expandtab diff --git a/cloudinit/sources/DataSourceLXD.py b/cloudinit/sources/DataSourceLXD.py index 55ae52a2..469707d2 100644 --- a/cloudinit/sources/DataSourceLXD.py +++ b/cloudinit/sources/DataSourceLXD.py @@ -190,19 +190,16 @@ class DataSourceLXD(sources.DataSource): self.metadata = _raw_instance_data_to_dict( "meta-data", self._crawled_metadata.get("meta-data") ) - if LXD_SOCKET_API_VERSION in self._crawled_metadata: - config = self._crawled_metadata[LXD_SOCKET_API_VERSION].get( - "config", {} + config = self._crawled_metadata.get("config", {}) + user_metadata = config.get("user.meta-data", {}) + if user_metadata: + user_metadata = _raw_instance_data_to_dict( + "user.meta-data", user_metadata + ) + if not isinstance(self.metadata, dict): + self.metadata = util.mergemanydict( + [util.load_yaml(self.metadata), user_metadata] ) - user_metadata = config.get("user.meta-data", {}) - if user_metadata: - user_metadata = _raw_instance_data_to_dict( - "user.meta-data", user_metadata - ) - if not isinstance(self.metadata, dict): - self.metadata = util.mergemanydict( - [util.load_yaml(self.metadata), user_metadata] - ) if "user-data" in self._crawled_metadata: self.userdata_raw = self._crawled_metadata["user-data"] if "network-config" in self._crawled_metadata: @@ -304,7 +301,8 @@ def read_metadata( if metadata_only: return md # Skip network-data, vendor-data, user-data - md[LXD_SOCKET_API_VERSION] = { + md = { + "_metadata_api_version": api_version, # Document API version read "config": {}, "meta-data": md["meta-data"] } @@ -345,7 +343,7 @@ def read_metadata( # Leave raw data values/format unchanged to represent it in # instance-data.json for cloud-init query or jinja template # use. - md[LXD_SOCKET_API_VERSION]["config"][cfg_key] = response.text + md["config"][cfg_key] = response.text # Promote common CONFIG_KEY_ALIASES to top-level keys. if cfg_key in CONFIG_KEY_ALIASES: # Due to sort of config_routes, promote cloud-init.* diff --git a/cloudinit/sources/tests/test_lxd.py b/cloudinit/sources/tests/test_lxd.py index fc2a41df..a6e51f3b 100644 --- a/cloudinit/sources/tests/test_lxd.py +++ b/cloudinit/sources/tests/test_lxd.py @@ -42,15 +42,12 @@ LXD_V1_METADATA = { "network-config": NETWORK_V1, "user-data": "#cloud-config\npackages: [sl]\n", "vendor-data": "#cloud-config\nruncmd: ['echo vendor-data']\n", - "1.0": { - "meta-data": "instance-id: my-lxc\nlocal-hostname: my-lxc\n\n", - "config": { - "user.user-data": - "instance-id: my-lxc\nlocal-hostname: my-lxc\n\n", - "user.vendor-data": - "#cloud-config\nruncmd: ['echo vendor-data']\n", - "user.network-config": yaml.safe_dump(NETWORK_V1), - } + "config": { + "user.user-data": + "instance-id: my-lxc\nlocal-hostname: my-lxc\n\n", + "user.vendor-data": + "#cloud-config\nruncmd: ['echo vendor-data']\n", + "user.network-config": yaml.safe_dump(NETWORK_V1), } } @@ -190,8 +187,10 @@ class TestReadMetadata: "http://lxd/1.0/meta-data": "local-hostname: md\n", "http://lxd/1.0/config": "[]", }, - {"1.0": {"config": {}, "meta-data": "local-hostname: md\n"}, - "meta-data": "local-hostname: md\n"}, + { + "_metadata_api_version": lxd.LXD_SOCKET_API_VERSION, + "config": {}, "meta-data": "local-hostname: md\n" + }, ["[GET] [HTTP:200] http://lxd/1.0/meta-data", "[GET] [HTTP:200] http://lxd/1.0/config"], ), @@ -211,12 +210,10 @@ class TestReadMetadata: "http://lxd/1.0/config/user.vendor-data": "", # 404 }, { - "1.0": { - "config": { - "user.custom1": "custom1", # Not promoted - "user.network-config": "net-config", - }, - "meta-data": "local-hostname: md\n", + "_metadata_api_version": lxd.LXD_SOCKET_API_VERSION, + "config": { + "user.custom1": "custom1", # Not promoted + "user.network-config": "net-config", }, "meta-data": "local-hostname: md\n", "network-config": "net-config", @@ -250,15 +247,13 @@ class TestReadMetadata: "http://lxd/1.0/config/user.vendor-data": "vendor-data", }, { - "1.0": { - "config": { - "user.custom1": "custom1", # Not promoted - "user.meta-data": "meta-data", - "user.network-config": "net-config", - "user.user-data": "user-data", - "user.vendor-data": "vendor-data", - }, - "meta-data": "local-hostname: md\n", + "_metadata_api_version": lxd.LXD_SOCKET_API_VERSION, + "config": { + "user.custom1": "custom1", # Not promoted + "user.meta-data": "meta-data", + "user.network-config": "net-config", + "user.user-data": "user-data", + "user.vendor-data": "vendor-data", }, "meta-data": "local-hostname: md\n", "network-config": "net-config", @@ -303,19 +298,17 @@ class TestReadMetadata: "cloud-init.vendor-data", }, { - "1.0": { - "config": { - "user.meta-data": "user.meta-data", - "user.network-config": "user.network-config", - "user.user-data": "user.user-data", - "user.vendor-data": "user.vendor-data", - "cloud-init.network-config": - "cloud-init.network-config", - "cloud-init.user-data": "cloud-init.user-data", - "cloud-init.vendor-data": - "cloud-init.vendor-data", - }, - "meta-data": "local-hostname: md\n", + "_metadata_api_version": lxd.LXD_SOCKET_API_VERSION, + "config": { + "user.meta-data": "user.meta-data", + "user.network-config": "user.network-config", + "user.user-data": "user.user-data", + "user.vendor-data": "user.vendor-data", + "cloud-init.network-config": + "cloud-init.network-config", + "cloud-init.user-data": "cloud-init.user-data", + "cloud-init.vendor-data": + "cloud-init.vendor-data", }, "meta-data": "local-hostname: md\n", "network-config": "cloud-init.network-config", -- cgit v1.2.3