diff options
author | Chad Smith <chad.smith@canonical.com> | 2022-01-18 10:05:29 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-01-18 10:05:29 -0700 |
commit | 4ba6fd283674df1ef25300d91c6d2061910744be (patch) | |
tree | c70e12ed177e8383a1e2e5fd1a1fdb041ac1d0b6 /cloudinit | |
parent | 45484c0b05d39461500212481e2466155dd1e210 (diff) | |
download | vyos-cloud-init-4ba6fd283674df1ef25300d91c6d2061910744be.tar.gz vyos-cloud-init-4ba6fd283674df1ef25300d91c6d2061910744be.zip |
Single JSON schema validation in early boot (#1175)
Package a single JSON schema file for user-data validation at
cloudinit/config/cloud-init-schema.json.
Perform the validate_cloudconfig_schema call just after the
user-data is consumed. This allows a single validation of all
user-data against the full schema instead of
repetitive validation calls against each cloud-config module's
(cloudinit.config.cc_*) sub-schema.
This branch defines the simple apt_pipelining schema and
migrates existing cc_apk_configure into cloud-init-schema.json.
The expectation is that additional branches will migrate from legacy
"schema" attributes inside each cloud-config module toward unique
cc_<module_name> definitions in the global schema file under "$defs"
of cloud-init-schema-X.Y.json.
Before legacy sub-schema definitions are migrated the following
funcs grew support to read sub-schemas from both static
cloud-init-schema.json and the individual cloud-config module
"schema" attributes:
- get_schema: source base schema file from cloud-init-schema.json
and supplement with all legacy cloud-config module "schema" defs
- get_meta_doc: optional schema param so cloud-config modules
no longer provide their own local sub-schemas
- _get_property_doc: render only documentation of sub-schema based
on meta['id'] provided
- validate_cloudconfig_schema: allow optional schema param
Additionally, fix two minor bugs in _schemapath_for_cloudconfig:
- `cloud-init devel schema --annotate` resulted in a Traceback
if two keys at the same indent level had invalid types.
- exit early on empty cloud-config to avoid a Traceback on the CLI
Diffstat (limited to 'cloudinit')
-rw-r--r-- | cloudinit/cmd/main.py | 7 | ||||
-rw-r--r-- | cloudinit/config/cc_apk_configure.py | 103 | ||||
-rw-r--r-- | cloudinit/config/cc_apt_pipelining.py | 63 | ||||
-rw-r--r-- | cloudinit/config/cloud-init-schema.json | 69 | ||||
-rw-r--r-- | cloudinit/config/schema.py | 99 |
5 files changed, 187 insertions, 154 deletions
diff --git a/cloudinit/cmd/main.py b/cloudinit/cmd/main.py index e67edbc3..c9be41b3 100644 --- a/cloudinit/cmd/main.py +++ b/cloudinit/cmd/main.py @@ -22,6 +22,7 @@ from cloudinit import patcher patcher.patch_logging() +from cloudinit.config.schema import validate_cloudconfig_schema from cloudinit import log as logging from cloudinit import netinfo from cloudinit import signal_handler @@ -474,6 +475,12 @@ def main_init(name, args): util.logexc(LOG, "Consuming user data failed!") return (init.datasource, ["Consuming user data failed!"]) + # Validate user-data adheres to schema definition + if os.path.exists(init.paths.get_ipath_cur("userdata_raw")): + validate_cloudconfig_schema(config=init.cfg, strict=False) + else: + LOG.debug("Skipping user-data validation. No user-data found.") + apply_reporting_cfg(init.cfg) # Stage 8 - re-read and apply relevant cloud-config to include user-data diff --git a/cloudinit/config/cc_apk_configure.py b/cloudinit/config/cc_apk_configure.py index a615c814..2cb2dad1 100644 --- a/cloudinit/config/cc_apk_configure.py +++ b/cloudinit/config/cc_apk_configure.py @@ -10,7 +10,7 @@ from textwrap import dedent from cloudinit import log as logging from cloudinit import temp_utils, templater, util -from cloudinit.config.schema import get_meta_doc, validate_cloudconfig_schema +from cloudinit.config.schema import get_meta_doc from cloudinit.settings import PER_INSTANCE LOG = logging.getLogger(__name__) @@ -102,104 +102,7 @@ meta = { "frequency": frequency, } -schema = { - "type": "object", - "properties": { - "apk_repos": { - "type": "object", - "properties": { - "preserve_repositories": { - "type": "boolean", - "default": False, - "description": dedent( - """\ - By default, cloud-init will generate a new repositories - file ``/etc/apk/repositories`` based on any valid - configuration settings specified within a apk_repos - section of cloud config. 
To disable this behavior and - preserve the repositories file from the pristine image, - set ``preserve_repositories`` to ``true``. - - The ``preserve_repositories`` option overrides - all other config keys that would alter - ``/etc/apk/repositories``. - """ - ), - }, - "alpine_repo": { - "type": ["object", "null"], - "properties": { - "base_url": { - "type": "string", - "default": DEFAULT_MIRROR, - "description": dedent( - """\ - The base URL of an Alpine repository, or - mirror, to download official packages from. - If not specified then it defaults to ``{}`` - """.format( - DEFAULT_MIRROR - ) - ), - }, - "community_enabled": { - "type": "boolean", - "default": False, - "description": dedent( - """\ - Whether to add the Community repo to the - repositories file. By default the Community - repo is not included. - """ - ), - }, - "testing_enabled": { - "type": "boolean", - "default": False, - "description": dedent( - """\ - Whether to add the Testing repo to the - repositories file. By default the Testing - repo is not included. It is only recommended - to use the Testing repo on a machine running - the ``Edge`` version of Alpine as packages - installed from Testing may have dependancies - that conflict with those in non-Edge Main or - Community repos." - """ - ), - }, - "version": { - "type": "string", - "description": dedent( - """\ - The Alpine version to use (e.g. 
``v3.12`` or - ``edge``) - """ - ), - }, - }, - "required": ["version"], - "minProperties": 1, - "additionalProperties": False, - }, - "local_repo_base_url": { - "type": "string", - "description": dedent( - """\ - The base URL of an Alpine repository containing - unofficial packages - """ - ), - }, - }, - "minProperties": 1, # Either preserve_repositories or alpine_repo - "additionalProperties": False, - } - }, -} - -__doc__ = get_meta_doc(meta, schema) +__doc__ = get_meta_doc(meta) def handle(name, cfg, cloud, log, _args): @@ -222,8 +125,6 @@ def handle(name, cfg, cloud, log, _args): ) return - validate_cloudconfig_schema(cfg, schema) - # If "preserve_repositories" is explicitly set to True in # the configuration do nothing. if util.get_cfg_option_bool(apk_section, "preserve_repositories", False): diff --git a/cloudinit/config/cc_apt_pipelining.py b/cloudinit/config/cc_apt_pipelining.py index 569849d1..34b6ac0e 100644 --- a/cloudinit/config/cc_apt_pipelining.py +++ b/cloudinit/config/cc_apt_pipelining.py @@ -4,54 +4,59 @@ # # This file is part of cloud-init. See LICENSE file for license information. -""" -Apt Pipelining --------------- -**Summary:** configure apt pipelining +"""Apt Pipelining: configure apt pipelining.""" -This module configures apt's ``Acquite::http::Pipeline-Depth`` option, which -controls how apt handles HTTP pipelining. It may be useful for pipelining to be -disabled, because some web servers, such as S3 do not pipeline properly (LP: -#948461). The ``apt_pipelining`` config key may be set to ``false`` to disable -pipelining altogether. This is the default behavior. If it is set to ``none``, -``unchanged``, or ``os``, no change will be made to apt configuration and the -default setting for the distro will be used. The pipeline depth can also be -manually specified by setting ``apt_pipelining`` to a number. However, this is -not recommended. 
- -**Internal name:** ``cc_apt_pipelining`` - -**Module frequency:** per instance - -**Supported distros:** ubuntu, debian - -**Config keys**:: - apt_pipelining: <false/none/unchanged/os/number> -""" +from textwrap import dedent from cloudinit import util +from cloudinit.config.schema import get_meta_doc from cloudinit.settings import PER_INSTANCE frequency = PER_INSTANCE - distros = ["ubuntu", "debian"] - DEFAULT_FILE = "/etc/apt/apt.conf.d/90cloud-init-pipelining" - APT_PIPE_TPL = ( "//Written by cloud-init per 'apt_pipelining'\n" 'Acquire::http::Pipeline-Depth "%s";\n' ) - # Acquire::http::Pipeline-Depth can be a value # from 0 to 5 indicating how many outstanding requests APT should send. # A value of zero MUST be specified if the remote host does not properly linger # on TCP connections - otherwise data corruption will occur. +meta = { + "id": "cc_apt_pipelining", + "name": "Apt Pipelining", + "title": "Configure apt pipelining", + "description": dedent( + """\ + This module configures apt's ``Acquite::http::Pipeline-Depth`` option, + which controls how apt handles HTTP pipelining. It may be useful for + pipelining to be disabled, because some web servers, such as S3 do not + pipeline properly (LP: #948461). + + Value configuration options for this module are: + + * ``false`` (Default): disable pipelining altogether + * ``none``, ``unchanged``, or ``os``: use distro default + * ``<number>``: Manually specify pipeline depth. 
This is not recommended.""" # noqa: E501 + ), + "distros": distros, + "frequency": frequency, + "examples": [ + "apt_pipelining: false", + "apt_pipelining: none", + "apt_pipelining: unchanged", + "apt_pipelining: os", + "apt_pipelining: 3", + ], +} + +__doc__ = get_meta_doc(meta) -def handle(_name, cfg, _cloud, log, _args): - apt_pipe_value = util.get_cfg_option_str(cfg, "apt_pipelining", "os") +def handle(_name, cfg, _cloud, log, _args): + apt_pipe_value = cfg.get("apt_pipelining", "os") apt_pipe_value_s = str(apt_pipe_value).lower().strip() if apt_pipe_value_s == "false": diff --git a/cloudinit/config/cloud-init-schema.json b/cloudinit/config/cloud-init-schema.json new file mode 100644 index 00000000..afaed285 --- /dev/null +++ b/cloudinit/config/cloud-init-schema.json @@ -0,0 +1,69 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "$defs": { + "cc_apk_configure": { + "type": "object", + "properties": { + "apk_repos": { + "type": "object", + "properties": { + "preserve_repositories": { + "type": "boolean", + "default": false, + "description": "By default, cloud-init will generate a new repositories file ``/etc/apk/repositories`` based on any valid configuration settings specified within a apk_repos section of cloud config. To disable this behavior and preserve the repositories file from the pristine image, set ``preserve_repositories`` to ``true``.\n\n The ``preserve_repositories`` option overrides all other config keys that would alter ``/etc/apk/repositories``." + }, + "alpine_repo": { + "type": ["object", "null"], + "properties": { + "base_url": { + "type": "string", + "default": "https://alpine.global.ssl.fastly.net/alpine", + "description": "The base URL of an Alpine repository, or mirror, to download official packages from. 
If not specified then it defaults to ``https://alpine.global.ssl.fastly.net/alpine``" + }, + "community_enabled": { + "type": "boolean", + "default": false, + "description": "Whether to add the Community repo to the repositories file. By default the Community repo is not included." + }, + "testing_enabled": { + "type": "boolean", + "default": false, + "description": "Whether to add the Testing repo to the repositories file. By default the Testing repo is not included. It is only recommended to use the Testing repo on a machine running the ``Edge`` version of Alpine as packages installed from Testing may have dependancies that conflict with those in non-Edge Main or Community repos." + }, + "version": { + "type": "string", + "description": "The Alpine version to use (e.g. ``v3.12`` or ``edge``)" + } + }, + "required": ["version"], + "minProperties": 1, + "additionalProperties": false + }, + "local_repo_base_url": { + "type": "string", + "description": "The base URL of an Alpine repository containing unofficial packages" + } + }, + "minProperties": 1, + "additionalProperties": false + } + } + }, + "cc_apt_pipelining": { + "type": "object", + "properties": { + "apt_pipelining": { + "oneOf": [ + {"type": "integer"}, + {"type": "boolean"}, + {"type": "string", "enum": ["none", "unchanged", "os"]} + ] + } + } + } + }, + "allOf": [ + { "$ref": "#/$defs/cc_apk_configure" }, + { "$ref": "#/$defs/cc_apt_pipelining" } + ] +} diff --git a/cloudinit/config/schema.py b/cloudinit/config/schema.py index f54cf18f..1f969c97 100644 --- a/cloudinit/config/schema.py +++ b/cloudinit/config/schema.py @@ -2,6 +2,7 @@ """schema.py: Set of module functions for processing cloud-config schema.""" import argparse +import json import logging import os import re @@ -166,14 +167,18 @@ def validate_cloudconfig_metaschema(validator, schema: dict, throw=True): def validate_cloudconfig_schema( - config: dict, schema: dict, strict=False, strict_metaschema=False + config: dict, + schema: dict = None, + 
strict: bool = False, + strict_metaschema: bool = False, ): """Validate provided config meets the schema definition. @param config: Dict of cloud configuration settings validated against schema. Ignored if strict_metaschema=True @param schema: jsonschema dict describing the supported schema definition - for the cloud config module (config.cc_*). + for the cloud config module (config.cc_*). If None, validate against + global schema. @param strict: Boolean, when True raise SchemaValidationErrors instead of logging warnings. @param strict_metaschema: Boolean, when True validates schema using strict @@ -183,6 +188,8 @@ def validate_cloudconfig_schema( against the provided schema. @raises: RuntimeError when provided config sourced from YAML is not a dict. """ + if schema is None: + schema = get_schema() try: (cloudinitValidator, FormatChecker) = get_jsonschema_validator() if strict_metaschema: @@ -203,7 +210,9 @@ def validate_cloudconfig_schema( raise SchemaValidationError(errors) else: messages = ["{0}: {1}".format(k, msg) for k, msg in errors] - LOG.warning("Invalid config:\n%s", "\n".join(messages)) + LOG.warning( + "Invalid cloud-config provided:\n%s", "\n".join(messages) + ) def annotated_cloudconfig_file(cloudconfig, original_content, schema_errors): @@ -347,12 +356,14 @@ def _schemapath_for_cloudconfig(config, original_content): @param config: The yaml.loaded config dictionary of a cloud-config file. 
@param original_content: The simple file content of the cloud-config file """ - # FIXME Doesn't handle multi-line lists or multi-line strings + # TODO( handle multi-line lists or multi-line strings, inline dicts) content_lines = original_content.decode().split("\n") schema_line_numbers = {} list_index = 0 RE_YAML_INDENT = r"^(\s*)" scopes = [] + if not config: + return {} # No YAML config dict, no schemapaths to annotate for line_number, line in enumerate(content_lines, 1): indent_depth = len(re.match(RE_YAML_INDENT, line).groups()[0]) line = line.strip() @@ -369,7 +380,6 @@ def _schemapath_for_cloudconfig(config, original_content): if path_prefix and path_prefix.endswith(previous_list_idx): path_prefix = path_prefix[: -len(previous_list_idx)] key = str(list_index) - schema_line_numbers[key] = line_number item_indent = len(re.match(RE_YAML_INDENT, line[1:]).groups()[0]) item_indent += 1 # For the leading '-' character previous_depth = indent_depth @@ -380,7 +390,7 @@ def _schemapath_for_cloudconfig(config, original_content): # Process non-list lines setting value if present list_index = 0 key, value = line.split(":", 1) - if path_prefix: + if path_prefix and indent_depth > previous_depth: # Append any existing path_prefix for a fully-pathed key key = path_prefix + "." + key while indent_depth <= previous_depth: @@ -409,10 +419,17 @@ def _get_property_type(property_dict: dict) -> str: jsonschema. 
""" property_type = property_dict.get("type") - if property_type is None and property_dict.get("enum"): - property_type = [ - str(_YAML_MAP.get(k, k)) for k in property_dict["enum"] - ] + if property_type is None: + if property_dict.get("enum"): + property_type = [ + str(_YAML_MAP.get(k, k)) for k in property_dict["enum"] + ] + elif property_dict.get("oneOf"): + property_type = [ + subschema["type"] + for subschema in property_dict.get("oneOf") + if subschema.get("type") + ] if isinstance(property_type, list): property_type = "/".join(property_type) items = property_dict.get("items", {}) @@ -449,7 +466,7 @@ def _parse_description(description, prefix) -> str: return description -def _get_property_doc(schema: dict, prefix=" ") -> str: +def _get_property_doc(schema: dict, defs: dict, prefix=" ") -> str: """Return restructured text describing the supported schema properties.""" new_prefix = prefix + " " properties = [] @@ -460,6 +477,10 @@ def _get_property_doc(schema: dict, prefix=" ") -> str: for props in property_keys: for prop_key, prop_config in props.items(): + if "$ref" in prop_config: + # Update the defined references in subschema for doc rendering + ref = defs[prop_config["$ref"].replace("#/$defs/", "")] + prop_config.update(ref) # Define prop_name and description for SCHEMA_PROPERTY_TMPL description = prop_config.get("description", "") @@ -478,7 +499,9 @@ def _get_property_doc(schema: dict, prefix=" ") -> str: if isinstance(items, list): for item in items: properties.append( - _get_property_doc(item, prefix=new_prefix) + _get_property_doc( + item, defs=defs, prefix=new_prefix + ) ) elif isinstance(items, dict) and ( items.get("properties") or items.get("patternProperties") @@ -490,14 +513,16 @@ def _get_property_doc(schema: dict, prefix=" ") -> str: ) new_prefix += " " properties.append( - _get_property_doc(items, prefix=new_prefix) + _get_property_doc(items, defs=defs, prefix=new_prefix) ) if ( "properties" in prop_config or "patternProperties" in 
prop_config ): properties.append( - _get_property_doc(prop_config, prefix=new_prefix) + _get_property_doc( + prop_config, defs=defs, prefix=new_prefix + ) ) return "\n\n".join(properties) @@ -520,15 +545,18 @@ def _get_examples(meta: MetaSchema) -> str: return rst_content -def get_meta_doc(meta: MetaSchema, schema: dict) -> str: +def get_meta_doc(meta: MetaSchema, schema: dict = None) -> str: """Return reStructured text rendering the provided metadata. @param meta: Dict of metadata to render. + @param schema: Optional module schema, if absent, read global schema. @raise KeyError: If metadata lacks an expected key. """ + if schema is None: + schema = get_schema() if not meta or not schema: - raise ValueError("Expected meta and schema") + raise ValueError("Expected non-empty meta and schema") keys = set(meta.keys()) expected = set( { @@ -557,8 +585,11 @@ def get_meta_doc(meta: MetaSchema, schema: dict) -> str: # cast away type annotation meta_copy = dict(deepcopy(meta)) + defs = schema.get("$defs", {}) + if defs.get(meta["id"]): + schema = defs.get(meta["id"]) try: - meta_copy["property_doc"] = _get_property_doc(schema) + meta_copy["property_doc"] = _get_property_doc(schema, defs=defs) except AttributeError: LOG.warning("Unable to render property_doc due to invalid schema") meta_copy["property_doc"] = "" @@ -593,7 +624,7 @@ def load_doc(requested_modules: list) -> str: for mod_name in all_modules: if "all" in requested_modules or mod_name in requested_modules: (mod_locs, _) = importer.find_module( - mod_name, ["cloudinit.config"], ["schema"] + mod_name, ["cloudinit.config"], ["meta"] ) if mod_locs: mod = importer.import_module(mod_locs[0]) @@ -602,14 +633,34 @@ def load_doc(requested_modules: list) -> str: def get_schema() -> dict: - """Return jsonschema coalesced from all cc_* cloud-config module.""" - full_schema = { - "$schema": "http://json-schema.org/draft-04/schema#", - "id": "cloud-config-schema", - "allOf": [], - } + """Return jsonschema coalesced from all 
cc_* cloud-config modules.""" + schema_file = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "cloud-init-schema.json" + ) + full_schema = None + try: + full_schema = json.loads(load_file(schema_file)) + except Exception as e: + LOG.warning("Cannot parse JSON schema file %s. %s", schema_file, e) + if not full_schema: + LOG.warning( + "No base JSON schema files found at %s." + " Setting default empty schema", + schema_file, + ) + full_schema = { + "$defs": {}, + "$schema": "http://json-schema.org/draft-04/schema#", + "allOf": [], + } + # TODO( Drop the get_modules loop when all legacy cc_* schema migrates ) + # Supplement base_schema with any legacy modules which still contain a + # "schema" attribute. Legacy cc_* modules will be migrated to use the + # store module schema in the composite cloud-init-schema-<version>.json + # and will drop "schema" at that point. for (_, mod_name) in get_modules().items(): + # All cc_* modules need a "meta" attribute to represent schema defs (mod_locs, _) = importer.find_module( mod_name, ["cloudinit.config"], ["schema"] ) |