summaryrefslogtreecommitdiff
path: root/cloudinit
diff options
context:
space:
mode:
author: Chad Smith <chad.smith@canonical.com> 2022-01-18 10:05:29 -0700
committer: GitHub <noreply@github.com> 2022-01-18 10:05:29 -0700
commit: 4ba6fd283674df1ef25300d91c6d2061910744be (patch)
tree: c70e12ed177e8383a1e2e5fd1a1fdb041ac1d0b6 /cloudinit
parent: 45484c0b05d39461500212481e2466155dd1e210 (diff)
download: vyos-cloud-init-4ba6fd283674df1ef25300d91c6d2061910744be.tar.gz
vyos-cloud-init-4ba6fd283674df1ef25300d91c6d2061910744be.zip
Single JSON schema validation in early boot (#1175)
Package a single JSON schema file for user-data validation at cloudinit/config/cloud-init-schema.json.

Perform the validate_cloudconfig_schema call just after the user-data is consumed. This allows a single validation of all user-data against the full schema instead of repetitive validation calls against each cloud-config module's (cloudinit.config.cc_*) sub-schema.

This branch defines the simple apt_pipelining schema and migrates the existing cc_apk_configure schema into cloud-init-schema.json. The expectation is that additional branches will migrate from the legacy "schema" attributes inside each cloud-config module toward unique cc_<module_name> definitions in the global schema file under "$defs" of cloud-init-schema-X.Y.json.

Until the legacy sub-schema definitions are migrated, the following functions grew support for reading sub-schemas from both the static cloud-init-schema.json and the individual cloud-config module "schema" attributes:
- get_schema: source the base schema from cloud-init-schema.json and supplement it with all legacy cloud-config module "schema" defs
- get_meta_doc: optional schema param so cloud-config modules no longer provide their own local sub-schemas
- _get_property_doc: render only the documentation of the sub-schema selected by the provided meta['id']
- validate_cloudconfig_schema: allow optional schema param

Additionally, fix two minor bugs in _schemapath_for_cloudconfig:
- `cloud-init devel schema --annotate` resulted in a Traceback if two keys at the same indent level had invalid types
- exit early on empty cloud-config to avoid a Traceback on the CLI
Diffstat (limited to 'cloudinit')
-rw-r--r-- cloudinit/cmd/main.py | 7
-rw-r--r-- cloudinit/config/cc_apk_configure.py | 103
-rw-r--r-- cloudinit/config/cc_apt_pipelining.py | 63
-rw-r--r-- cloudinit/config/cloud-init-schema.json | 69
-rw-r--r-- cloudinit/config/schema.py | 99
5 files changed, 187 insertions, 154 deletions
diff --git a/cloudinit/cmd/main.py b/cloudinit/cmd/main.py
index e67edbc3..c9be41b3 100644
--- a/cloudinit/cmd/main.py
+++ b/cloudinit/cmd/main.py
@@ -22,6 +22,7 @@ from cloudinit import patcher
patcher.patch_logging()
+from cloudinit.config.schema import validate_cloudconfig_schema
from cloudinit import log as logging
from cloudinit import netinfo
from cloudinit import signal_handler
@@ -474,6 +475,12 @@ def main_init(name, args):
util.logexc(LOG, "Consuming user data failed!")
return (init.datasource, ["Consuming user data failed!"])
+ # Validate user-data adheres to schema definition
+ if os.path.exists(init.paths.get_ipath_cur("userdata_raw")):
+ validate_cloudconfig_schema(config=init.cfg, strict=False)
+ else:
+ LOG.debug("Skipping user-data validation. No user-data found.")
+
apply_reporting_cfg(init.cfg)
# Stage 8 - re-read and apply relevant cloud-config to include user-data
diff --git a/cloudinit/config/cc_apk_configure.py b/cloudinit/config/cc_apk_configure.py
index a615c814..2cb2dad1 100644
--- a/cloudinit/config/cc_apk_configure.py
+++ b/cloudinit/config/cc_apk_configure.py
@@ -10,7 +10,7 @@ from textwrap import dedent
from cloudinit import log as logging
from cloudinit import temp_utils, templater, util
-from cloudinit.config.schema import get_meta_doc, validate_cloudconfig_schema
+from cloudinit.config.schema import get_meta_doc
from cloudinit.settings import PER_INSTANCE
LOG = logging.getLogger(__name__)
@@ -102,104 +102,7 @@ meta = {
"frequency": frequency,
}
-schema = {
- "type": "object",
- "properties": {
- "apk_repos": {
- "type": "object",
- "properties": {
- "preserve_repositories": {
- "type": "boolean",
- "default": False,
- "description": dedent(
- """\
- By default, cloud-init will generate a new repositories
- file ``/etc/apk/repositories`` based on any valid
- configuration settings specified within a apk_repos
- section of cloud config. To disable this behavior and
- preserve the repositories file from the pristine image,
- set ``preserve_repositories`` to ``true``.
-
- The ``preserve_repositories`` option overrides
- all other config keys that would alter
- ``/etc/apk/repositories``.
- """
- ),
- },
- "alpine_repo": {
- "type": ["object", "null"],
- "properties": {
- "base_url": {
- "type": "string",
- "default": DEFAULT_MIRROR,
- "description": dedent(
- """\
- The base URL of an Alpine repository, or
- mirror, to download official packages from.
- If not specified then it defaults to ``{}``
- """.format(
- DEFAULT_MIRROR
- )
- ),
- },
- "community_enabled": {
- "type": "boolean",
- "default": False,
- "description": dedent(
- """\
- Whether to add the Community repo to the
- repositories file. By default the Community
- repo is not included.
- """
- ),
- },
- "testing_enabled": {
- "type": "boolean",
- "default": False,
- "description": dedent(
- """\
- Whether to add the Testing repo to the
- repositories file. By default the Testing
- repo is not included. It is only recommended
- to use the Testing repo on a machine running
- the ``Edge`` version of Alpine as packages
- installed from Testing may have dependancies
- that conflict with those in non-Edge Main or
- Community repos."
- """
- ),
- },
- "version": {
- "type": "string",
- "description": dedent(
- """\
- The Alpine version to use (e.g. ``v3.12`` or
- ``edge``)
- """
- ),
- },
- },
- "required": ["version"],
- "minProperties": 1,
- "additionalProperties": False,
- },
- "local_repo_base_url": {
- "type": "string",
- "description": dedent(
- """\
- The base URL of an Alpine repository containing
- unofficial packages
- """
- ),
- },
- },
- "minProperties": 1, # Either preserve_repositories or alpine_repo
- "additionalProperties": False,
- }
- },
-}
-
-__doc__ = get_meta_doc(meta, schema)
+__doc__ = get_meta_doc(meta)
def handle(name, cfg, cloud, log, _args):
@@ -222,8 +125,6 @@ def handle(name, cfg, cloud, log, _args):
)
return
- validate_cloudconfig_schema(cfg, schema)
-
# If "preserve_repositories" is explicitly set to True in
# the configuration do nothing.
if util.get_cfg_option_bool(apk_section, "preserve_repositories", False):
diff --git a/cloudinit/config/cc_apt_pipelining.py b/cloudinit/config/cc_apt_pipelining.py
index 569849d1..34b6ac0e 100644
--- a/cloudinit/config/cc_apt_pipelining.py
+++ b/cloudinit/config/cc_apt_pipelining.py
@@ -4,54 +4,59 @@
#
# This file is part of cloud-init. See LICENSE file for license information.
-"""
-Apt Pipelining
---------------
-**Summary:** configure apt pipelining
+"""Apt Pipelining: configure apt pipelining."""
-This module configures apt's ``Acquite::http::Pipeline-Depth`` option, which
-controls how apt handles HTTP pipelining. It may be useful for pipelining to be
-disabled, because some web servers, such as S3 do not pipeline properly (LP:
-#948461). The ``apt_pipelining`` config key may be set to ``false`` to disable
-pipelining altogether. This is the default behavior. If it is set to ``none``,
-``unchanged``, or ``os``, no change will be made to apt configuration and the
-default setting for the distro will be used. The pipeline depth can also be
-manually specified by setting ``apt_pipelining`` to a number. However, this is
-not recommended.
-
-**Internal name:** ``cc_apt_pipelining``
-
-**Module frequency:** per instance
-
-**Supported distros:** ubuntu, debian
-
-**Config keys**::
- apt_pipelining: <false/none/unchanged/os/number>
-"""
+from textwrap import dedent
from cloudinit import util
+from cloudinit.config.schema import get_meta_doc
from cloudinit.settings import PER_INSTANCE
frequency = PER_INSTANCE
-
distros = ["ubuntu", "debian"]
-
DEFAULT_FILE = "/etc/apt/apt.conf.d/90cloud-init-pipelining"
-
APT_PIPE_TPL = (
"//Written by cloud-init per 'apt_pipelining'\n"
'Acquire::http::Pipeline-Depth "%s";\n'
)
-
# Acquire::http::Pipeline-Depth can be a value
# from 0 to 5 indicating how many outstanding requests APT should send.
# A value of zero MUST be specified if the remote host does not properly linger
# on TCP connections - otherwise data corruption will occur.
+meta = {
+ "id": "cc_apt_pipelining",
+ "name": "Apt Pipelining",
+ "title": "Configure apt pipelining",
+ "description": dedent(
+ """\
+ This module configures apt's ``Acquite::http::Pipeline-Depth`` option,
+ which controls how apt handles HTTP pipelining. It may be useful for
+ pipelining to be disabled, because some web servers, such as S3 do not
+ pipeline properly (LP: #948461).
+
+ Value configuration options for this module are:
+
+ * ``false`` (Default): disable pipelining altogether
+ * ``none``, ``unchanged``, or ``os``: use distro default
+ * ``<number>``: Manually specify pipeline depth. This is not recommended.""" # noqa: E501
+ ),
+ "distros": distros,
+ "frequency": frequency,
+ "examples": [
+ "apt_pipelining: false",
+ "apt_pipelining: none",
+ "apt_pipelining: unchanged",
+ "apt_pipelining: os",
+ "apt_pipelining: 3",
+ ],
+}
+
+__doc__ = get_meta_doc(meta)
-def handle(_name, cfg, _cloud, log, _args):
- apt_pipe_value = util.get_cfg_option_str(cfg, "apt_pipelining", "os")
+def handle(_name, cfg, _cloud, log, _args):
+ apt_pipe_value = cfg.get("apt_pipelining", "os")
apt_pipe_value_s = str(apt_pipe_value).lower().strip()
if apt_pipe_value_s == "false":
diff --git a/cloudinit/config/cloud-init-schema.json b/cloudinit/config/cloud-init-schema.json
new file mode 100644
index 00000000..afaed285
--- /dev/null
+++ b/cloudinit/config/cloud-init-schema.json
@@ -0,0 +1,69 @@
+{
+ "$schema": "http://json-schema.org/draft-04/schema#",
+ "$defs": {
+ "cc_apk_configure": {
+ "type": "object",
+ "properties": {
+ "apk_repos": {
+ "type": "object",
+ "properties": {
+ "preserve_repositories": {
+ "type": "boolean",
+ "default": false,
+ "description": "By default, cloud-init will generate a new repositories file ``/etc/apk/repositories`` based on any valid configuration settings specified within a apk_repos section of cloud config. To disable this behavior and preserve the repositories file from the pristine image, set ``preserve_repositories`` to ``true``.\n\n The ``preserve_repositories`` option overrides all other config keys that would alter ``/etc/apk/repositories``."
+ },
+ "alpine_repo": {
+ "type": ["object", "null"],
+ "properties": {
+ "base_url": {
+ "type": "string",
+ "default": "https://alpine.global.ssl.fastly.net/alpine",
+ "description": "The base URL of an Alpine repository, or mirror, to download official packages from. If not specified then it defaults to ``https://alpine.global.ssl.fastly.net/alpine``"
+ },
+ "community_enabled": {
+ "type": "boolean",
+ "default": false,
+ "description": "Whether to add the Community repo to the repositories file. By default the Community repo is not included."
+ },
+ "testing_enabled": {
+ "type": "boolean",
+ "default": false,
+ "description": "Whether to add the Testing repo to the repositories file. By default the Testing repo is not included. It is only recommended to use the Testing repo on a machine running the ``Edge`` version of Alpine as packages installed from Testing may have dependencies that conflict with those in non-Edge Main or Community repos."
+ },
+ "version": {
+ "type": "string",
+ "description": "The Alpine version to use (e.g. ``v3.12`` or ``edge``)"
+ }
+ },
+ "required": ["version"],
+ "minProperties": 1,
+ "additionalProperties": false
+ },
+ "local_repo_base_url": {
+ "type": "string",
+ "description": "The base URL of an Alpine repository containing unofficial packages"
+ }
+ },
+ "minProperties": 1,
+ "additionalProperties": false
+ }
+ }
+ },
+ "cc_apt_pipelining": {
+ "type": "object",
+ "properties": {
+ "apt_pipelining": {
+ "oneOf": [
+ {"type": "integer"},
+ {"type": "boolean"},
+ {"type": "string", "enum": ["none", "unchanged", "os"]}
+ ]
+ }
+ }
+ }
+ },
+ "allOf": [
+ { "$ref": "#/$defs/cc_apk_configure" },
+ { "$ref": "#/$defs/cc_apt_pipelining" }
+ ]
+}
diff --git a/cloudinit/config/schema.py b/cloudinit/config/schema.py
index f54cf18f..1f969c97 100644
--- a/cloudinit/config/schema.py
+++ b/cloudinit/config/schema.py
@@ -2,6 +2,7 @@
"""schema.py: Set of module functions for processing cloud-config schema."""
import argparse
+import json
import logging
import os
import re
@@ -166,14 +167,18 @@ def validate_cloudconfig_metaschema(validator, schema: dict, throw=True):
def validate_cloudconfig_schema(
- config: dict, schema: dict, strict=False, strict_metaschema=False
+ config: dict,
+ schema: dict = None,
+ strict: bool = False,
+ strict_metaschema: bool = False,
):
"""Validate provided config meets the schema definition.
@param config: Dict of cloud configuration settings validated against
schema. Ignored if strict_metaschema=True
@param schema: jsonschema dict describing the supported schema definition
- for the cloud config module (config.cc_*).
+ for the cloud config module (config.cc_*). If None, validate against
+ global schema.
@param strict: Boolean, when True raise SchemaValidationErrors instead of
logging warnings.
@param strict_metaschema: Boolean, when True validates schema using strict
@@ -183,6 +188,8 @@ def validate_cloudconfig_schema(
against the provided schema.
@raises: RuntimeError when provided config sourced from YAML is not a dict.
"""
+ if schema is None:
+ schema = get_schema()
try:
(cloudinitValidator, FormatChecker) = get_jsonschema_validator()
if strict_metaschema:
@@ -203,7 +210,9 @@ def validate_cloudconfig_schema(
raise SchemaValidationError(errors)
else:
messages = ["{0}: {1}".format(k, msg) for k, msg in errors]
- LOG.warning("Invalid config:\n%s", "\n".join(messages))
+ LOG.warning(
+ "Invalid cloud-config provided:\n%s", "\n".join(messages)
+ )
def annotated_cloudconfig_file(cloudconfig, original_content, schema_errors):
@@ -347,12 +356,14 @@ def _schemapath_for_cloudconfig(config, original_content):
@param config: The yaml.loaded config dictionary of a cloud-config file.
@param original_content: The simple file content of the cloud-config file
"""
- # FIXME Doesn't handle multi-line lists or multi-line strings
+ # TODO( handle multi-line lists or multi-line strings, inline dicts)
content_lines = original_content.decode().split("\n")
schema_line_numbers = {}
list_index = 0
RE_YAML_INDENT = r"^(\s*)"
scopes = []
+ if not config:
+ return {} # No YAML config dict, no schemapaths to annotate
for line_number, line in enumerate(content_lines, 1):
indent_depth = len(re.match(RE_YAML_INDENT, line).groups()[0])
line = line.strip()
@@ -369,7 +380,6 @@ def _schemapath_for_cloudconfig(config, original_content):
if path_prefix and path_prefix.endswith(previous_list_idx):
path_prefix = path_prefix[: -len(previous_list_idx)]
key = str(list_index)
- schema_line_numbers[key] = line_number
item_indent = len(re.match(RE_YAML_INDENT, line[1:]).groups()[0])
item_indent += 1 # For the leading '-' character
previous_depth = indent_depth
@@ -380,7 +390,7 @@ def _schemapath_for_cloudconfig(config, original_content):
# Process non-list lines setting value if present
list_index = 0
key, value = line.split(":", 1)
- if path_prefix:
+ if path_prefix and indent_depth > previous_depth:
# Append any existing path_prefix for a fully-pathed key
key = path_prefix + "." + key
while indent_depth <= previous_depth:
@@ -409,10 +419,17 @@ def _get_property_type(property_dict: dict) -> str:
jsonschema.
"""
property_type = property_dict.get("type")
- if property_type is None and property_dict.get("enum"):
- property_type = [
- str(_YAML_MAP.get(k, k)) for k in property_dict["enum"]
- ]
+ if property_type is None:
+ if property_dict.get("enum"):
+ property_type = [
+ str(_YAML_MAP.get(k, k)) for k in property_dict["enum"]
+ ]
+ elif property_dict.get("oneOf"):
+ property_type = [
+ subschema["type"]
+ for subschema in property_dict.get("oneOf")
+ if subschema.get("type")
+ ]
if isinstance(property_type, list):
property_type = "/".join(property_type)
items = property_dict.get("items", {})
@@ -449,7 +466,7 @@ def _parse_description(description, prefix) -> str:
return description
-def _get_property_doc(schema: dict, prefix=" ") -> str:
+def _get_property_doc(schema: dict, defs: dict, prefix=" ") -> str:
"""Return restructured text describing the supported schema properties."""
new_prefix = prefix + " "
properties = []
@@ -460,6 +477,10 @@ def _get_property_doc(schema: dict, prefix=" ") -> str:
for props in property_keys:
for prop_key, prop_config in props.items():
+ if "$ref" in prop_config:
+ # Update the defined references in subschema for doc rendering
+ ref = defs[prop_config["$ref"].replace("#/$defs/", "")]
+ prop_config.update(ref)
# Define prop_name and description for SCHEMA_PROPERTY_TMPL
description = prop_config.get("description", "")
@@ -478,7 +499,9 @@ def _get_property_doc(schema: dict, prefix=" ") -> str:
if isinstance(items, list):
for item in items:
properties.append(
- _get_property_doc(item, prefix=new_prefix)
+ _get_property_doc(
+ item, defs=defs, prefix=new_prefix
+ )
)
elif isinstance(items, dict) and (
items.get("properties") or items.get("patternProperties")
@@ -490,14 +513,16 @@ def _get_property_doc(schema: dict, prefix=" ") -> str:
)
new_prefix += " "
properties.append(
- _get_property_doc(items, prefix=new_prefix)
+ _get_property_doc(items, defs=defs, prefix=new_prefix)
)
if (
"properties" in prop_config
or "patternProperties" in prop_config
):
properties.append(
- _get_property_doc(prop_config, prefix=new_prefix)
+ _get_property_doc(
+ prop_config, defs=defs, prefix=new_prefix
+ )
)
return "\n\n".join(properties)
@@ -520,15 +545,18 @@ def _get_examples(meta: MetaSchema) -> str:
return rst_content
-def get_meta_doc(meta: MetaSchema, schema: dict) -> str:
+def get_meta_doc(meta: MetaSchema, schema: dict = None) -> str:
"""Return reStructured text rendering the provided metadata.
@param meta: Dict of metadata to render.
+ @param schema: Optional module schema, if absent, read global schema.
@raise KeyError: If metadata lacks an expected key.
"""
+ if schema is None:
+ schema = get_schema()
if not meta or not schema:
- raise ValueError("Expected meta and schema")
+ raise ValueError("Expected non-empty meta and schema")
keys = set(meta.keys())
expected = set(
{
@@ -557,8 +585,11 @@ def get_meta_doc(meta: MetaSchema, schema: dict) -> str:
# cast away type annotation
meta_copy = dict(deepcopy(meta))
+ defs = schema.get("$defs", {})
+ if defs.get(meta["id"]):
+ schema = defs.get(meta["id"])
try:
- meta_copy["property_doc"] = _get_property_doc(schema)
+ meta_copy["property_doc"] = _get_property_doc(schema, defs=defs)
except AttributeError:
LOG.warning("Unable to render property_doc due to invalid schema")
meta_copy["property_doc"] = ""
@@ -593,7 +624,7 @@ def load_doc(requested_modules: list) -> str:
for mod_name in all_modules:
if "all" in requested_modules or mod_name in requested_modules:
(mod_locs, _) = importer.find_module(
- mod_name, ["cloudinit.config"], ["schema"]
+ mod_name, ["cloudinit.config"], ["meta"]
)
if mod_locs:
mod = importer.import_module(mod_locs[0])
@@ -602,14 +633,34 @@ def load_doc(requested_modules: list) -> str:
def get_schema() -> dict:
- """Return jsonschema coalesced from all cc_* cloud-config module."""
- full_schema = {
- "$schema": "http://json-schema.org/draft-04/schema#",
- "id": "cloud-config-schema",
- "allOf": [],
- }
+ """Return jsonschema coalesced from all cc_* cloud-config modules."""
+ schema_file = os.path.join(
+ os.path.dirname(os.path.abspath(__file__)), "cloud-init-schema.json"
+ )
+ full_schema = None
+ try:
+ full_schema = json.loads(load_file(schema_file))
+ except Exception as e:
+ LOG.warning("Cannot parse JSON schema file %s. %s", schema_file, e)
+ if not full_schema:
+ LOG.warning(
+ "No base JSON schema files found at %s."
+ " Setting default empty schema",
+ schema_file,
+ )
+ full_schema = {
+ "$defs": {},
+ "$schema": "http://json-schema.org/draft-04/schema#",
+ "allOf": [],
+ }
+ # TODO( Drop the get_modules loop when all legacy cc_* schema migrates )
+ # Supplement base_schema with any legacy modules which still contain a
+ # "schema" attribute. Legacy cc_* modules will be migrated to use the
+ # store module schema in the composite cloud-init-schema-<version>.json
+ # and will drop "schema" at that point.
for (_, mod_name) in get_modules().items():
+ # All cc_* modules need a "meta" attribute to represent schema defs
(mod_locs, _) = importer.find_module(
mod_name, ["cloudinit.config"], ["schema"]
)