From bedac77e9348e7a54c0ec364fb61df90cd893972 Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Mon, 6 Dec 2021 15:27:12 -0700 Subject: Add Strict Metaschema Validation (#1101) Improve schema validation. This adds strict validation of config module definitions at testing time, with plumbing included for future runtime validation. This eliminates a class of bugs resulting from schemas that have definitions that are incorrect, but get interpreted by jsonschema as "additionalProperties" that are therefore ignored. - Add strict meta-schema for jsonschema unit test validation - Separate schema from module metadata structure - Improve type annotations for various functions and data types Cleanup: - Remove unused jsonschema "required" elements - Eliminate manual memoization in schema.py:get_schema(), reference module.__doc__ directly --- cloudinit/config/cc_apk_configure.py | 11 +- cloudinit/config/cc_apt_configure.py | 11 +- cloudinit/config/cc_bootcmd.py | 11 +- cloudinit/config/cc_chef.py | 11 +- cloudinit/config/cc_install_hotplug.py | 9 +- cloudinit/config/cc_locale.py | 9 +- cloudinit/config/cc_ntp.py | 11 +- cloudinit/config/cc_resizefs.py | 10 +- cloudinit/config/cc_runcmd.py | 11 +- cloudinit/config/cc_snap.py | 11 +- cloudinit/config/cc_ubuntu_advantage.py | 10 +- cloudinit/config/cc_ubuntu_drivers.py | 10 +- cloudinit/config/cc_write_files.py | 10 +- cloudinit/config/cc_write_files_deferred.py | 41 ++-- cloudinit/config/cc_zypper_add_repo.py | 10 +- cloudinit/config/schema.py | 288 ++++++++++++++++++++-------- 16 files changed, 310 insertions(+), 164 deletions(-) (limited to 'cloudinit/config') diff --git a/cloudinit/config/cc_apk_configure.py b/cloudinit/config/cc_apk_configure.py index 84d7a0b6..d227a58d 100644 --- a/cloudinit/config/cc_apk_configure.py +++ b/cloudinit/config/cc_apk_configure.py @@ -12,8 +12,7 @@ from cloudinit import log as logging from cloudinit import temp_utils from cloudinit import templater from cloudinit import util -from cloudinit.config.schema import ( - get_schema_doc, validate_cloudconfig_schema) +from cloudinit.config.schema import get_meta_doc, validate_cloudconfig_schema from cloudinit.settings import PER_INSTANCE LOG = logging.getLogger(__name__) @@ -56,7 +55,7 @@ REPOSITORIES_TEMPLATE = """\ frequency = PER_INSTANCE distros = ['alpine'] -schema = { +meta = { 'id': 'cc_apk_configure', 'name': 'APK Configure', 'title': 'Configure apk repositories file', @@ -95,6 +94,9 @@ schema = { """), ], 'frequency': frequency, +} + +schema = { 'type': 'object', 'properties': { 'apk_repos': { @@ -171,14 +173,13 @@ schema = { """) } }, - 'required': [], 'minProperties': 1, # Either preserve_repositories or alpine_repo 'additionalProperties': False, } } } -__doc__ = get_schema_doc(schema) +__doc__ = get_meta_doc(meta, schema) def handle(name, cfg, cloud, log, _args): diff --git a/cloudinit/config/cc_apt_configure.py b/cloudinit/config/cc_apt_configure.py index 86d0feae..2e844c2c 100644 --- a/cloudinit/config/cc_apt_configure.py +++ b/cloudinit/config/cc_apt_configure.py @@ -14,8 +14,7 @@ import re import pathlib from textwrap import dedent -from cloudinit.config.schema import ( - get_schema_doc, validate_cloudconfig_schema) +from cloudinit.config.schema import get_meta_doc, validate_cloudconfig_schema from cloudinit import gpg from cloudinit import log as logging from cloudinit import subp @@ -75,7 +74,8 @@ mirror_property = { } } } -schema = { + +meta = { 'id': 'cc_apt_configure', 'name': 'Apt Configure', 'title': 'Configure apt for the user', @@ -155,6 +155,9 @@ schema = { ------END PGP PUBLIC KEY BLOCK-------""")], 'frequency': frequency, +} + +schema = { 'type': 'object', 'properties': { 'apt': { @@ -398,7 +401,7 @@ schema = { } } -__doc__ = get_schema_doc(schema) +__doc__ = get_meta_doc(meta, schema) # place where apt stores cached repository data diff --git a/cloudinit/config/cc_bootcmd.py b/cloudinit/config/cc_bootcmd.py index 246e4497..06f7a26e 100644 --- a/cloudinit/config/cc_bootcmd.py +++ b/cloudinit/config/cc_bootcmd.py @@ -12,8 +12,7 @@ import os from textwrap import dedent -from cloudinit.config.schema import ( - get_schema_doc, validate_cloudconfig_schema) +from cloudinit.config.schema import get_meta_doc, validate_cloudconfig_schema from cloudinit.settings import PER_ALWAYS from cloudinit import temp_utils from cloudinit import subp @@ -29,7 +28,7 @@ frequency = PER_ALWAYS distros = ['all'] -schema = { +meta = { 'id': 'cc_bootcmd', 'name': 'Bootcmd', 'title': 'Run arbitrary commands early in the boot process', @@ -57,6 +56,9 @@ schema = { - [ cloud-init-per, once, mymkfs, mkfs, /dev/vdb ] """)], 'frequency': PER_ALWAYS, +} + +schema = { 'type': 'object', 'properties': { 'bootcmd': { @@ -69,12 +71,11 @@ schema = { 'additionalItems': False, # Reject items of non-string non-list 'additionalProperties': False, 'minItems': 1, - 'required': [], } } } -__doc__ = get_schema_doc(schema) # Supplement python help() +__doc__ = get_meta_doc(meta, schema) # Supplement python help() def handle(name, cfg, cloud, log, _args): diff --git a/cloudinit/config/cc_chef.py b/cloudinit/config/cc_chef.py index 7b20222e..ed734d1c 100644 --- a/cloudinit/config/cc_chef.py +++ b/cloudinit/config/cc_chef.py @@ -14,8 +14,7 @@ import os from textwrap import dedent from cloudinit import subp -from cloudinit.config.schema import ( - get_schema_doc, validate_cloudconfig_schema) +from cloudinit.config.schema import get_meta_doc, validate_cloudconfig_schema from cloudinit import templater from cloudinit import temp_utils from cloudinit import url_helper @@ -89,7 +88,8 @@ CHEF_EXEC_DEF_ARGS = tuple(['-d', '-i', '1800', '-s', '20']) frequency = PER_ALWAYS distros = ["all"] -schema = { + +meta = { 'id': 'cc_chef', 'name': 'Chef', 'title': 'module that configures, starts and installs chef', @@ -126,6 +126,9 @@ schema = { ssl_verify_mode: :verify_peer validation_name: yourorg-validator""")], 'frequency': frequency, +} + +schema = { 'type': 'object', 'properties': { 'chef': { @@ -357,7 +360,7 @@ schema = { } } -__doc__ = get_schema_doc(schema) +__doc__ = get_meta_doc(meta, schema) def post_run_chef(chef_cfg, log): diff --git a/cloudinit/config/cc_install_hotplug.py b/cloudinit/config/cc_install_hotplug.py index da98c409..9b4075cc 100644 --- a/cloudinit/config/cc_install_hotplug.py +++ b/cloudinit/config/cc_install_hotplug.py @@ -6,7 +6,7 @@ from textwrap import dedent from cloudinit import util from cloudinit import subp from cloudinit import stages -from cloudinit.config.schema import get_schema_doc, validate_cloudconfig_schema +from cloudinit.config.schema import get_meta_doc, validate_cloudconfig_schema from cloudinit.distros import ALL_DISTROS from cloudinit.event import EventType, EventScope from cloudinit.settings import PER_INSTANCE @@ -15,7 +15,7 @@ from cloudinit.settings import PER_INSTANCE frequency = PER_INSTANCE distros = [ALL_DISTROS] -schema = { +meta = { "id": "cc_install_hotplug", "name": "Install Hotplug", "title": "Install hotplug if supported and enabled", @@ -49,6 +49,9 @@ schema = { """), ], "frequency": frequency, +} + +schema = { "type": "object", "properties": { "updates": { @@ -81,7 +84,7 @@ schema = { } } -__doc__ = get_schema_doc(schema) +__doc__ = get_meta_doc(meta, schema) HOTPLUG_UDEV_PATH = "/etc/udev/rules.d/10-cloud-init-hook-hotplug.rules" diff --git a/cloudinit/config/cc_locale.py b/cloudinit/config/cc_locale.py index 4f8b7bf6..7fed9abd 100644 --- a/cloudinit/config/cc_locale.py +++ b/cloudinit/config/cc_locale.py @@ -11,13 +11,13 @@ from textwrap import dedent from cloudinit import util -from cloudinit.config.schema import get_schema_doc, validate_cloudconfig_schema +from cloudinit.config.schema import get_meta_doc, validate_cloudconfig_schema from cloudinit.settings import PER_INSTANCE frequency = PER_INSTANCE distros = ['all'] -schema = { +meta = { 'id': 'cc_locale', 'name': 'Locale', 'title': 'Set system locale', @@ -39,6 +39,9 @@ schema = { """), ], 'frequency': frequency, +} + +schema = { 'type': 'object', 'properties': { 'locale': { @@ -57,7 +60,7 @@ schema = { }, } -__doc__ = get_schema_doc(schema) # Supplement python help() +__doc__ = get_meta_doc(meta, schema) # Supplement python help() def handle(name, cfg, cloud, log, args): diff --git a/cloudinit/config/cc_ntp.py b/cloudinit/config/cc_ntp.py index c3aee798..9c085a04 100644 --- a/cloudinit/config/cc_ntp.py +++ b/cloudinit/config/cc_ntp.py @@ -16,7 +16,7 @@ from cloudinit import templater from cloudinit import type_utils from cloudinit import subp from cloudinit import util -from cloudinit.config.schema import get_schema_doc, validate_cloudconfig_schema +from cloudinit.config.schema import get_meta_doc, validate_cloudconfig_schema from cloudinit.settings import PER_INSTANCE LOG = logging.getLogger(__name__) @@ -140,7 +140,7 @@ DISTRO_CLIENT_CONFIG = { # configuration options before actually attempting to deploy with said # configuration. -schema = { +meta = { 'id': 'cc_ntp', 'name': 'NTP', 'title': 'enable and configure ntp', @@ -190,6 +190,9 @@ schema = { - ntp.ubuntu.com - 192.168.23.2""")], 'frequency': PER_INSTANCE, +} + +schema = { 'type': 'object', 'properties': { 'ntp': { @@ -289,12 +292,10 @@ schema = { }, # Don't use REQUIRED_NTP_CONFIG_KEYS to allow for override # of builtin client values. - 'required': [], 'minProperties': 1, # If we have config, define something 'additionalProperties': False }, }, - 'required': [], 'additionalProperties': False } } @@ -303,7 +304,7 @@ REQUIRED_NTP_CONFIG_KEYS = frozenset([ 'check_exe', 'confpath', 'packages', 'service_name']) -__doc__ = get_schema_doc(schema) # Supplement python help() +__doc__ = get_meta_doc(meta, schema) # Supplement python help() def distro_ntp_client_configs(distro): diff --git a/cloudinit/config/cc_resizefs.py b/cloudinit/config/cc_resizefs.py index 990a6939..00bb7ae7 100644 --- a/cloudinit/config/cc_resizefs.py +++ b/cloudinit/config/cc_resizefs.py @@ -13,8 +13,7 @@ import os import stat from textwrap import dedent -from cloudinit.config.schema import ( - get_schema_doc, validate_cloudconfig_schema) +from cloudinit.config.schema import get_meta_doc, validate_cloudconfig_schema from cloudinit.settings import PER_ALWAYS from cloudinit import subp from cloudinit import util @@ -24,7 +23,7 @@ NOBLOCK = "noblock" frequency = PER_ALWAYS distros = ['all'] -schema = { +meta = { 'id': 'cc_resizefs', 'name': 'Resizefs', 'title': 'Resize filesystem', @@ -42,6 +41,9 @@ schema = { 'examples': [ 'resize_rootfs: false # disable root filesystem resize operation'], 'frequency': PER_ALWAYS, +} + +schema = { 'type': 'object', 'properties': { 'resize_rootfs': { @@ -52,7 +54,7 @@ schema = { } } -__doc__ = get_schema_doc(schema) # Supplement python help() +__doc__ = get_meta_doc(meta, schema) # Supplement python help() def _resize_btrfs(mount_point, devpth): diff --git a/cloudinit/config/cc_runcmd.py b/cloudinit/config/cc_runcmd.py index 15960c7d..2f5e02cb 100644 --- a/cloudinit/config/cc_runcmd.py +++ b/cloudinit/config/cc_runcmd.py @@ -8,8 +8,7 @@ """Runcmd: run arbitrary commands at rc.local with output to the console""" -from cloudinit.config.schema import ( - get_schema_doc, validate_cloudconfig_schema) +from cloudinit.config.schema import get_meta_doc, validate_cloudconfig_schema from cloudinit.distros import ALL_DISTROS from cloudinit.settings import PER_INSTANCE from cloudinit import util @@ -26,7 +25,7 @@ from textwrap import dedent distros = [ALL_DISTROS] -schema = { +meta = { 'id': 'cc_runcmd', 'name': 'Runcmd', 'title': 'Run arbitrary commands', @@ -58,6 +57,9 @@ schema = { - [ wget, "http://example.org", -O, /tmp/index.html ] """)], 'frequency': PER_INSTANCE, +} + +schema = { 'type': 'object', 'properties': { 'runcmd': { @@ -71,12 +73,11 @@ schema = { 'additionalItems': False, # Reject items of non-string non-list 'additionalProperties': False, 'minItems': 1, - 'required': [], } } } -__doc__ = get_schema_doc(schema) # Supplement python help() +__doc__ = get_meta_doc(meta, schema) # Supplement python help() def handle(name, cfg, cloud, log, _args): diff --git a/cloudinit/config/cc_snap.py b/cloudinit/config/cc_snap.py index 20ed7d2f..21f30b57 100644 --- a/cloudinit/config/cc_snap.py +++ b/cloudinit/config/cc_snap.py @@ -8,8 +8,7 @@ import sys from textwrap import dedent from cloudinit import log as logging -from cloudinit.config.schema import ( - get_schema_doc, validate_cloudconfig_schema) +from cloudinit.config.schema import get_meta_doc, validate_cloudconfig_schema from cloudinit.settings import PER_INSTANCE from cloudinit.subp import prepend_base_command from cloudinit import subp @@ -21,7 +20,7 @@ frequency = PER_INSTANCE LOG = logging.getLogger(__name__) -schema = { +meta = { 'id': 'cc_snap', 'name': 'Snap', 'title': 'Install, configure and manage snapd and snap packages', @@ -103,6 +102,9 @@ schema = { signed_assertion_blob_here """)], 'frequency': PER_INSTANCE, +} + +schema = { 'type': 'object', 'properties': { 'snap': { @@ -139,13 +141,12 @@ schema = { } }, 'additionalProperties': False, # Reject keys not in schema - 'required': [], 'minProperties': 1 } } } -__doc__ = get_schema_doc(schema) # Supplement python help() +__doc__ = get_meta_doc(meta, schema) # Supplement python help() SNAP_CMD = "snap" ASSERTIONS_FILE = "/var/lib/cloud/instance/snapd.assertions" diff --git a/cloudinit/config/cc_ubuntu_advantage.py b/cloudinit/config/cc_ubuntu_advantage.py index d61dc655..831a92a2 100644 --- a/cloudinit/config/cc_ubuntu_advantage.py +++ b/cloudinit/config/cc_ubuntu_advantage.py @@ -4,8 +4,7 @@ from textwrap import dedent -from cloudinit.config.schema import ( - get_schema_doc, validate_cloudconfig_schema) +from cloudinit.config.schema import get_meta_doc, validate_cloudconfig_schema from cloudinit import log as logging from cloudinit.settings import PER_INSTANCE from cloudinit import subp @@ -16,7 +15,7 @@ UA_URL = 'https://ubuntu.com/advantage' distros = ['ubuntu'] -schema = { +meta = { 'id': 'cc_ubuntu_advantage', 'name': 'Ubuntu Advantage', 'title': 'Configure Ubuntu Advantage support services', @@ -61,6 +60,9 @@ schema = { - fips """)], 'frequency': PER_INSTANCE, +} + +schema = { 'type': 'object', 'properties': { 'ubuntu_advantage': { @@ -82,7 +84,7 @@ schema = { } } -__doc__ = get_schema_doc(schema) # Supplement python help() +__doc__ = get_meta_doc(meta, schema) # Supplement python help() LOG = logging.getLogger(__name__) diff --git a/cloudinit/config/cc_ubuntu_drivers.py b/cloudinit/config/cc_ubuntu_drivers.py index 2d1d2b32..7f617efe 100644 --- a/cloudinit/config/cc_ubuntu_drivers.py +++ b/cloudinit/config/cc_ubuntu_drivers.py @@ -5,8 +5,7 @@ import os from textwrap import dedent -from cloudinit.config.schema import ( - get_schema_doc, validate_cloudconfig_schema) +from cloudinit.config.schema import get_meta_doc, validate_cloudconfig_schema from cloudinit import log as logging from cloudinit.settings import PER_INSTANCE from cloudinit import subp @@ -18,7 +17,7 @@ LOG = logging.getLogger(__name__) frequency = PER_INSTANCE distros = ['ubuntu'] -schema = { +meta = { 'id': 'cc_ubuntu_drivers', 'name': 'Ubuntu Drivers', 'title': 'Interact with third party drivers in Ubuntu.', @@ -32,6 +31,9 @@ schema = { license-accepted: true """)], 'frequency': frequency, +} + +schema = { 'type': 'object', 'properties': { 'drivers': { @@ -64,7 +66,7 @@ schema = { OLD_UBUNTU_DRIVERS_STDERR_NEEDLE = ( "ubuntu-drivers: error: argument : invalid choice: 'install'") -__doc__ = get_schema_doc(schema) # Supplement python help() +__doc__ = get_meta_doc(meta, schema) # Supplement python help() # Use a debconf template to configure a global debconf variable diff --git a/cloudinit/config/cc_write_files.py b/cloudinit/config/cc_write_files.py index 41c75fa2..55f8c684 100644 --- a/cloudinit/config/cc_write_files.py +++ b/cloudinit/config/cc_write_files.py @@ -10,8 +10,7 @@ import base64 import os from textwrap import dedent -from cloudinit.config.schema import ( - get_schema_doc, validate_cloudconfig_schema) +from cloudinit.config.schema import get_meta_doc, validate_cloudconfig_schema from cloudinit import log as logging from cloudinit.settings import PER_INSTANCE from cloudinit import util @@ -38,7 +37,7 @@ supported_encoding_types = [ 'gz', 'gzip', 'gz+base64', 'gzip+base64', 'gz+b64', 'gzip+b64', 'b64', 'base64'] -schema = { +meta = { 'id': 'cc_write_files', 'name': 'Write Files', 'title': 'write arbitrary files', @@ -111,6 +110,9 @@ schema = { defer: true """)], 'frequency': frequency, +} + +schema = { 'type': 'object', 'properties': { 'write_files': { @@ -187,7 +189,7 @@ schema = { } } -__doc__ = get_schema_doc(schema) # Supplement python help() +__doc__ = get_meta_doc(meta, schema) # Supplement python help() def handle(name, cfg, _cloud, log, _args): diff --git a/cloudinit/config/cc_write_files_deferred.py b/cloudinit/config/cc_write_files_deferred.py index 0c75aa22..4fc8659c 100644 --- a/cloudinit/config/cc_write_files_deferred.py +++ b/cloudinit/config/cc_write_files_deferred.py @@ -4,34 +4,31 @@ """Defer writing certain files""" -from textwrap import dedent - from cloudinit.config.schema import validate_cloudconfig_schema from cloudinit import util from cloudinit.config.cc_write_files import ( schema as write_files_schema, write_files, DEFAULT_DEFER) +# meta is not used in this module, but it remains as code documentation +# +# id: cc_write_files_deferred' +# name: 'Write Deferred Files +# distros: ['all'], +# frequency: PER_INSTANCE, +# title: +# write certain files, whose creation as been deferred, during +# final stage +# description: +# This module is based on `'Write Files' `__, and +# will handle all files from the write_files list, that have been +# marked as deferred and thus are not being processed by the +# write-files module. +# +# *Please note that his module is not exposed to the user through +# its own dedicated top-level directive.* + +schema = write_files_schema -schema = util.mergemanydict([ - { - 'id': 'cc_write_files_deferred', - 'name': 'Write Deferred Files', - 'title': dedent("""\ - write certain files, whose creation as been deferred, during - final stage - """), - 'description': dedent("""\ - This module is based on `'Write Files' `__, and - will handle all files from the write_files list, that have been - marked as deferred and thus are not being processed by the - write-files module. - - *Please note that his module is not exposed to the user through - its own dedicated top-level directive.* - """) - }, - write_files_schema -]) # Not exposed, because related modules should document this behaviour __doc__ = None diff --git a/cloudinit/config/cc_zypper_add_repo.py b/cloudinit/config/cc_zypper_add_repo.py index 05855b0c..bf1638fb 100644 --- a/cloudinit/config/cc_zypper_add_repo.py +++ b/cloudinit/config/cc_zypper_add_repo.py @@ -9,14 +9,14 @@ import configobj import os from textwrap import dedent -from cloudinit.config.schema import get_schema_doc +from cloudinit.config.schema import get_meta_doc from cloudinit import log as logging from cloudinit.settings import PER_ALWAYS from cloudinit import util distros = ['opensuse', 'sles'] -schema = { +meta = { 'id': 'cc_zypper_add_repo', 'name': 'ZypperAddRepo', 'title': 'Configure zypper behavior and add zypper repositories', @@ -51,6 +51,9 @@ schema = { # any setting in /etc/zypp/zypp.conf """)], 'frequency': PER_ALWAYS, +} + +schema = { 'type': 'object', 'properties': { 'zypper': { @@ -86,14 +89,13 @@ schema = { /etc/zypp/zypp.conf'""") } }, - 'required': [], 'minProperties': 1, # Either config or repo must be provided 'additionalProperties': False, # only repos and config allowed } } } -__doc__ = get_schema_doc(schema) # Supplement python help() +__doc__ = get_meta_doc(meta, schema) # Supplement python help() LOG = logging.getLogger(__name__) diff --git a/cloudinit/config/schema.py b/cloudinit/config/schema.py index 456bab2c..d32b7c01 100644 --- a/cloudinit/config/schema.py +++ b/cloudinit/config/schema.py @@ -3,19 +3,22 @@ from cloudinit.cmd.devel import read_cfg_paths from cloudinit import importer -from cloudinit.util import find_modules, load_file +from cloudinit.importer import MetaSchema +from cloudinit.util import find_modules, load_file, error import argparse from collections import defaultdict from copy import deepcopy +from functools import partial import logging import os import re import sys import yaml +error = partial(error, sys_exit=True) + _YAML_MAP = {True: 'true', False: 'false', None: 'null'} -SCHEMA_UNDEFINED = b'UNDEFINED' CLOUD_CONFIG_HEADER = b'#cloud-config' SCHEMA_DOC_TMPL = """ {name} @@ -34,7 +37,7 @@ SCHEMA_DOC_TMPL = """ {property_doc} {examples} """ -SCHEMA_PROPERTY_TMPL = '{prefix}**{prop_name}:** ({type}) {description}' +SCHEMA_PROPERTY_TMPL = "{prefix}**{prop_name}:** ({prop_type}) {description}" SCHEMA_LIST_ITEM_TMPL = ( '{prefix}Each item in **{prop_name}** list supports the following keys:') SCHEMA_EXAMPLES_HEADER = '\n**Examples**::\n\n' @@ -72,45 +75,102 @@ def is_schema_byte_string(checker, instance): isinstance(instance, (bytes,))) -def validate_cloudconfig_schema(config, schema, strict=False): - """Validate provided config meets the schema definition. +def get_jsonschema_validator(): + """Get metaschema validator and format checker - @param config: Dict of cloud configuration settings validated against - schema. - @param schema: jsonschema dict describing the supported schema definition - for the cloud config module (config.cc_*). - @param strict: Boolean, when True raise SchemaValidationErrors instead of - logging warnings. + Older versions of jsonschema require some compatibility changes. - @raises: SchemaValidationError when provided config does not validate - against the provided schema. + @returns: Tuple: (jsonschema.Validator, FormatChecker) + @raises: ImportError when jsonschema is not present """ - try: - from jsonschema import Draft4Validator, FormatChecker - from jsonschema.validators import create, extend - except ImportError: - logging.debug( - 'Ignoring schema validation. python-jsonschema is not present') - return + from jsonschema import Draft4Validator, FormatChecker + from jsonschema.validators import create # Allow for bytes to be presented as an acceptable valid value for string # type jsonschema attributes in cloud-init's schema. # This allows #cloud-config to provide valid yaml "content: !!binary | ..." + + strict_metaschema = deepcopy(Draft4Validator.META_SCHEMA) + strict_metaschema['additionalProperties'] = False if hasattr(Draft4Validator, 'TYPE_CHECKER'): # jsonschema 3.0+ type_checker = Draft4Validator.TYPE_CHECKER.redefine( 'string', is_schema_byte_string) - cloudinitValidator = extend(Draft4Validator, type_checker=type_checker) + cloudinitValidator = create( + meta_schema=strict_metaschema, + validators=Draft4Validator.VALIDATORS, + version="draft4", + type_checker=type_checker) else: # jsonschema 2.6 workaround types = Draft4Validator.DEFAULT_TYPES - # Allow bytes as well as string (and disable a spurious - # unsupported-assignment-operation pylint warning which appears because - # this code path isn't written against the latest jsonschema). + # Allow bytes as well as string (and disable a spurious unsupported + # assignment-operation pylint warning which appears because this + # code path isn't written against the latest jsonschema). types['string'] = (str, bytes) # pylint: disable=E1137 cloudinitValidator = create( - meta_schema=Draft4Validator.META_SCHEMA, + meta_schema=strict_metaschema, validators=Draft4Validator.VALIDATORS, version="draft4", default_types=types) + return (cloudinitValidator, FormatChecker) + + +def validate_cloudconfig_metaschema(validator, schema: dict, throw=True): + """Validate provided schema meets the metaschema definition. Return strict + Validator and FormatChecker for use in validation + @param validator: Draft4Validator instance used to validate the schema + @param schema: schema to validate + @param throw: Sometimes the validator and checker are required, even if + the schema is invalid. Toggle for whether to raise + SchemaValidationError or log warnings. + + @raises: ImportError when jsonschema is not present + @raises: SchemaValidationError when the schema is invalid + """ + + from jsonschema.exceptions import SchemaError + + try: + validator.check_schema(schema) + except SchemaError as err: + # Raise SchemaValidationError to avoid jsonschema imports at call + # sites + if throw: + raise SchemaValidationError( + schema_errors=( + ('.'.join([str(p) for p in err.path]), err.message), + ) + ) from err + logging.warning( + "Meta-schema validation failed, attempting to validate config " + "anyway: %s", err) + + +def validate_cloudconfig_schema( + config: dict, schema: dict, strict=False, strict_metaschema=False +): + """Validate provided config meets the schema definition. + + @param config: Dict of cloud configuration settings validated against + schema. Ignored if strict_metaschema=True + @param schema: jsonschema dict describing the supported schema definition + for the cloud config module (config.cc_*). + @param strict: Boolean, when True raise SchemaValidationErrors instead of + logging warnings. + @param strict_metaschema: Boolean, when True validates schema using strict + metaschema definition at runtime (currently unused) + + @raises: SchemaValidationError when provided config does not validate + against the provided schema. + """ + try: + (cloudinitValidator, FormatChecker) = get_jsonschema_validator() + if strict_metaschema: + validate_cloudconfig_metaschema( + cloudinitValidator, schema, throw=False) + except ImportError: + logging.debug("Ignoring schema validation. jsonschema is not present") + return + validator = cloudinitValidator(schema, format_checker=FormatChecker()) errors = () for error in sorted(validator.iter_errors(config), key=lambda e: e.path): @@ -301,12 +361,15 @@ def _schemapath_for_cloudconfig(config, original_content): return schema_line_numbers -def _get_property_type(property_dict): - """Return a string representing a property type from a given jsonschema.""" - property_type = property_dict.get('type', SCHEMA_UNDEFINED) - if property_type == SCHEMA_UNDEFINED and property_dict.get('enum'): +def _get_property_type(property_dict: dict) -> str: + """Return a string representing a property type from a given + jsonschema. + """ + property_type = property_dict.get("type") + if property_type is None and property_dict.get("enum"): property_type = [ - str(_YAML_MAP.get(k, k)) for k in property_dict['enum']] + str(_YAML_MAP.get(k, k)) for k in property_dict["enum"] + ] if isinstance(property_type, list): property_type = '/'.join(property_type) items = property_dict.get('items', {}) @@ -317,12 +380,12 @@ def _get_property_type(property_dict): sub_property_type += '/' sub_property_type += '(' + _get_property_type(sub_item) + ')' if sub_property_type: - return '{0} of {1}'.format(property_type, sub_property_type) - return property_type + return "{0} of {1}".format(property_type, sub_property_type) + return property_type or "UNDEFINED" -def _parse_description(description, prefix): - """Parse description from the schema in a format that we can better +def _parse_description(description, prefix) -> str: + """Parse description from the meta in a format that we can better display in our docs. This parser does three things: - Guarantee that a paragraph will be in a single line @@ -330,7 +393,7 @@ def _parse_description(description, prefix): the first paragraph - Proper align lists of items - @param description: The original description in the schema. + @param description: The original description in the meta. @param prefix: The number of spaces used to align the current description """ list_paragraph = prefix * 3 @@ -343,20 +406,24 @@ def _parse_description(description, prefix): return description -def _get_property_doc(schema, prefix=' '): +def _get_property_doc(schema: dict, prefix=" ") -> str: """Return restructured text describing the supported schema properties.""" new_prefix = prefix + ' ' properties = [] for prop_key, prop_config in schema.get('properties', {}).items(): - # Define prop_name and dscription for SCHEMA_PROPERTY_TMPL + # Define prop_name and description for SCHEMA_PROPERTY_TMPL description = prop_config.get('description', '') - properties.append(SCHEMA_PROPERTY_TMPL.format( - prefix=prefix, - prop_name=prop_key, - type=_get_property_type(prop_config), - description=_parse_description(description, prefix))) - items = prop_config.get('items') + # Define prop_name and description for SCHEMA_PROPERTY_TMPL + properties.append( + SCHEMA_PROPERTY_TMPL.format( + prefix=prefix, + prop_name=prop_key, + description=_parse_description(description, prefix), + prop_type=_get_property_type(prop_config), + ) + ) + items = prop_config.get("items") if items: if isinstance(items, list): for item in items: @@ -373,9 +440,9 @@ def _get_property_doc(schema, prefix=' '): return '\n\n'.join(properties) -def _get_schema_examples(schema, prefix=''): - """Return restructured text describing the schema examples if present.""" - examples = schema.get('examples') +def _get_examples(meta: MetaSchema) -> str: + """Return restructured text describing the meta examples if present.""" + examples = meta.get("examples") if not examples: return '' rst_content = SCHEMA_EXAMPLES_HEADER @@ -390,48 +457,111 @@ def _get_schema_examples(schema, prefix=''): return rst_content -def get_schema_doc(schema): - """Return reStructured text rendering the provided jsonschema. +def get_meta_doc(meta: MetaSchema, schema: dict) -> str: + """Return reStructured text rendering the provided metadata. - @param schema: Dict of jsonschema to render. - @raise KeyError: If schema lacks an expected key. + @param meta: Dict of metadata to render. + @raise KeyError: If metadata lacks an expected key. """ - schema_copy = deepcopy(schema) - schema_copy['property_doc'] = _get_property_doc(schema) - schema_copy['examples'] = _get_schema_examples(schema) - schema_copy['distros'] = ', '.join(schema['distros']) + + if not meta or not schema: + raise ValueError("Expected meta and schema") + keys = set(meta.keys()) + expected = set( + { + "id", + "title", + "examples", + "frequency", + "distros", + "description", + "name", + } + ) + error_message = "" + if expected - keys: + error_message = "Missing expected keys in module meta: {}".format( + expected - keys + ) + elif keys - expected: + error_message = ( + "Additional unexpected keys found in module meta: {}".format( + keys - expected + ) + ) + if error_message: + raise KeyError(error_message) + + # cast away type annotation + meta_copy = dict(deepcopy(meta)) + meta_copy["property_doc"] = _get_property_doc(schema) + meta_copy["examples"] = _get_examples(meta) + meta_copy["distros"] = ", ".join(meta["distros"]) # Need an underbar of the same length as the name - schema_copy['title_underbar'] = re.sub(r'.', '-', schema['name']) - return SCHEMA_DOC_TMPL.format(**schema_copy) + meta_copy["title_underbar"] = re.sub(r".", "-", meta["name"]) + template = SCHEMA_DOC_TMPL.format(**meta_copy) + return template + + +def get_modules() -> dict: + configs_dir = os.path.dirname(os.path.abspath(__file__)) + return find_modules(configs_dir) + +def load_doc(requested_modules: list) -> str: + """Load module docstrings -FULL_SCHEMA = None + Docstrings are generated on module load. Reduce, reuse, recycle. + """ + docs = "" + all_modules = list(get_modules().values()) + ["all"] + invalid_docs = set(requested_modules).difference(set(all_modules)) + if invalid_docs: + error( + "Invalid --docs value {}. Must be one of: {}".format( + list(invalid_docs), ", ".join(all_modules), + ) + ) + for mod_name in all_modules: + if "all" in requested_modules or mod_name in requested_modules: + (mod_locs, _) = importer.find_module( + mod_name, ["cloudinit.config"], ["schema"] + ) + if mod_locs: + mod = importer.import_module(mod_locs[0]) + docs += mod.__doc__ or "" + return docs -def get_schema(): +def get_schema() -> dict: """Return jsonschema coalesced from all cc_* cloud-config module.""" - global FULL_SCHEMA - if FULL_SCHEMA: - return FULL_SCHEMA full_schema = { - '$schema': 'http://json-schema.org/draft-04/schema#', - 'id': 'cloud-config-schema', 'allOf': []} - - configs_dir = os.path.dirname(os.path.abspath(__file__)) - potential_handlers = find_modules(configs_dir) - for (_fname, mod_name) in potential_handlers.items(): - mod_locs, _looked_locs = importer.find_module( - mod_name, ['cloudinit.config'], ['schema']) + "$schema": "http://json-schema.org/draft-04/schema#", + "id": "cloud-config-schema", + "allOf": [], + } + + for (_, mod_name) in get_modules().items(): + (mod_locs, _) = importer.find_module( + mod_name, ["cloudinit.config"], ["schema"] + ) if mod_locs: mod = importer.import_module(mod_locs[0]) - full_schema['allOf'].append(mod.schema) - FULL_SCHEMA = full_schema + full_schema["allOf"].append(mod.schema) return full_schema -def error(message): - print(message, file=sys.stderr) - sys.exit(1) +def get_meta() -> dict: + """Return metadata coalesced from all cc_* cloud-config module.""" + full_meta = dict() + for (_, mod_name) in get_modules().items(): + mod_locs, _ = importer.find_module( + mod_name, ["cloudinit.config"], ["meta"] + ) + if mod_locs: + mod = importer.import_module(mod_locs[0]) + full_meta[mod.meta["id"]] = mod.meta + return full_meta def get_parser(parser=None): @@ -474,15 +604,7 @@ def handle_schema_args(name, args): cfg_name = args.config_file print("Valid cloud-config:", cfg_name) elif args.docs: - schema_ids = [subschema['id'] for subschema in full_schema['allOf']] - schema_ids += ['all'] - invalid_docs = set(args.docs).difference(set(schema_ids)) - if invalid_docs: - error('Invalid --docs value {0}. Must be one of: {1}'.format( - list(invalid_docs), ', '.join(schema_ids))) - for subschema in full_schema['allOf']: - if 'all' in args.docs or subschema['id'] in args.docs: - print(get_schema_doc(subschema)) + print(load_doc(args.docs)) def main(): -- cgit v1.2.3