yaml_load/schema: Add invalid line and column nums to error message

Yaml tracebacks are generally hard to read for average users. Add a bit of logic to util.yaml_load and schema validation to look for YAMLError.context_marker or problem_marker line and column counts. No longer log the full exceeption traceback from the yaml_load error, instead just LOG.warning for the specific error and point to the offending line and column where the problem exists.
author: Chad Smith <chad.smith@canonical.com> 2018-05-23 15:56:16 -0400
committer: Scott Moser <smoser@brickies.net> 2018-05-23 15:56:16 -0400
commit: 3b28bdc616f3e7f4d6b419629dc7b9efc3ae8d1e (patch)
tree: b0a5d6f097c07ec693b3ff95e6a791955c6302f0
parent: aa4eeb80839382117e1813e396dc53aa634fd7ba (diff)
download: vyos-cloud-init-3b28bdc616f3e7f4d6b419629dc7b9efc3ae8d1e.tar.gz
vyos-cloud-init-3b28bdc616f3e7f4d6b419629dc7b9efc3ae8d1e.zip
4 files changed, 118 insertions, 27 deletions
diff --git a/cloudinit/config/schema.py b/cloudinit/config/schema.py
index 3bad8e22..080a6d06 100644
--- a/cloudinit/config/schema.py
+++ b/cloudinit/config/schema.py
@@ -93,20 +93,33 @@ def validate_cloudconfig_schema(config, schema, strict=False):
 def annotated_cloudconfig_file(cloudconfig, original_content, schema_errors):
     """Return contents of the cloud-config file annotated with schema errors.
 
-    @param cloudconfig: YAML-loaded object from the original_content.
+    @param cloudconfig: YAML-loaded dict from the original_content or empty
+        dict if unparseable.
     @param original_content: The contents of a cloud-config file
     @param schema_errors: List of tuples from a JSONSchemaValidationError. The
         tuples consist of (schemapath, error_message).
     """
     if not schema_errors:
         return original_content
-    schemapaths = _schemapath_for_cloudconfig(cloudconfig, original_content)
+    schemapaths = {}
+    if cloudconfig:
+        schemapaths = _schemapath_for_cloudconfig(
+            cloudconfig, original_content)
     errors_by_line = defaultdict(list)
     error_count = 1
     error_footer = []
     annotated_content = []
     for path, msg in schema_errors:
-        errors_by_line[schemapaths[path]].append(msg)
+        match = re.match(r'format-l(?P<line>\d+)\.c(?P<col>\d+).*', path)
+        if match:
+            line, col = match.groups()
+            errors_by_line[int(line)].append(msg)
+        else:
+            col = None
+            errors_by_line[schemapaths[path]].append(msg)
+        if col is not None:
+            msg = 'Line {line} column {col}: {msg}'.format(
+                line=line, col=col, msg=msg)
         error_footer.append('# E{0}: {1}'.format(error_count, msg))
         error_count += 1
     lines = original_content.decode().split('\n')
@@ -142,18 +155,31 @@ def validate_cloudconfig_file(config_path, schema, annotate=False):
     content = load_file(config_path, decode=False)
     if not content.startswith(CLOUD_CONFIG_HEADER):
         errors = (
-            ('header', 'File {0} needs to begin with "{1}"'.format(
+            ('format-l1.c1', 'File {0} needs to begin with "{1}"'.format(
                 config_path, CLOUD_CONFIG_HEADER.decode())),)
-        raise SchemaValidationError(errors)
-
+        error = SchemaValidationError(errors)
+        if annotate:
+            print(annotated_cloudconfig_file({}, content, error.schema_errors))
+        raise error
     try:
         cloudconfig = yaml.safe_load(content)
-    except yaml.parser.ParserError as e:
-        errors = (
-            ('format', 'File {0} is not valid yaml. {1}'.format(
-                config_path, str(e))),)
-        raise SchemaValidationError(errors)
-
+    except (yaml.YAMLError) as e:
+        line = column = 1
+        mark = None
+        if hasattr(e, 'context_mark') and getattr(e, 'context_mark'):
+            mark = getattr(e, 'context_mark')
+        elif hasattr(e, 'problem_mark') and getattr(e, 'problem_mark'):
+            mark = getattr(e, 'problem_mark')
+        if mark:
+            line = mark.line + 1
+            column = mark.column + 1
+        errors = (('format-l{line}.c{col}'.format(line=line, col=column),
+                   'File {0} is not valid yaml. {1}'.format(
+                       config_path, str(e))),)
+        error = SchemaValidationError(errors)
+        if annotate:
+            print(annotated_cloudconfig_file({}, content, error.schema_errors))
+        raise error
     try:
         validate_cloudconfig_schema(
             cloudconfig, schema, strict=True)
@@ -176,7 +202,7 @@ def _schemapath_for_cloudconfig(config, original_content):
     list_index = 0
     RE_YAML_INDENT = r'^(\s*)'
     scopes = []
-    for line_number, line in enumerate(content_lines):
+    for line_number, line in enumerate(content_lines, 1):
         indent_depth = len(re.match(RE_YAML_INDENT, line).groups()[0])
         line = line.strip()
         if not line or line.startswith('#'):
@@ -208,8 +234,8 @@ def _schemapath_for_cloudconfig(config, original_content):
                 scopes.append((indent_depth + 2, key + '.0'))
                 for inner_list_index in range(0, len(yaml.safe_load(value))):
                     list_key = key + '.' + str(inner_list_index)
-                    schema_line_numbers[list_key] = line_number + 1
-        schema_line_numbers[key] = line_number + 1
+                    schema_line_numbers[list_key] = line_number
+        schema_line_numbers[key] = line_number
     return schema_line_numbers
 
 
@@ -337,9 +363,11 @@ def handle_schema_args(name, args):
         try:
             validate_cloudconfig_file(
                 args.config_file, full_schema, args.annotate)
-        except (SchemaValidationError, RuntimeError) as e:
+        except SchemaValidationError as e:
             if not args.annotate:
                 error(str(e))
+        except RuntimeError as e:
+                error(str(e))
         else:
             print("Valid cloud-config file {0}".format(args.config_file))
     if args.doc:
diff --git a/cloudinit/util.py b/cloudinit/util.py
index 653ed6ea..c0473b8c 100644
--- a/cloudinit/util.py
+++ b/cloudinit/util.py
@@ -874,8 +874,20 @@ def load_yaml(blob, default=None, allowed=(dict,)):
                              " but got %s instead") %
                             (allowed, type_utils.obj_name(converted)))
         loaded = converted
-    except (yaml.YAMLError, TypeError, ValueError):
-        logexc(LOG, "Failed loading yaml blob")
+    except (yaml.YAMLError, TypeError, ValueError) as e:
+        msg = 'Failed loading yaml blob'
+        mark = None
+        if hasattr(e, 'context_mark') and getattr(e, 'context_mark'):
+            mark = getattr(e, 'context_mark')
+        elif hasattr(e, 'problem_mark') and getattr(e, 'problem_mark'):
+            mark = getattr(e, 'problem_mark')
+        if mark:
+            msg += (
+                '. Invalid format at line {line} column {col}: "{err}"'.format(
+                    line=mark.line + 1, col=mark.column + 1, err=e))
+        else:
+            msg += '. {err}'.format(err=e)
+        LOG.warning(msg)
     return loaded
 
 
diff --git a/tests/unittests/test_handler/test_schema.py b/tests/unittests/test_handler/test_schema.py
index ac41f124..fb266faf 100644
--- a/tests/unittests/test_handler/test_schema.py
+++ b/tests/unittests/test_handler/test_schema.py
@@ -134,22 +134,35 @@ class ValidateCloudConfigFileTest(CiTestCase):
         with self.assertRaises(SchemaValidationError) as context_mgr:
             validate_cloudconfig_file(self.config_file, {})
         self.assertEqual(
-            'Cloud config schema errors: header: File {0} needs to begin with '
-            '"{1}"'.format(self.config_file, CLOUD_CONFIG_HEADER.decode()),
+            'Cloud config schema errors: format-l1.c1: File {0} needs to begin'
+            ' with "{1}"'.format(
+                self.config_file, CLOUD_CONFIG_HEADER.decode()),
             str(context_mgr.exception))
 
-    def test_validateconfig_file_error_on_non_yaml_format(self):
-        """On non-yaml format, validate_cloudconfig_file errors."""
+    def test_validateconfig_file_error_on_non_yaml_scanner_error(self):
+        """On non-yaml scan issues, validate_cloudconfig_file errors."""
+        # Generate a scanner error by providing text on a single line with
+        # improper indent.
+        write_file(self.config_file, '#cloud-config\nasdf:\nasdf')
+        with self.assertRaises(SchemaValidationError) as context_mgr:
+            validate_cloudconfig_file(self.config_file, {})
+        self.assertIn(
+            'schema errors: format-l3.c1: File {0} is not valid yaml.'.format(
+                self.config_file),
+            str(context_mgr.exception))
+
+    def test_validateconfig_file_error_on_non_yaml_parser_error(self):
+        """On non-yaml parser issues, validate_cloudconfig_file errors."""
         write_file(self.config_file, '#cloud-config\n{}}')
         with self.assertRaises(SchemaValidationError) as context_mgr:
             validate_cloudconfig_file(self.config_file, {})
         self.assertIn(
-            'schema errors: format: File {0} is not valid yaml.'.format(
+            'schema errors: format-l2.c3: File {0} is not valid yaml.'.format(
                 self.config_file),
             str(context_mgr.exception))
 
     @skipUnlessJsonSchema()
-    def test_validateconfig_file_sctricty_validates_schema(self):
+    def test_validateconfig_file_sctrictly_validates_schema(self):
         """validate_cloudconfig_file raises errors on invalid schema."""
         schema = {
             'properties': {'p1': {'type': 'string', 'format': 'hostname'}}}
@@ -342,6 +355,20 @@ class MainTest(CiTestCase):
             'Expected either --config-file argument or --doc\n',
             m_stderr.getvalue())
 
+    def test_main_absent_config_file(self):
+        """Main exits non-zero when config file is absent."""
+        myargs = ['mycmd', '--annotate', '--config-file', 'NOT_A_FILE']
+        with mock.patch('sys.exit', side_effect=self.sys_exit):
+            with mock.patch('sys.argv', myargs):
+                with mock.patch('sys.stderr', new_callable=StringIO) as \
+                        m_stderr:
+                    with self.assertRaises(SystemExit) as context_manager:
+                        main()
+        self.assertEqual(1, context_manager.exception.code)
+        self.assertEqual(
+            'Configfile NOT_A_FILE does not exist\n',
+            m_stderr.getvalue())
+
     def test_main_prints_docs(self):
         """When --doc parameter is provided, main generates documentation."""
         myargs = ['mycmd', '--doc']
diff --git a/tests/unittests/test_util.py b/tests/unittests/test_util.py
index 84941c7d..d774f3dc 100644
--- a/tests/unittests/test_util.py
+++ b/tests/unittests/test_util.py
@@ -4,6 +4,7 @@ from __future__ import print_function
 
 import logging
 import os
+import re
 import shutil
 import stat
 import tempfile
@@ -265,26 +266,49 @@ class TestGetCmdline(helpers.TestCase):
         self.assertEqual("abcd 123", ret)
 
 
-class TestLoadYaml(helpers.TestCase):
+class TestLoadYaml(helpers.CiTestCase):
     mydefault = "7b03a8ebace993d806255121073fed52"
+    with_logs = True
 
     def test_simple(self):
         mydata = {'1': "one", '2': "two"}
         self.assertEqual(util.load_yaml(yaml.dump(mydata)), mydata)
 
     def test_nonallowed_returns_default(self):
+        '''Any unallowed types result in returning default; log the issue.'''
         # for now, anything not in the allowed list just returns the default.
         myyaml = yaml.dump({'1': "one"})
         self.assertEqual(util.load_yaml(blob=myyaml,
                                         default=self.mydefault,
                                         allowed=(str,)),
                          self.mydefault)
-
-    def test_bogus_returns_default(self):
+        regex = re.compile(
+            r'Yaml load allows \(<(class|type) \'str\'>,\) root types, but'
+            r' got dict')
+        self.assertTrue(regex.search(self.logs.getvalue()),
+                        msg='Missing expected yaml load error')
+
+    def test_bogus_scan_error_returns_default(self):
+        '''On Yaml scan error, load_yaml returns the default and logs issue.'''
         badyaml = "1\n 2:"
         self.assertEqual(util.load_yaml(blob=badyaml,
                                         default=self.mydefault),
                          self.mydefault)
+        self.assertIn(
+            'Failed loading yaml blob. Invalid format at line 2 column 3:'
+            ' "mapping values are not allowed here',
+            self.logs.getvalue())
+
+    def test_bogus_parse_error_returns_default(self):
+        '''On Yaml parse error, load_yaml returns default and logs issue.'''
+        badyaml = "{}}"
+        self.assertEqual(util.load_yaml(blob=badyaml,
+                                        default=self.mydefault),
+                         self.mydefault)
+        self.assertIn(
+            'Failed loading yaml blob. Invalid format at line 1 column 3:'
+            " \"expected \'<document start>\', but found \'}\'",
+            self.logs.getvalue())
 
     def test_unsafe_types(self):
         # should not load complex types
author	Chad Smith <chad.smith@canonical.com>	2018-05-23 15:56:16 -0400
committer	Scott Moser <smoser@brickies.net>	2018-05-23 15:56:16 -0400
commit	3b28bdc616f3e7f4d6b419629dc7b9efc3ae8d1e (patch)
tree	b0a5d6f097c07ec693b3ff95e6a791955c6302f0
parent	aa4eeb80839382117e1813e396dc53aa634fd7ba (diff)
download	vyos-cloud-init-3b28bdc616f3e7f4d6b419629dc7b9efc3ae8d1e.tar.gz vyos-cloud-init-3b28bdc616f3e7f4d6b419629dc7b9efc3ae8d1e.zip