diff options
30 files changed, 949 insertions, 216 deletions
| @@ -48,6 +48,8 @@     output does not get a 'killed by TERM signal' message.   - support resizing partitions via growpart or parted (LP: #1136936)   - allow specifying apt-get command in distro config ('apt_get_command') + - support different and user-suppliable merging algorithms for cloud-config +   (LP: #1023179)  0.7.1:   - sysvinit: fix missing dependency in cloud-init job for RHEL 5.6  diff --git a/cloudinit/config/cc_landscape.py b/cloudinit/config/cc_landscape.py index 2efdff79..8a709677 100644 --- a/cloudinit/config/cc_landscape.py +++ b/cloudinit/config/cc_landscape.py @@ -24,6 +24,7 @@ from StringIO import StringIO  from configobj import ConfigObj +from cloudinit import type_utils  from cloudinit import util  from cloudinit.settings import PER_INSTANCE @@ -58,7 +59,8 @@ def handle(_name, cfg, cloud, log, _args):      if not isinstance(ls_cloudcfg, (dict)):          raise RuntimeError(("'landscape' key existed in config,"                              " but not a dictionary type," -                            " is a %s instead"), util.obj_name(ls_cloudcfg)) +                            " is a %s instead"), +                           type_utils.obj_name(ls_cloudcfg))      if not ls_cloudcfg:          return diff --git a/cloudinit/config/cc_mounts.py b/cloudinit/config/cc_mounts.py index 9010d97f..390ba711 100644 --- a/cloudinit/config/cc_mounts.py +++ b/cloudinit/config/cc_mounts.py @@ -22,6 +22,7 @@ from string import whitespace  # pylint: disable=W0402  import re +from cloudinit import type_utils  from cloudinit import util  # Shortname matches 'sda', 'sda1', 'xvda', 'hda', 'sdb', xvdb, vda, vdd1, sr0 @@ -60,7 +61,7 @@ def handle(_name, cfg, cloud, log, _args):          # skip something that wasn't a list          if not isinstance(cfgmnt[i], list):              log.warn("Mount option %s not a list, got a %s instead", -                     (i + 1), util.obj_name(cfgmnt[i])) +                     (i + 1), type_utils.obj_name(cfgmnt[i]))              
continue          startname = str(cfgmnt[i][0]) diff --git a/cloudinit/distros/__init__.py b/cloudinit/distros/__init__.py index 2a2d8216..50d52594 100644 --- a/cloudinit/distros/__init__.py +++ b/cloudinit/distros/__init__.py @@ -31,6 +31,7 @@ import re  from cloudinit import importer  from cloudinit import log as logging  from cloudinit import ssh_util +from cloudinit import type_utils  from cloudinit import util  from cloudinit.distros.parsers import hosts @@ -445,7 +446,7 @@ class Distro(object):              lines.append("%s %s" % (user, rules))          else:              msg = "Can not create sudoers rule addition with type %r" -            raise TypeError(msg % (util.obj_name(rules))) +            raise TypeError(msg % (type_utils.obj_name(rules)))          content = "\n".join(lines)          content += "\n"  # trailing newline @@ -568,7 +569,7 @@ def _normalize_groups(grp_cfg):                              c_grp_cfg[k] = [v]                          else:                              raise TypeError("Bad group member type %s" % -                                            util.obj_name(v)) +                                            type_utils.obj_name(v))                      else:                          if isinstance(v, (list)):                              c_grp_cfg[k].extend(v) @@ -576,13 +577,13 @@ def _normalize_groups(grp_cfg):                              c_grp_cfg[k].append(v)                          else:                              raise TypeError("Bad group member type %s" % -                                            util.obj_name(v)) +                                            type_utils.obj_name(v))              elif isinstance(i, (str, basestring)):                  if i not in c_grp_cfg:                      c_grp_cfg[i] = []              else:                  raise TypeError("Unknown group name type %s" % -                                util.obj_name(i)) +                                type_utils.obj_name(i))          grp_cfg = 
c_grp_cfg      groups = {}      if isinstance(grp_cfg, (dict)): @@ -591,7 +592,7 @@ def _normalize_groups(grp_cfg):      else:          raise TypeError(("Group config must be list, dict "                           " or string types only and not %s") % -                        util.obj_name(grp_cfg)) +                        type_utils.obj_name(grp_cfg))      return groups @@ -622,7 +623,7 @@ def _normalize_users(u_cfg, def_user_cfg=None):                  ad_ucfg.append(v)              else:                  raise TypeError(("Unmappable user value type %s" -                                 " for key %s") % (util.obj_name(v), k)) +                                 " for key %s") % (type_utils.obj_name(v), k))          u_cfg = ad_ucfg      elif isinstance(u_cfg, (str, basestring)):          u_cfg = util.uniq_merge_sorted(u_cfg) @@ -647,7 +648,7 @@ def _normalize_users(u_cfg, def_user_cfg=None):          else:              raise TypeError(("User config must be dictionary/list "                               " or string types only and not %s") % -                            util.obj_name(user_config)) +                            type_utils.obj_name(user_config))      # Ensure user options are in the right python friendly format      if users: @@ -740,7 +741,7 @@ def normalize_users_groups(cfg, distro):              }          if not isinstance(old_user, (dict)):              LOG.warn(("Format for 'user' key must be a string or " -                      "dictionary and not %s"), util.obj_name(old_user)) +                      "dictionary and not %s"), type_utils.obj_name(old_user))              old_user = {}      # If no old user format, then assume the distro @@ -766,7 +767,7 @@ def normalize_users_groups(cfg, distro):      if not isinstance(base_users, (list, dict, str, basestring)):          LOG.warn(("Format for 'users' key must be a comma separated string"                    " or a dictionary or a list and not %s"), -                 util.obj_name(base_users)) +     
            type_utils.obj_name(base_users))          base_users = []      if old_user: diff --git a/cloudinit/handlers/__init__.py b/cloudinit/handlers/__init__.py index 8d6dcd4d..924463ce 100644 --- a/cloudinit/handlers/__init__.py +++ b/cloudinit/handlers/__init__.py @@ -27,6 +27,7 @@ from cloudinit.settings import (PER_ALWAYS, PER_INSTANCE, FREQUENCIES)  from cloudinit import importer  from cloudinit import log as logging +from cloudinit import type_utils  from cloudinit import util  LOG = logging.getLogger(__name__) @@ -69,7 +70,6 @@ INCLUSION_SRCH = sorted(list(INCLUSION_TYPES_MAP.keys()),  class Handler(object): -      __metaclass__ = abc.ABCMeta      def __init__(self, frequency, version=2): @@ -77,53 +77,66 @@ class Handler(object):          self.frequency = frequency      def __repr__(self): -        return "%s: [%s]" % (util.obj_name(self), self.list_types()) +        return "%s: [%s]" % (type_utils.obj_name(self), self.list_types())      @abc.abstractmethod      def list_types(self):          raise NotImplementedError() -    def handle_part(self, data, ctype, filename, payload, frequency): -        return self._handle_part(data, ctype, filename, payload, frequency) -      @abc.abstractmethod -    def _handle_part(self, data, ctype, filename, payload, frequency): +    def handle_part(self, *args, **kwargs):          raise NotImplementedError() -def run_part(mod, data, ctype, filename, payload, frequency): +def run_part(mod, data, filename, payload, frequency, headers):      mod_freq = mod.frequency      if not (mod_freq == PER_ALWAYS or              (frequency == PER_INSTANCE and mod_freq == PER_INSTANCE)):          return -    mod_ver = mod.handler_version      # Sanity checks on version (should be an int convertable)      try: +        mod_ver = mod.handler_version          mod_ver = int(mod_ver) -    except: +    except (TypeError, ValueError, AttributeError):          mod_ver = 1 +    content_type = headers['Content-Type']      try:          
LOG.debug("Calling handler %s (%s, %s, %s) with frequency %s", -                  mod, ctype, filename, mod_ver, frequency) -        if mod_ver >= 2: +                  mod, content_type, filename, mod_ver, frequency) +        if mod_ver == 3: +            # Treat as v. 3 which does get a frequency + headers +            mod.handle_part(data, content_type, filename, +                            payload, frequency, headers) +        elif mod_ver == 2:              # Treat as v. 2 which does get a frequency -            mod.handle_part(data, ctype, filename, payload, frequency) -        else: +            mod.handle_part(data, content_type, filename, +                            payload, frequency) +        elif mod_ver == 1:              # Treat as v. 1 which gets no frequency -            mod.handle_part(data, ctype, filename, payload) +            mod.handle_part(data, content_type, filename, payload) +        else: +            raise ValueError("Unknown module version %s" % (mod_ver))      except:          util.logexc(LOG, ("Failed calling handler %s (%s, %s, %s)"                           " with frequency %s"), -                    mod, ctype, filename, +                    mod, content_type, filename,                      mod_ver, frequency)  def call_begin(mod, data, frequency): -    run_part(mod, data, CONTENT_START, None, None, frequency) +    # Create a fake header set +    headers = { +        'Content-Type': CONTENT_START, +    } +    run_part(mod, data, None, None, frequency, headers)  def call_end(mod, data, frequency): -    run_part(mod, data, CONTENT_END, None, None, frequency) +    # Create a fake header set +    headers = { +        'Content-Type': CONTENT_END, +    } +    run_part(mod, data, None, None, frequency, headers)  def walker_handle_handler(pdata, _ctype, _filename, payload): @@ -173,26 +186,27 @@ def _escape_string(text):      return text -def walker_callback(pdata, ctype, filename, payload): -    if ctype in PART_CONTENT_TYPES: -        
walker_handle_handler(pdata, ctype, filename, payload) +def walker_callback(data, filename, payload, headers): +    content_type = headers['Content-Type'] +    if content_type in PART_CONTENT_TYPES: +        walker_handle_handler(data, content_type, filename, payload)          return -    handlers = pdata['handlers'] -    if ctype in pdata['handlers']: -        run_part(handlers[ctype], pdata['data'], ctype, filename, -                 payload, pdata['frequency']) +    handlers = data['handlers'] +    if content_type in handlers: +        run_part(handlers[content_type], data['data'], filename, +                 payload, data['frequency'], headers)      elif payload:          # Extract the first line or 24 bytes for displaying in the log          start = _extract_first_or_bytes(payload, 24)          details = "'%s...'" % (_escape_string(start)) -        if ctype == NOT_MULTIPART_TYPE: +        if content_type == NOT_MULTIPART_TYPE:              LOG.warning("Unhandled non-multipart (%s) userdata: %s", -                        ctype, details) +                        content_type, details)          else:              LOG.warning("Unhandled unknown content-type (%s) userdata: %s", -                        ctype, details) +                        content_type, details)      else: -        LOG.debug("empty payload of type %s" % ctype) +        LOG.debug("Empty payload of type %s", content_type)  # Callback is a function that will be called with @@ -212,7 +226,10 @@ def walk(msg, callback, data):          if not filename:              filename = PART_FN_TPL % (partnum) -        callback(data, ctype, filename, part.get_payload(decode=True)) +        headers = dict(part) +        LOG.debug(headers) +        headers['Content-Type'] = ctype +        callback(data, filename, part.get_payload(decode=True), headers)          partnum = partnum + 1 diff --git a/cloudinit/handlers/boot_hook.py b/cloudinit/handlers/boot_hook.py index 456b8020..bf2899ab 100644 --- 
a/cloudinit/handlers/boot_hook.py +++ b/cloudinit/handlers/boot_hook.py @@ -56,7 +56,8 @@ class BootHookPartHandler(handlers.Handler):          util.write_file(filepath, contents, 0700)          return filepath -    def _handle_part(self, _data, ctype, filename, payload, _frequency): +    def handle_part(self, _data, ctype, filename,  # pylint: disable=W0221 +                    payload, frequency):  # pylint: disable=W0613          if ctype in handlers.CONTENT_SIGNALS:              return diff --git a/cloudinit/handlers/cloud_config.py b/cloudinit/handlers/cloud_config.py index f6d95244..d30d6338 100644 --- a/cloudinit/handlers/cloud_config.py +++ b/cloudinit/handlers/cloud_config.py @@ -22,41 +22,103 @@  from cloudinit import handlers  from cloudinit import log as logging +from cloudinit import mergers  from cloudinit import util  from cloudinit.settings import (PER_ALWAYS)  LOG = logging.getLogger(__name__) +MERGE_HEADER = 'Merge-Type' +DEF_MERGERS = mergers.default_mergers() +  class CloudConfigPartHandler(handlers.Handler):      def __init__(self, paths, **_kwargs): -        handlers.Handler.__init__(self, PER_ALWAYS) -        self.cloud_buf = [] +        handlers.Handler.__init__(self, PER_ALWAYS, version=3) +        self.cloud_buf = None          self.cloud_fn = paths.get_ipath("cloud_config") +        self.file_names = [] +        self.mergers = [DEF_MERGERS]      def list_types(self):          return [              handlers.type_from_starts_with("#cloud-config"),          ] -    def _write_cloud_config(self, buf): +    def _write_cloud_config(self):          if not self.cloud_fn:              return -        lines = [str(b) for b in buf] -        payload = "\n".join(lines) -        util.write_file(self.cloud_fn, payload, 0600) +        # Capture which files we merged from... 
+        file_lines = [] +        if self.file_names: +            file_lines.append("# from %s files" % (len(self.file_names))) +            for fn in self.file_names: +                file_lines.append("# %s" % (fn)) +            file_lines.append("") +        if self.cloud_buf is not None: +            # Something was actually gathered.... +            lines = [ +                "#cloud-config", +                '', +            ] +            lines.extend(file_lines) +            lines.append(util.yaml_dumps(self.cloud_buf)) +        else: +            lines = [] +        util.write_file(self.cloud_fn, "\n".join(lines), 0600) + +    def _extract_mergers(self, payload, headers): +        merge_header_headers = '' +        for h in [MERGE_HEADER, 'X-%s' % (MERGE_HEADER)]: +            tmp_h = headers.get(h, '') +            if tmp_h: +                merge_header_headers = tmp_h +                break +        # Select either the merge-type from the content +        # or the merge type from the headers or default to our own set +        # if neither exists (or is empty) from the later. +        payload_yaml = util.load_yaml(payload) +        mergers_yaml = mergers.dict_extract_mergers(payload_yaml) +        mergers_header = mergers.string_extract_mergers(merge_header_headers) +        all_mergers = [] +        all_mergers.extend(mergers_yaml) +        all_mergers.extend(mergers_header) +        if not all_mergers: +            all_mergers = DEF_MERGERS +        return all_mergers + +    def _merge_part(self, payload, headers): +        next_mergers = self._extract_mergers(payload, headers) +        # Use the merger list from the last call, since it is the one +        # that will be defining how to merge with the next payload. 
+        curr_mergers = list(self.mergers[-1]) +        LOG.debug("Merging by applying %s", curr_mergers) +        self.mergers.append(next_mergers) +        merger = mergers.construct(curr_mergers) +        if self.cloud_buf is None: +            # First time through, merge with an empty dict... +            self.cloud_buf = {} +        self.cloud_buf = merger.merge(self.cloud_buf, +                                      util.load_yaml(payload)) -    def _handle_part(self, _data, ctype, filename, payload, _frequency): +    def _reset(self): +        self.file_names = [] +        self.cloud_buf = None +        self.mergers = [DEF_MERGERS] + +    def handle_part(self, _data, ctype, filename,  # pylint: disable=W0221 +                    payload, _frequency, headers):  # pylint: disable=W0613          if ctype == handlers.CONTENT_START: -            self.cloud_buf = [] +            self._reset()              return          if ctype == handlers.CONTENT_END: -            self._write_cloud_config(self.cloud_buf) -            self.cloud_buf = [] +            self._write_cloud_config() +            self._reset()              return - -        filename = util.clean_filename(filename) -        if not filename: -            filename = '??' 
-        self.cloud_buf.extend(["#%s" % (filename), str(payload)]) +        try: +            self._merge_part(payload, headers) +            self.file_names.append(filename) +        except: +            util.logexc(LOG, "Failed at merging in cloud config part from %s", +                        filename) diff --git a/cloudinit/handlers/shell_script.py b/cloudinit/handlers/shell_script.py index 6c5c11ca..b185c374 100644 --- a/cloudinit/handlers/shell_script.py +++ b/cloudinit/handlers/shell_script.py @@ -41,7 +41,8 @@ class ShellScriptPartHandler(handlers.Handler):              handlers.type_from_starts_with("#!"),          ] -    def _handle_part(self, _data, ctype, filename, payload, _frequency): +    def handle_part(self, _data, ctype, filename,  # pylint: disable=W0221 +                    payload, frequency):  # pylint: disable=W0613          if ctype in handlers.CONTENT_SIGNALS:              # TODO(harlowja): maybe delete existing things here              return diff --git a/cloudinit/handlers/upstart_job.py b/cloudinit/handlers/upstart_job.py index 0aa7446e..edd56527 100644 --- a/cloudinit/handlers/upstart_job.py +++ b/cloudinit/handlers/upstart_job.py @@ -42,7 +42,8 @@ class UpstartJobPartHandler(handlers.Handler):              handlers.type_from_starts_with("#upstart-job"),          ] -    def _handle_part(self, _data, ctype, filename, payload, frequency): +    def handle_part(self, _data, ctype, filename,  # pylint: disable=W0221 +                    payload, frequency):          if ctype in handlers.CONTENT_SIGNALS:              return diff --git a/cloudinit/helpers.py b/cloudinit/helpers.py index 2077401c..a4e6fb03 100644 --- a/cloudinit/helpers.py +++ b/cloudinit/helpers.py @@ -32,6 +32,7 @@ from cloudinit.settings import (PER_INSTANCE, PER_ALWAYS, PER_ONCE,                                  CFG_ENV_NAME)  from cloudinit import log as logging +from cloudinit import type_utils  from cloudinit import util  LOG = logging.getLogger(__name__) @@ -68,7 +69,7 
@@ class FileLock(object):          self.fn = fn      def __str__(self): -        return "<%s using file %r>" % (util.obj_name(self), self.fn) +        return "<%s using file %r>" % (type_utils.obj_name(self), self.fn)  def canon_sem_name(name): diff --git a/cloudinit/mergers/__init__.py b/cloudinit/mergers/__init__.py new file mode 100644 index 00000000..e1ff57ba --- /dev/null +++ b/cloudinit/mergers/__init__.py @@ -0,0 +1,154 @@ +# vi: ts=4 expandtab +# +#    Copyright (C) 2012 Yahoo! Inc. +# +#    Author: Joshua Harlow <harlowja@yahoo-inc.com> +# +#    This program is free software: you can redistribute it and/or modify +#    it under the terms of the GNU General Public License version 3, as +#    published by the Free Software Foundation. +# +#    This program is distributed in the hope that it will be useful, +#    but WITHOUT ANY WARRANTY; without even the implied warranty of +#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +#    GNU General Public License for more details. +# +#    You should have received a copy of the GNU General Public License +#    along with this program.  If not, see <http://www.gnu.org/licenses/>. + +import re + +from cloudinit import importer +from cloudinit import log as logging +from cloudinit import type_utils + +NAME_MTCH = re.compile(r"(^[a-zA-Z_][A-Za-z0-9_]*)\((.*?)\)$") + +LOG = logging.getLogger(__name__) +DEF_MERGE_TYPE = "list()+dict()+str()" + + +class UnknownMerger(object): +    # Named differently so auto-method finding +    # doesn't pick this up if there is ever a type +    # named "unknown" +    def _handle_unknown(self, _meth_wanted, value, _merge_with): +        return value + +    # This merging will attempt to look for a '_on_X' method +    # in our own object for a given object Y with type X, +    # if found it will be called to perform the merge of a source +    # object and a object to merge_with. 
+    # +    # If not found the merge will be given to a '_handle_unknown' +    # function which can decide what to do wit the 2 values. +    def merge(self, source, merge_with): +        type_name = type_utils.obj_name(source) +        type_name = type_name.lower() +        method_name = "_on_%s" % (type_name) +        meth = None +        args = [source, merge_with] +        if hasattr(self, method_name): +            meth = getattr(self, method_name) +        if not meth: +            meth = self._handle_unknown +            args.insert(0, method_name) +        return meth(*args) + + +class LookupMerger(UnknownMerger): +    def __init__(self, lookups=None): +        UnknownMerger.__init__(self) +        if lookups is None: +            self._lookups = [] +        else: +            self._lookups = lookups + +    # For items which can not be merged by the parent this object +    # will lookup in a internally maintained set of objects and +    # find which one of those objects can perform the merge. If +    # any of the contained objects have the needed method, they +    # will be called to perform the merge. 
+    def _handle_unknown(self, meth_wanted, value, merge_with): +        meth = None +        for merger in self._lookups: +            if hasattr(merger, meth_wanted): +                # First one that has that method/attr gets to be +                # the one that will be called +                meth = getattr(merger, meth_wanted) +                break +        if not meth: +            return UnknownMerger._handle_unknown(self, meth_wanted, +                                                 value, merge_with) +        return meth(value, merge_with) + + +def dict_extract_mergers(config): +    parsed_mergers = [] +    raw_mergers = config.get('merge_how') +    if raw_mergers is None: +        raw_mergers = config.get('merge_type') +    if raw_mergers is None: +        return parsed_mergers +    if isinstance(raw_mergers, (str, basestring)): +        return string_extract_mergers(raw_mergers) +    for m in raw_mergers: +        if isinstance(m, (dict)): +            name = m['name'] +            name = name.replace("-", "_").strip() +            opts = m['settings'] +        else: +            name = m[0] +            if len(m) >= 2: +                opts = m[1:] +            else: +                opts = [] +        if name: +            parsed_mergers.append((name, opts)) +    return parsed_mergers + + +def string_extract_mergers(merge_how): +    parsed_mergers = [] +    for m_name in merge_how.split("+"): +        # Canonicalize the name (so that it can be found +        # even when users alter it in various ways) +        m_name = m_name.lower().strip() +        m_name = m_name.replace("-", "_") +        if not m_name: +            continue +        match = NAME_MTCH.match(m_name) +        if not match: +            msg = ("Matcher identifer '%s' is not in the right format" % +                   (m_name)) +            raise ValueError(msg) +        (m_name, m_ops) = match.groups() +        m_ops = m_ops.strip().split(",") +        m_ops = [m.strip().lower() for 
m in m_ops if m.strip()] +        parsed_mergers.append((m_name, m_ops)) +    return parsed_mergers + + +def default_mergers(): +    return tuple(string_extract_mergers(DEF_MERGE_TYPE)) + + +def construct(parsed_mergers): +    mergers_to_be = [] +    for (m_name, m_ops) in parsed_mergers: +        merger_locs = importer.find_module(m_name, +                                           [__name__], +                                           ['Merger']) +        if not merger_locs: +            msg = "Could not find merger named '%s'" % (m_name) +            raise ImportError(msg) +        else: +            mod = importer.import_module(merger_locs[0]) +            mod_attr = getattr(mod, 'Merger') +            mergers_to_be.append((mod_attr, m_ops)) +    # Now form them... +    mergers = [] +    root = LookupMerger(mergers) +    for (attr, opts) in mergers_to_be: +        mergers.append(attr(root, opts)) +    return root diff --git a/cloudinit/mergers/dict.py b/cloudinit/mergers/dict.py new file mode 100644 index 00000000..45a7d3a5 --- /dev/null +++ b/cloudinit/mergers/dict.py @@ -0,0 +1,48 @@ +# vi: ts=4 expandtab +# +#    Copyright (C) 2012 Yahoo! Inc. +# +#    Author: Joshua Harlow <harlowja@yahoo-inc.com> +# +#    This program is free software: you can redistribute it and/or modify +#    it under the terms of the GNU General Public License version 3, as +#    published by the Free Software Foundation. +# +#    This program is distributed in the hope that it will be useful, +#    but WITHOUT ANY WARRANTY; without even the implied warranty of +#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +#    GNU General Public License for more details. +# +#    You should have received a copy of the GNU General Public License +#    along with this program.  If not, see <http://www.gnu.org/licenses/>. 
+ + +class Merger(object): +    def __init__(self, merger, opts): +        self._merger = merger +        self._overwrite = 'overwrite' in opts + +    # This merging algorithm will attempt to merge with +    # another dictionary, on encountering any other type of object +    # it will not merge with said object, but will instead return +    # the original value +    # +    # On encountering a dictionary, it will create a new dictionary +    # composed of the original and the one to merge with, if 'overwrite' +    # is enabled then keys that exist in the original will be overwritten +    # by keys in the one to merge with (and associated values). Otherwise +    # if not in overwrite mode the 2 conflicting keys themselves will +    # be merged. +    def _on_dict(self, value, merge_with): +        if not isinstance(merge_with, (dict)): +            return value +        merged = dict(value) +        for (k, v) in merge_with.items(): +            if k in merged: +                if not self._overwrite: +                    merged[k] = self._merger.merge(merged[k], v) +                else: +                    merged[k] = v +            else: +                merged[k] = v +        return merged diff --git a/cloudinit/mergers/list.py b/cloudinit/mergers/list.py new file mode 100644 index 00000000..a56ff007 --- /dev/null +++ b/cloudinit/mergers/list.py @@ -0,0 +1,50 @@ +# vi: ts=4 expandtab +# +#    Copyright (C) 2012 Yahoo! Inc. +# +#    Author: Joshua Harlow <harlowja@yahoo-inc.com> +# +#    This program is free software: you can redistribute it and/or modify +#    it under the terms of the GNU General Public License version 3, as +#    published by the Free Software Foundation. +# +#    This program is distributed in the hope that it will be useful, +#    but WITHOUT ANY WARRANTY; without even the implied warranty of +#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +#    GNU General Public License for more details. 
+# +#    You should have received a copy of the GNU General Public License +#    along with this program.  If not, see <http://www.gnu.org/licenses/>. + + +class Merger(object): +    def __init__(self, merger, opts): +        self._merger = merger +        self._discard_non = 'discard_non_list' in opts +        self._extend = 'extend' in opts + +    def _on_tuple(self, value, merge_with): +        return self._on_list(list(value), merge_with) + +    # On encountering a list or tuple type this action will be applied +    # a new list will be returned, if the value to merge with is itself +    # a list and we have been told to 'extend', then the value here will +    # be extended with the other list. If in 'extend' mode then we will +    # attempt to merge instead, which means that values from the list +    # to merge with will replace values in te original list (they will +    # also be merged recursively). +    # +    # If the value to merge with is not a list, and we are set to discared +    # then no modifications will take place, otherwise we will just append +    # the value to merge with onto the end of our own list. +    def _on_list(self, value, merge_with): +        new_value = list(value) +        if isinstance(merge_with, (tuple, list)): +            if self._extend: +                new_value.extend(merge_with) +            else: +                return new_value +        else: +            if not self._discard_non: +                new_value.append(merge_with) +        return new_value diff --git a/cloudinit/mergers/str.py b/cloudinit/mergers/str.py new file mode 100644 index 00000000..291c91c2 --- /dev/null +++ b/cloudinit/mergers/str.py @@ -0,0 +1,39 @@ +# vi: ts=4 expandtab +# +#    Copyright (C) 2012 Yahoo! Inc. 
+# +#    Author: Joshua Harlow <harlowja@yahoo-inc.com> +# +#    This program is free software: you can redistribute it and/or modify +#    it under the terms of the GNU General Public License version 3, as +#    published by the Free Software Foundation. +# +#    This program is distributed in the hope that it will be useful, +#    but WITHOUT ANY WARRANTY; without even the implied warranty of +#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +#    GNU General Public License for more details. +# +#    You should have received a copy of the GNU General Public License +#    along with this program.  If not, see <http://www.gnu.org/licenses/>. + + +class Merger(object): +    def __init__(self, _merger, opts): +        self._append = 'append' in opts + +    # On encountering a unicode object to merge value with +    # we will for now just proxy into the string method to let it handle it. +    def _on_unicode(self, value, merge_with): +        return self._on_str(value, merge_with) + +    # On encountering a string object to merge with we will +    # perform the following action, if appending we will +    # merge them together, otherwise we will just return value. 
+    def _on_str(self, value, merge_with): +        if not self._append: +            return value +        else: +            if isinstance(value, (unicode)): +                return value + unicode(merge_with) +            else: +                return value + str(merge_with) diff --git a/cloudinit/sources/DataSourceAltCloud.py b/cloudinit/sources/DataSourceAltCloud.py index 9812bdcb..64548d43 100644 --- a/cloudinit/sources/DataSourceAltCloud.py +++ b/cloudinit/sources/DataSourceAltCloud.py @@ -30,6 +30,7 @@ import os.path  from cloudinit import log as logging  from cloudinit import sources  from cloudinit import util +  from cloudinit.util import ProcessExecutionError  LOG = logging.getLogger(__name__) @@ -91,8 +92,8 @@ class DataSourceAltCloud(sources.DataSource):          self.supported_seed_starts = ("/", "file://")      def __str__(self): -        mstr = "%s [seed=%s]" % (util.obj_name(self), self.seed) -        return mstr +        root = sources.DataSource.__str__(self) +        return "%s [seed=%s]" % (root, self.seed)      def get_cloud_type(self):          ''' diff --git a/cloudinit/sources/DataSourceCloudStack.py b/cloudinit/sources/DataSourceCloudStack.py index 275caf0d..81c8cda9 100644 --- a/cloudinit/sources/DataSourceCloudStack.py +++ b/cloudinit/sources/DataSourceCloudStack.py @@ -48,9 +48,6 @@ class DataSourceCloudStack(sources.DataSource):              raise RuntimeError("No virtual router found!")          self.metadata_address = "http://%s/" % (vr_addr) -    def __str__(self): -        return util.obj_name(self) -      def _get_url_settings(self):          mcfg = self.ds_cfg          if not mcfg: diff --git a/cloudinit/sources/DataSourceConfigDrive.py b/cloudinit/sources/DataSourceConfigDrive.py index ec016a1d..5f152299 100644 --- a/cloudinit/sources/DataSourceConfigDrive.py +++ b/cloudinit/sources/DataSourceConfigDrive.py @@ -51,7 +51,9 @@ class DataSourceConfigDrive(sources.DataSource):          self.ec2_metadata = None      def 
__str__(self): -        mstr = "%s [%s,ver=%s]" % (util.obj_name(self), self.dsmode, +        root = sources.DataSource.__str__(self) +        mstr = "%s [%s,ver=%s]" % (root, +                                   self.dsmode,                                     self.version)          mstr += "[source=%s]" % (self.source)          return mstr @@ -152,7 +154,7 @@ class DataSourceConfigDrive(sources.DataSource):              return False          md = results['metadata'] -        md = util.mergedict(md, DEFAULT_METADATA) +        md = util.mergemanydict([md, DEFAULT_METADATA])          # Perform some metadata 'fixups'          # diff --git a/cloudinit/sources/DataSourceEc2.py b/cloudinit/sources/DataSourceEc2.py index 2db53446..f010e640 100644 --- a/cloudinit/sources/DataSourceEc2.py +++ b/cloudinit/sources/DataSourceEc2.py @@ -49,9 +49,6 @@ class DataSourceEc2(sources.DataSource):          self.seed_dir = os.path.join(paths.seed_dir, "ec2")          self.api_ver = DEF_MD_VERSION -    def __str__(self): -        return util.obj_name(self) -      def get_data(self):          seed_ret = {}          if util.read_optional_seed(seed_ret, base=(self.seed_dir + "/")): diff --git a/cloudinit/sources/DataSourceMAAS.py b/cloudinit/sources/DataSourceMAAS.py index b55d8a21..612d8ffa 100644 --- a/cloudinit/sources/DataSourceMAAS.py +++ b/cloudinit/sources/DataSourceMAAS.py @@ -50,7 +50,8 @@ class DataSourceMAAS(sources.DataSource):          self.oauth_clockskew = None      def __str__(self): -        return "%s [%s]" % (util.obj_name(self), self.base_url) +        root = sources.DataSource.__str__(self) +        return "%s [%s]" % (root, self.base_url)      def get_data(self):          mcfg = self.ds_cfg diff --git a/cloudinit/sources/DataSourceNoCloud.py b/cloudinit/sources/DataSourceNoCloud.py index 603f0155..08a853cc 100644 --- a/cloudinit/sources/DataSourceNoCloud.py +++ b/cloudinit/sources/DataSourceNoCloud.py @@ -40,9 +40,8 @@ class DataSourceNoCloud(sources.DataSource):      
    self.supported_seed_starts = ("/", "file://")      def __str__(self): -        mstr = "%s [seed=%s][dsmode=%s]" % (util.obj_name(self), -                                            self.seed, self.dsmode) -        return mstr +        root = sources.DataSource.__str__(self) +        return "%s [seed=%s][dsmode=%s]" % (root, self.seed, self.dsmode)      def get_data(self):          defaults = { @@ -65,7 +64,7 @@ class DataSourceNoCloud(sources.DataSource):          # Check to see if the seed dir has data.          seedret = {}          if util.read_optional_seed(seedret, base=self.seed_dir + "/"): -            md = util.mergedict(md, seedret['meta-data']) +            md = util.mergemanydict([md, seedret['meta-data']])              ud = seedret['user-data']              found.append(self.seed_dir)              LOG.debug("Using seeded cache data from %s", self.seed_dir) @@ -82,7 +81,7 @@ class DataSourceNoCloud(sources.DataSource):              if self.ds_cfg['user-data']:                  ud = self.ds_cfg['user-data']              if self.ds_cfg['meta-data'] is not False: -                md = util.mergedict(md, self.ds_cfg['meta-data']) +                md = util.mergemanydict([md, self.ds_cfg['meta-data']])              if 'ds_config' not in found:                  found.append("ds_config") @@ -100,7 +99,7 @@ class DataSourceNoCloud(sources.DataSource):                      LOG.debug("Attempting to use data from %s", dev)                      (newmd, newud) = util.mount_cb(dev, util.read_seeded) -                    md = util.mergedict(newmd, md) +                    md = util.mergemanydict([newmd, md])                      ud = newud                      # For seed from a device, the default mode is 'net'. 
@@ -150,11 +149,11 @@ class DataSourceNoCloud(sources.DataSource):              LOG.debug("Using seeded cache data from %s", seedfrom)              # Values in the command line override those from the seed -            md = util.mergedict(md, md_seed) +            md = util.mergemanydict([md, md_seed])              found.append(seedfrom)          # Now that we have exhausted any other places merge in the defaults -        md = util.mergedict(md, defaults) +        md = util.mergemanydict([md, defaults])          # Update the network-interfaces if metadata had 'network-interfaces'          # entry and this is the local datasource, or 'seedfrom' was used diff --git a/cloudinit/sources/DataSourceNone.py b/cloudinit/sources/DataSourceNone.py index c2125bee..12a8a992 100644 --- a/cloudinit/sources/DataSourceNone.py +++ b/cloudinit/sources/DataSourceNone.py @@ -18,7 +18,6 @@  from cloudinit import log as logging  from cloudinit import sources -from cloudinit import util  LOG = logging.getLogger(__name__) @@ -41,9 +40,6 @@ class DataSourceNone(sources.DataSource):      def get_instance_id(self):          return 'iid-datasource-none' -    def __str__(self): -        return util.obj_name(self) -      @property      def is_disconnected(self):          return True diff --git a/cloudinit/sources/DataSourceOVF.py b/cloudinit/sources/DataSourceOVF.py index e90150c6..0530c4b7 100644 --- a/cloudinit/sources/DataSourceOVF.py +++ b/cloudinit/sources/DataSourceOVF.py @@ -43,7 +43,8 @@ class DataSourceOVF(sources.DataSource):          self.supported_seed_starts = ("/", "file://")      def __str__(self): -        return "%s [seed=%s]" % (util.obj_name(self), self.seed) +        root = sources.DataSource.__str__(self) +        return "%s [seed=%s]" % (root, self.seed)      def get_data(self):          found = [] @@ -93,11 +94,11 @@ class DataSourceOVF(sources.DataSource):              (md_seed, ud) = util.read_seeded(seedfrom, timeout=None)              LOG.debug("Using seeded cache 
data from %s", seedfrom) -            md = util.mergedict(md, md_seed) +            md = util.mergemanydict([md, md_seed])              found.append(seedfrom)          # Now that we have exhausted any other places merge in the defaults -        md = util.mergedict(md, defaults) +        md = util.mergemanydict([md, defaults])          self.seed = ",".join(found)          self.metadata = md diff --git a/cloudinit/sources/__init__.py b/cloudinit/sources/__init__.py index 96baff90..d8fbacdd 100644 --- a/cloudinit/sources/__init__.py +++ b/cloudinit/sources/__init__.py @@ -25,6 +25,7 @@ import os  from cloudinit import importer  from cloudinit import log as logging +from cloudinit import type_utils  from cloudinit import user_data as ud  from cloudinit import util @@ -52,7 +53,7 @@ class DataSource(object):          self.userdata = None          self.metadata = None          self.userdata_raw = None -        name = util.obj_name(self) +        name = type_utils.obj_name(self)          if name.startswith(DS_PREFIX):              name = name[len(DS_PREFIX):]          self.ds_cfg = util.get_cfg_by_path(self.sys_cfg, @@ -62,6 +63,9 @@ class DataSource(object):          else:              self.ud_proc = ud_proc +    def __str__(self): +        return type_utils.obj_name(self) +      def get_userdata(self, apply_filter=False):          if self.userdata is None:              self.userdata = self.ud_proc.process(self.get_userdata_raw()) @@ -214,7 +218,7 @@ def normalize_pubkey_data(pubkey_data):  def find_source(sys_cfg, distro, paths, ds_deps, cfg_list, pkg_list):      ds_list = list_sources(cfg_list, ds_deps, pkg_list) -    ds_names = [util.obj_name(f) for f in ds_list] +    ds_names = [type_utils.obj_name(f) for f in ds_list]      LOG.debug("Searching for data source in: %s", ds_names)      for cls in ds_list: @@ -222,7 +226,7 @@ def find_source(sys_cfg, distro, paths, ds_deps, cfg_list, pkg_list):              LOG.debug("Seeing if we can get any data from %s", cls)         
     s = cls(sys_cfg, distro, paths)              if s.get_data(): -                return (s, util.obj_name(cls)) +                return (s, type_utils.obj_name(cls))          except Exception:              util.logexc(LOG, "Getting data from %s failed", cls) diff --git a/cloudinit/stages.py b/cloudinit/stages.py index d7d1dea0..543d247f 100644 --- a/cloudinit/stages.py +++ b/cloudinit/stages.py @@ -43,6 +43,7 @@ from cloudinit import helpers  from cloudinit import importer  from cloudinit import log as logging  from cloudinit import sources +from cloudinit import type_utils  from cloudinit import util  LOG = logging.getLogger(__name__) @@ -211,7 +212,7 @@ class Init(object):          # Any config provided???          pkg_list = self.cfg.get('datasource_pkg_list') or []          # Add the defaults at the end -        for n in ['', util.obj_name(sources)]: +        for n in ['', type_utils.obj_name(sources)]:              if n not in pkg_list:                  pkg_list.append(n)          cfg_list = self.cfg.get('datasource_list') or [] @@ -271,7 +272,7 @@ class Init(object):          dp = self.paths.get_cpath('data')          # Write what the datasource was and is.. 
-        ds = "%s: %s" % (util.obj_name(self.datasource), self.datasource) +        ds = "%s: %s" % (type_utils.obj_name(self.datasource), self.datasource)          previous_ds = None          ds_fn = os.path.join(idir, 'datasource')          try: @@ -488,7 +489,7 @@ class Modules(object):              else:                  raise TypeError(("Failed to read '%s' item in config,"                                   " unknown type %s") % -                                 (item, util.obj_name(item))) +                                 (item, type_utils.obj_name(item)))          return module_list      def _fixup_modules(self, raw_mods): @@ -506,7 +507,7 @@ class Modules(object):                  # Reset it so when ran it will get set to a known value                  freq = None              mod_locs = importer.find_module(mod_name, -                                            ['', util.obj_name(config)], +                                            ['', type_utils.obj_name(config)],                                              ['handle'])              if not mod_locs:                  LOG.warn("Could not find module named %s", mod_name) diff --git a/cloudinit/type_utils.py b/cloudinit/type_utils.py new file mode 100644 index 00000000..2decbfc5 --- /dev/null +++ b/cloudinit/type_utils.py @@ -0,0 +1,34 @@ +# vi: ts=4 expandtab +# +#    Copyright (C) 2012 Canonical Ltd. +#    Copyright (C) 2012 Hewlett-Packard Development Company, L.P. +#    Copyright (C) 2012 Yahoo! Inc. +# +#    Author: Scott Moser <scott.moser@canonical.com> +#    Author: Juerg Haefliger <juerg.haefliger@hp.com> +#    Author: Joshua Harlow <harlowja@yahoo-inc.com> +# +#    This program is free software: you can redistribute it and/or modify +#    it under the terms of the GNU General Public License version 3, as +#    published by the Free Software Foundation. 
+# +#    This program is distributed in the hope that it will be useful, +#    but WITHOUT ANY WARRANTY; without even the implied warranty of +#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +#    GNU General Public License for more details. +# +#    You should have received a copy of the GNU General Public License +#    along with this program.  If not, see <http://www.gnu.org/licenses/>. +# +# pylint: disable=C0302 + +import types + + +def obj_name(obj): +    if isinstance(obj, (types.TypeType, +                        types.ModuleType, +                        types.FunctionType, +                        types.LambdaType)): +        return str(obj.__name__) +    return obj_name(obj.__class__) diff --git a/cloudinit/util.py b/cloudinit/util.py index afde2066..709d5cca 100644 --- a/cloudinit/util.py +++ b/cloudinit/util.py @@ -43,14 +43,15 @@ import subprocess  import sys  import tempfile  import time -import types  import urlparse  import yaml  from cloudinit import importer  from cloudinit import log as logging +from cloudinit import mergers  from cloudinit import safeyaml +from cloudinit import type_utils  from cloudinit import url_helper as uhelp  from cloudinit import version @@ -194,11 +195,12 @@ def fork_cb(child_cb, *args):              os._exit(0)  # pylint: disable=W0212          except:              logexc(LOG, ("Failed forking and" -                         " calling callback %s"), obj_name(child_cb)) +                         " calling callback %s"), +                   type_utils.obj_name(child_cb))              os._exit(1)  # pylint: disable=W0212      else:          LOG.debug("Forked child %s who will run callback %s", -                  fid, obj_name(child_cb)) +                  fid, type_utils.obj_name(child_cb))  def is_true(val, addons=None): @@ -512,40 +514,26 @@ def make_url(scheme, host, port=None,      return urlparse.urlunparse(pieces) -def obj_name(obj): -    if isinstance(obj, (types.TypeType, -                        
types.ModuleType, -                        types.FunctionType, -                        types.LambdaType)): -        return str(obj.__name__) -    return obj_name(obj.__class__) - -  def mergemanydict(srcs, reverse=False):      if reverse:          srcs = reversed(srcs)      m_cfg = {} +    merge_how = [mergers.default_mergers()]      for a_cfg in srcs:          if a_cfg: -            m_cfg = mergedict(m_cfg, a_cfg) +            # Take the last merger as the one that +            # will define how to merge next... +            mergers_to_apply = list(merge_how[-1]) +            merger = mergers.construct(mergers_to_apply) +            m_cfg = merger.merge(m_cfg, a_cfg) +            # If the config has now has new merger set, +            # extract them to be used next time... +            new_mergers = mergers.dict_extract_mergers(m_cfg) +            if new_mergers: +                merge_how.append(new_mergers)      return m_cfg -def mergedict(src, cand): -    """ -    Merge values from C{cand} into C{src}. -    If C{src} has a key C{cand} will not override. -    Nested dictionaries are merged recursively. -    """ -    if isinstance(src, dict) and isinstance(cand, dict): -        for (k, v) in cand.iteritems(): -            if k not in src: -                src[k] = v -            else: -                src[k] = mergedict(src[k], v) -    return src - -  @contextlib.contextmanager  def chdir(ndir):      curr = os.getcwd() @@ -644,7 +632,7 @@ def load_yaml(blob, default=None, allowed=(dict,)):              # Yes this will just be caught, but thats ok for now...              
raise TypeError(("Yaml load allows %s root types,"                               " but got %s instead") % -                            (allowed, obj_name(converted))) +                            (allowed, type_utils.obj_name(converted)))          loaded = converted      except (yaml.YAMLError, TypeError, ValueError):          if len(blob) == 0: @@ -713,7 +701,7 @@ def read_conf_with_confd(cfgfile):              if not isinstance(confd, (str, basestring)):                  raise TypeError(("Config file %s contains 'conf_d' "                                   "with non-string type %s") % -                                 (cfgfile, obj_name(confd))) +                                 (cfgfile, type_utils.obj_name(confd)))              else:                  confd = str(confd).strip()      elif os.path.isdir("%s.d" % cfgfile): @@ -724,7 +712,7 @@ def read_conf_with_confd(cfgfile):      # Conf.d settings override input configuration      confd_cfg = read_conf_d(confd) -    return mergedict(confd_cfg, cfg) +    return mergemanydict([confd_cfg, cfg])  def read_cc_from_cmdline(cmdline=None): @@ -1471,7 +1459,7 @@ def shellify(cmdlist, add_header=True):          else:              raise RuntimeError(("Unable to shellify type %s"                                  " which is not a list or string") -                               % (obj_name(args))) +                               % (type_utils.obj_name(args)))      LOG.debug("Shellified %s commands.", cmds_made)      return content diff --git a/doc/merging.txt b/doc/merging.txt new file mode 100644 index 00000000..f719aec8 --- /dev/null +++ b/doc/merging.txt @@ -0,0 +1,179 @@ +Arriving in 0.7.2 is a new way to handle dictionary merging in cloud-init. +--- + +Overview +-------- + +This was done because it has been a common feature request that there be a +way to specify how cloud-config yaml "dictionaries" are merged together when +there are multiple yamls to merge together (say when performing an #include). 
+ +Since previously the merging algorithm was very simple and would only overwrite +and not append lists, or strings, and so on it was decided to create a new and +improved way to merge dictionaries (and their contained objects) together in a +way that is customizable, thus allowing for users who provide cloud-config data +to determine exactly how their objects will be merged. + +For example. + +#cloud-config (1) +run_cmd: +  - bash1 +  - bash2 + +#cloud-config (2) +run_cmd: +  - bash3 +  - bash4 + +The previous way of merging the following 2 objects would result in a final  +cloud-config object that contains the following. + +#cloud-config (merged) +run_cmd: +  - bash3 +  - bash4 + +Typically this is not what users want, instead they would likely prefer: + +#cloud-config (merged) +run_cmd: +  - bash1 +  - bash2 +  - bash3 +  - bash4 + +This way makes it easier to combine the various cloud-config objects you have +into a more useful list, thus reducing duplication that would have had to +occur in the previous method to accomplish the same result. + +Customizability +--------------- + +Since the above merging algorithm may not always be the desired merging +algorithm (like how the merging algorithm in < 0.7.2 was not always the preferred +one) the concept of customizing how merging can be done was introduced through +a new concept called 'merge classes'.  + +A merge class is a class definition which provides functions that can be used +to merge a given type with another given type. 
+ +An example of one of these merging classes is the following: + +class Merger(object): +    def __init__(self, merger, opts): +        self._merger = merger +        self._overwrite = 'overwrite' in opts + +    # This merging algorithm will attempt to merge with +    # another dictionary, on encountering any other type of object +    # it will not merge with said object, but will instead return +    # the original value +    # +    # On encountering a dictionary, it will create a new dictionary +    # composed of the original and the one to merge with, if 'overwrite' +    # is enabled then keys that exist in the original will be overwritten +    # by keys in the one to merge with (and associated values). Otherwise +    # if not in overwrite mode the 2 conflicting keys themselves will +    # be merged. +    def _on_dict(self, value, merge_with): +        if not isinstance(merge_with, (dict)): +            return value +        merged = dict(value) +        for (k, v) in merge_with.items(): +            if k in merged: +                if not self._overwrite: +                    merged[k] = self._merger.merge(merged[k], v) +                else: +                    merged[k] = v +            else: +                merged[k] = v +        return merged + +As you can see there is a '_on_dict' method here that will be given a source value +and a value to merge with. The result will be the merged object. This code itself +is called by another merging class which 'directs' the merging to happen by +analyzing the types of the objects to merge and attempting to find a known object +that will merge that type. I will avoid pasting that here, but it can be found +in the mergers/__init__.py file (see LookupMerger and UnknownMerger). 
+ +So following the typical cloud-init way of allowing source code to be downloaded +and used dynamically, it is possible for users to inject their own merging files +to handle specific types of merging as they choose (the basic ones included will +handle lists, dicts, and strings). Note how each merge can have options associated +with it which affect how the merging is performed, for example a dictionary merger +can be told to overwrite instead of attempt to merge, or a string merger can be +told to append strings instead of discarding other strings to merge with. + +How to activate +--------------- + +There are a few ways to activate the merging algorithms, and to customize them +for your own usage. + +1. The first way involves the usage of MIME messages in cloud-init to specify +   multipart documents (this is one way in which multiple cloud-config is joined +   together into a single cloud-config). Two new headers are looked for, both +   of which can define the way merging is done (the first header to exist wins). +   These new headers (in lookup order) are 'Merge-Type' and 'X-Merge-Type'. The value +   should be a string which will satisfy the new merging format definition (see +   below for this format). +2. The second way is actually specifying the merge-type in the body of the +   cloud-config dictionary. There are 2 ways to specify this, either as a string +   or as a dictionary (see format below). The keys that are looked up for this +   definition are the following (in order), 'merge_how', 'merge_type'. + +*String format* + +The string format that is expected is the following. + +"classname(option1,option2)+classname2(option3,option4)" (and so on) + +The class name there will be connected to class names used when looking for the +class that can be used to merge and options provided will be given to the class +on construction of that class. 
+ +For example, the default string that is used when none is provided is the following: + +"list(extend)+dict()+str(append)" + +*Dictionary format* + +In cases where a dictionary can be used to specify the same information as the +string format (ie option #2 of above) it can be used, for example. + +merge_how: + - name: list +   settings: [extend] + - name: dict +   settings: [] + - name: str +   settings: [append] + +This would be the equivalent format for default string format but in dictionary +form instead of string form. + +Specifying multiple types and its effect +---------------------------------------- + +Now you may be asking yourself, if I specify a merge-type header or dictionary +for every cloud-config that I provide, what exactly happens? + +The answer is that when merging, a stack of 'merging classes' is kept, the +first one on that stack is the default merging classes, this set of mergers +will be used when the first cloud-config is merged with the initial empty +cloud-config dictionary. If the cloud-config that was just merged provided a  +set of merging classes (via the above formats) then those merging classes will +be pushed onto the stack. Now if there is a second cloud-config to be merged then +the merging classes from the cloud-config before the first will be used (not the +default) and so on. This way a cloud-config can decide how it will merge with a +cloud-config dictionary coming after it. + +Other uses +---------- + +The default merging algorithm for merging conf.d yaml files (which form an initial +yaml config for cloud-init) was also changed to use this mechanism so its full +benefits (and customization) can also be used there as well. Other places that +used the previous merging are also similarly now extensible (metadata merging for +example). 
diff --git a/tests/unittests/test__init__.py b/tests/unittests/test__init__.py index ac082076..2c0abfbc 100644 --- a/tests/unittests/test__init__.py +++ b/tests/unittests/test__init__.py @@ -22,7 +22,8 @@ class FakeModule(handlers.Handler):      def list_types(self):          return self.types -    def _handle_part(self, data, ctype, filename, payload, frequency): +    def handle_part(self, _data, ctype, filename,  # pylint: disable=W0221 +                    payload, frequency):          pass @@ -103,6 +104,9 @@ class TestHandlerHandlePart(MockerTestCase):          self.filename = "fake filename"          self.payload = "fake payload"          self.frequency = settings.PER_INSTANCE +        self.headers = { +            'Content-Type': self.ctype, +        }      def test_normal_version_1(self):          """ @@ -118,8 +122,8 @@ class TestHandlerHandlePart(MockerTestCase):                               self.payload)          self.mocker.replay() -        handlers.run_part(mod_mock, self.data, self.ctype, self.filename, -                          self.payload, self.frequency) +        handlers.run_part(mod_mock, self.data, self.filename, +                          self.payload, self.frequency, self.headers)      def test_normal_version_2(self):          """ @@ -135,8 +139,8 @@ class TestHandlerHandlePart(MockerTestCase):                               self.payload, self.frequency)          self.mocker.replay() -        handlers.run_part(mod_mock, self.data, self.ctype, self.filename, -                          self.payload, self.frequency) +        handlers.run_part(mod_mock, self.data, self.filename, +                          self.payload, self.frequency, self.headers)      def test_modfreq_per_always(self):          """ @@ -152,8 +156,8 @@ class TestHandlerHandlePart(MockerTestCase):                               self.payload)          self.mocker.replay() -        handlers.run_part(mod_mock, self.data, self.ctype, self.filename, -                          
self.payload, self.frequency) +        handlers.run_part(mod_mock, self.data, self.filename, +                          self.payload, self.frequency, self.headers)      def test_no_handle_when_modfreq_once(self):          """C{handle_part} is not called if frequency is once.""" @@ -163,8 +167,8 @@ class TestHandlerHandlePart(MockerTestCase):          self.mocker.result(settings.PER_ONCE)          self.mocker.replay() -        handlers.run_part(mod_mock, self.data, self.ctype, self.filename, -                          self.payload, self.frequency) +        handlers.run_part(mod_mock, self.data, self.filename, +                          self.payload, self.frequency, self.headers)      def test_exception_is_caught(self):          """Exceptions within C{handle_part} are caught and logged.""" @@ -178,8 +182,8 @@ class TestHandlerHandlePart(MockerTestCase):          self.mocker.throw(Exception())          self.mocker.replay() -        handlers.run_part(mod_mock, self.data, self.ctype, self.filename, -                          self.payload, self.frequency) +        handlers.run_part(mod_mock, self.data, self.filename, +                          self.payload, self.frequency, self.headers)  class TestCmdlineUrl(MockerTestCase): diff --git a/tests/unittests/test_merging.py b/tests/unittests/test_merging.py index 0037b966..ad137e85 100644 --- a/tests/unittests/test_merging.py +++ b/tests/unittests/test_merging.py @@ -1,62 +1,142 @@ -from mocker import MockerTestCase - -from cloudinit import util - - -class TestMergeDict(MockerTestCase): -    def test_simple_merge(self): -        """Test simple non-conflict merge.""" -        source = {"key1": "value1"} -        candidate = {"key2": "value2"} -        result = util.mergedict(source, candidate) -        self.assertEqual({"key1": "value1", "key2": "value2"}, result) - -    def test_nested_merge(self): -        """Test nested merge.""" -        source = {"key1": {"key1.1": "value1.1"}} -        candidate = {"key1": {"key1.2": 
"value1.2"}} -        result = util.mergedict(source, candidate) -        self.assertEqual( -            {"key1": {"key1.1": "value1.1", "key1.2": "value1.2"}}, result) - -    def test_merge_does_not_override(self): -        """Test that candidate doesn't override source.""" -        source = {"key1": "value1", "key2": "value2"} -        candidate = {"key1": "value2", "key2": "NEW VALUE"} -        result = util.mergedict(source, candidate) -        self.assertEqual(source, result) - -    def test_empty_candidate(self): -        """Test empty candidate doesn't change source.""" -        source = {"key": "value"} -        candidate = {} -        result = util.mergedict(source, candidate) -        self.assertEqual(source, result) - -    def test_empty_source(self): -        """Test empty source is replaced by candidate.""" -        source = {} -        candidate = {"key": "value"} -        result = util.mergedict(source, candidate) -        self.assertEqual(candidate, result) - -    def test_non_dict_candidate(self): -        """Test non-dict candidate is discarded.""" -        source = {"key": "value"} -        candidate = "not a dict" -        result = util.mergedict(source, candidate) -        self.assertEqual(source, result) - -    def test_non_dict_source(self): -        """Test non-dict source is not modified with a dict candidate.""" -        source = "not a dict" -        candidate = {"key": "value"} -        result = util.mergedict(source, candidate) -        self.assertEqual(source, result) - -    def test_neither_dict(self): -        """Test if neither candidate or source is dict source wins.""" -        source = "source" -        candidate = "candidate" -        result = util.mergedict(source, candidate) -        self.assertEqual(source, result) +from tests.unittests import helpers + +from cloudinit import mergers + + +class TestSimpleRun(helpers.MockerTestCase): +    def test_basic_merge(self): +        source = { +            'Blah': ['blah2'], +         
   'Blah3': 'c', +        } +        merge_with = { +            'Blah2': ['blah3'], +            'Blah3': 'b', +            'Blah': ['123'], +        } +        # Basic merge should not do thing special +        merge_how = "list()+dict()+str()" +        merger_set = mergers.string_extract_mergers(merge_how) +        self.assertEquals(3, len(merger_set)) +        merger = mergers.construct(merger_set) +        merged = merger.merge(source, merge_with) +        self.assertEquals(merged['Blah'], ['blah2']) +        self.assertEquals(merged['Blah2'], ['blah3']) +        self.assertEquals(merged['Blah3'], 'c') + +    def test_dict_overwrite(self): +        source = { +            'Blah': ['blah2'], +        } +        merge_with = { +            'Blah': ['123'], +        } +        # Now lets try a dict overwrite +        merge_how = "list()+dict(overwrite)+str()" +        merger_set = mergers.string_extract_mergers(merge_how) +        self.assertEquals(3, len(merger_set)) +        merger = mergers.construct(merger_set) +        merged = merger.merge(source, merge_with) +        self.assertEquals(merged['Blah'], ['123']) + +    def test_string_append(self): +        source = { +            'Blah': 'blah2', +        } +        merge_with = { +            'Blah': '345', +        } +        merge_how = "list()+dict()+str(append)" +        merger_set = mergers.string_extract_mergers(merge_how) +        self.assertEquals(3, len(merger_set)) +        merger = mergers.construct(merger_set) +        merged = merger.merge(source, merge_with) +        self.assertEquals(merged['Blah'], 'blah2345') + +    def test_list_extend(self): +        source = ['abc'] +        merge_with = ['123'] +        merge_how = "list(extend)+dict()+str()" +        merger_set = mergers.string_extract_mergers(merge_how) +        self.assertEquals(3, len(merger_set)) +        merger = mergers.construct(merger_set) +        merged = merger.merge(source, merge_with) +        self.assertEquals(merged, 
['abc', '123']) + +    def test_deep_merge(self): +        source = { +            'a': [1, 'b', 2], +            'b': 'blahblah', +            'c': { +                'e': [1, 2, 3], +                'f': 'bigblobof', +                'iamadict': { +                    'ok': 'ok', +                } +            }, +            'run': [ +                'runme', +                'runme2', +            ], +            'runmereally': [ +                'e', ['a'], 'd', +            ], +        } +        merge_with = { +            'a': ['e', 'f', 'g'], +            'b': 'more', +            'c': { +                'a': 'b', +                'f': 'stuff', +            }, +            'run': [ +                'morecmd', +                'moremoremore', +            ], +            'runmereally': [ +                'blah', ['b'], 'e', +            ], +        } +        merge_how = "list(extend)+dict()+str(append)" +        merger_set = mergers.string_extract_mergers(merge_how) +        self.assertEquals(3, len(merger_set)) +        merger = mergers.construct(merger_set) +        merged = merger.merge(source, merge_with) +        self.assertEquals(merged['a'], [1, 'b', 2, 'e', 'f', 'g']) +        self.assertEquals(merged['b'], 'blahblahmore') +        self.assertEquals(merged['c']['f'], 'bigblobofstuff') +        self.assertEquals(merged['run'], ['runme', 'runme2', 'morecmd', +                                          'moremoremore']) +        self.assertEquals(merged['runmereally'], ['e', ['a'], 'd', 'blah', +                                                  ['b'], 'e']) + +    def test_dict_overwrite_layered(self): +        source = { +            'Blah3': { +                'f': '3', +                'g': { +                    'a': 'b', +                } +            } +        } +        merge_with = { +            'Blah3': { +                'e': '2', +                'g': { +                    'e': 'f', +                } +            } +        } +        
merge_how = "list()+dict()+str()" +        merger_set = mergers.string_extract_mergers(merge_how) +        self.assertEquals(3, len(merger_set)) +        merger = mergers.construct(merger_set) +        merged = merger.merge(source, merge_with) +        self.assertEquals(merged['Blah3'], { +                'e': '2', +                'f': '3', +                'g': { +                    'a': 'b', +                    'e': 'f', +                } +        }) diff --git a/tests/unittests/test_userdata.py b/tests/unittests/test_userdata.py index 82a4c555..fdfe2542 100644 --- a/tests/unittests/test_userdata.py +++ b/tests/unittests/test_userdata.py @@ -7,14 +7,17 @@ import os  from email.mime.base import MIMEBase -from mocker import MockerTestCase - +from cloudinit import handlers +from cloudinit import helpers as c_helpers  from cloudinit import log  from cloudinit import sources  from cloudinit import stages +from cloudinit import util  INSTANCE_ID = "i-testing" +from tests.unittests import helpers +  class FakeDataSource(sources.DataSource): @@ -26,22 +29,16 @@ class FakeDataSource(sources.DataSource):  # FIXME: these tests shouldn't be checking log output??  # Weirddddd... - - -class TestConsumeUserData(MockerTestCase): +class TestConsumeUserData(helpers.FilesystemMockingTestCase):      def setUp(self): -        MockerTestCase.setUp(self) -        # Replace the write so no actual files -        # get written out... 
-        self.mock_write = self.mocker.replace("cloudinit.util.write_file", -            passthrough=False) +        helpers.FilesystemMockingTestCase.setUp(self)          self._log = None          self._log_file = None          self._log_handler = None      def tearDown(self): -        MockerTestCase.tearDown(self) +        helpers.FilesystemMockingTestCase.tearDown(self)          if self._log_handler and self._log:              self._log.removeHandler(self._log_handler) @@ -53,13 +50,77 @@ class TestConsumeUserData(MockerTestCase):          self._log.addHandler(self._log_handler)          return log_file +    def test_merging_cloud_config(self): +        blob = ''' +#cloud-config +a: b +e: f +run: + - b + - c +''' +        message1 = MIMEBase("text", "cloud-config") +        message1['Merge-Type'] = 'dict()+list(extend)+str(append)' +        message1.set_payload(blob) + +        blob2 = ''' +#cloud-config +a: e +e: g +run: + - stuff + - morestuff +''' +        message2 = MIMEBase("text", "cloud-config") +        message2['X-Merge-Type'] = 'dict()+list(extend)+str()' +        message2.set_payload(blob2) + +        blob3 = ''' +#cloud-config +e: + - 1 + - 2 + - 3 +p: 1 +''' +        message3 = MIMEBase("text", "cloud-config") +        message3['Merge-Type'] = 'dict()+list()+str()' +        message3.set_payload(blob3) + +        messages = [message1, message2, message3] + +        paths = c_helpers.Paths({}, ds=FakeDataSource('')) +        cloud_cfg = handlers.cloud_config.CloudConfigPartHandler(paths) + +        new_root = self.makeDir() +        self.patchUtils(new_root) +        self.patchOS(new_root) +        cloud_cfg.handle_part(None, handlers.CONTENT_START, None, None, None, +                              None) +        for i, m in enumerate(messages): +            headers = dict(m) +            fn = "part-%s" % (i + 1) +            payload = m.get_payload(decode=True) +            cloud_cfg.handle_part(None, headers['Content-Type'], +                         
         fn, payload, None, headers) +        cloud_cfg.handle_part(None, handlers.CONTENT_END, None, None, None, +                              None) +        contents = util.load_file(paths.get_ipath('cloud_config')) +        contents = util.load_yaml(contents) +        self.assertEquals(contents['run'], ['b', 'c', 'stuff', 'morestuff']) +        self.assertEquals(contents['a'], 'be') +        self.assertEquals(contents['e'], 'fg') +        self.assertEquals(contents['p'], 1) +      def test_unhandled_type_warning(self):          """Raw text without magic is ignored but shows warning."""          ci = stages.Init()          data = "arbitrary text\n"          ci.datasource = FakeDataSource(data) -        self.mock_write(ci.paths.get_ipath("cloud_config"), "", 0600) +        mock_write = self.mocker.replace("cloudinit.util.write_file", +                                              passthrough=False) +        mock_write(ci.paths.get_ipath("cloud_config"), "", 0600)          self.mocker.replay()          log_file = self.capture_log(logging.WARNING) @@ -76,7 +137,9 @@ class TestConsumeUserData(MockerTestCase):          message.set_payload("Just text")          ci.datasource = FakeDataSource(message.as_string()) -        self.mock_write(ci.paths.get_ipath("cloud_config"), "", 0600) +        mock_write = self.mocker.replace("cloudinit.util.write_file", +                                              passthrough=False) +        mock_write(ci.paths.get_ipath("cloud_config"), "", 0600)          self.mocker.replay()          log_file = self.capture_log(logging.WARNING) @@ -93,8 +156,10 @@ class TestConsumeUserData(MockerTestCase):          ci.datasource = FakeDataSource(script)          outpath = os.path.join(ci.paths.get_ipath_cur("scripts"), "part-001") -        self.mock_write(ci.paths.get_ipath("cloud_config"), "", 0600) -        self.mock_write(outpath, script, 0700) +        mock_write = self.mocker.replace("cloudinit.util.write_file", +                                
              passthrough=False) +        mock_write(ci.paths.get_ipath("cloud_config"), "", 0600) +        mock_write(outpath, script, 0700)          self.mocker.replay()          log_file = self.capture_log(logging.WARNING) @@ -111,8 +176,10 @@ class TestConsumeUserData(MockerTestCase):          ci.datasource = FakeDataSource(message.as_string())          outpath = os.path.join(ci.paths.get_ipath_cur("scripts"), "part-001") -        self.mock_write(ci.paths.get_ipath("cloud_config"), "", 0600) -        self.mock_write(outpath, script, 0700) +        mock_write = self.mocker.replace("cloudinit.util.write_file", +                                              passthrough=False) +        mock_write(ci.paths.get_ipath("cloud_config"), "", 0600) +        mock_write(outpath, script, 0700)          self.mocker.replay()          log_file = self.capture_log(logging.WARNING) @@ -129,8 +196,10 @@ class TestConsumeUserData(MockerTestCase):          ci.datasource = FakeDataSource(message.as_string())          outpath = os.path.join(ci.paths.get_ipath_cur("scripts"), "part-001") -        self.mock_write(outpath, script, 0700) -        self.mock_write(ci.paths.get_ipath("cloud_config"), "", 0600) +        mock_write = self.mocker.replace("cloudinit.util.write_file", +                                         passthrough=False) +        mock_write(outpath, script, 0700) +        mock_write(ci.paths.get_ipath("cloud_config"), "", 0600)          self.mocker.replay()          log_file = self.capture_log(logging.WARNING) | 
