25 files changed, 515 insertions, 119 deletions
diff --git a/cloudinit/config/cc_landscape.py b/cloudinit/config/cc_landscape.py
index 2efdff79..47c10a97 100644
--- a/cloudinit/config/cc_landscape.py
+++ b/cloudinit/config/cc_landscape.py
@@ -24,6 +24,7 @@ from StringIO import StringIO
 
 from configobj import ConfigObj
 
+from cloudinit import type_utils
 from cloudinit import util
 
 from cloudinit.settings import PER_INSTANCE
@@ -58,7 +59,7 @@ def handle(_name, cfg, cloud, log, _args):
     if not isinstance(ls_cloudcfg, (dict)):
         raise RuntimeError(("'landscape' key existed in config,"
                             " but not a dictionary type,"
-                            " is a %s instead"), util.obj_name(ls_cloudcfg))
+                            " is a %s instead"), type_utils.obj_name(ls_cloudcfg))
     if not ls_cloudcfg:
         return
 
diff --git a/cloudinit/config/cc_mounts.py b/cloudinit/config/cc_mounts.py
index 9010d97f..390ba711 100644
--- a/cloudinit/config/cc_mounts.py
+++ b/cloudinit/config/cc_mounts.py
@@ -22,6 +22,7 @@ from string import whitespace  # pylint: disable=W0402
 
 import re
 
+from cloudinit import type_utils
 from cloudinit import util
 
 # Shortname matches 'sda', 'sda1', 'xvda', 'hda', 'sdb', xvdb, vda, vdd1, sr0
@@ -60,7 +61,7 @@ def handle(_name, cfg, cloud, log, _args):
         # skip something that wasn't a list
         if not isinstance(cfgmnt[i], list):
             log.warn("Mount option %s not a list, got a %s instead",
-                     (i + 1), util.obj_name(cfgmnt[i]))
+                     (i + 1), type_utils.obj_name(cfgmnt[i]))
             continue
 
         startname = str(cfgmnt[i][0])
diff --git a/cloudinit/distros/__init__.py b/cloudinit/distros/__init__.py
index 2a2d8216..7b6276c5 100644
--- a/cloudinit/distros/__init__.py
+++ b/cloudinit/distros/__init__.py
@@ -31,6 +31,7 @@ import re
 from cloudinit import importer
 from cloudinit import log as logging
 from cloudinit import ssh_util
+from cloudinit import type_utils
 from cloudinit import util
 
 from cloudinit.distros.parsers import hosts
@@ -445,7 +446,7 @@ class Distro(object):
             lines.append("%s %s" % (user, rules))
         else:
             msg = "Can not create sudoers rule addition with type %r"
-            raise TypeError(msg % (util.obj_name(rules)))
+            raise TypeError(msg % (type_utils.obj_name(rules)))
         content = "\n".join(lines)
         content += "\n"  # trailing newline
 
@@ -568,7 +569,7 @@ def _normalize_groups(grp_cfg):
                             c_grp_cfg[k] = [v]
                         else:
                             raise TypeError("Bad group member type %s" %
-                                            util.obj_name(v))
+                                            type_utils.obj_name(v))
                     else:
                         if isinstance(v, (list)):
                             c_grp_cfg[k].extend(v)
@@ -576,13 +577,13 @@ def _normalize_groups(grp_cfg):
                             c_grp_cfg[k].append(v)
                         else:
                             raise TypeError("Bad group member type %s" %
-                                            util.obj_name(v))
+                                            type_utils.obj_name(v))
             elif isinstance(i, (str, basestring)):
                 if i not in c_grp_cfg:
                     c_grp_cfg[i] = []
             else:
                 raise TypeError("Unknown group name type %s" %
-                                util.obj_name(i))
+                                type_utils.obj_name(i))
         grp_cfg = c_grp_cfg
     groups = {}
     if isinstance(grp_cfg, (dict)):
@@ -591,7 +592,7 @@ def _normalize_groups(grp_cfg):
     else:
         raise TypeError(("Group config must be list, dict "
                          " or string types only and not %s") %
-                        util.obj_name(grp_cfg))
+                        type_utils.obj_name(grp_cfg))
     return groups
 
 
@@ -622,7 +623,7 @@ def _normalize_users(u_cfg, def_user_cfg=None):
                 ad_ucfg.append(v)
             else:
                 raise TypeError(("Unmappable user value type %s"
-                                 " for key %s") % (util.obj_name(v), k))
+                                 " for key %s") % (type_utils.obj_name(v), k))
         u_cfg = ad_ucfg
     elif isinstance(u_cfg, (str, basestring)):
         u_cfg = util.uniq_merge_sorted(u_cfg)
@@ -647,7 +648,7 @@ def _normalize_users(u_cfg, def_user_cfg=None):
         else:
             raise TypeError(("User config must be dictionary/list "
                              " or string types only and not %s") %
-                            util.obj_name(user_config))
+                            type_utils.obj_name(user_config))
 
     # Ensure user options are in the right python friendly format
     if users:
diff --git a/cloudinit/handlers/__init__.py b/cloudinit/handlers/__init__.py
index 8d6dcd4d..924463ce 100644
--- a/cloudinit/handlers/__init__.py
+++ b/cloudinit/handlers/__init__.py
@@ -27,6 +27,7 @@ from cloudinit.settings import (PER_ALWAYS, PER_INSTANCE, FREQUENCIES)
 
 from cloudinit import importer
 from cloudinit import log as logging
+from cloudinit import type_utils
 from cloudinit import util
 
 LOG = logging.getLogger(__name__)
@@ -69,7 +70,6 @@ INCLUSION_SRCH = sorted(list(INCLUSION_TYPES_MAP.keys()),
 
 
 class Handler(object):
-
     __metaclass__ = abc.ABCMeta
 
     def __init__(self, frequency, version=2):
@@ -77,53 +77,66 @@ class Handler(object):
         self.frequency = frequency
 
     def __repr__(self):
-        return "%s: [%s]" % (util.obj_name(self), self.list_types())
+        return "%s: [%s]" % (type_utils.obj_name(self), self.list_types())
 
     @abc.abstractmethod
     def list_types(self):
         raise NotImplementedError()
 
-    def handle_part(self, data, ctype, filename, payload, frequency):
-        return self._handle_part(data, ctype, filename, payload, frequency)
-
     @abc.abstractmethod
-    def _handle_part(self, data, ctype, filename, payload, frequency):
+    def handle_part(self, *args, **kwargs):
         raise NotImplementedError()
 
 
-def run_part(mod, data, ctype, filename, payload, frequency):
+def run_part(mod, data, filename, payload, frequency, headers):
     mod_freq = mod.frequency
     if not (mod_freq == PER_ALWAYS or
             (frequency == PER_INSTANCE and mod_freq == PER_INSTANCE)):
         return
-    mod_ver = mod.handler_version
     # Sanity checks on version (should be an int convertable)
     try:
+        mod_ver = mod.handler_version
         mod_ver = int(mod_ver)
-    except:
+    except (TypeError, ValueError, AttributeError):
         mod_ver = 1
+    content_type = headers['Content-Type']
     try:
         LOG.debug("Calling handler %s (%s, %s, %s) with frequency %s",
-                  mod, ctype, filename, mod_ver, frequency)
-        if mod_ver >= 2:
+                  mod, content_type, filename, mod_ver, frequency)
+        if mod_ver == 3:
+            # Treat as v. 3 which does get a frequency + headers
+            mod.handle_part(data, content_type, filename,
+                            payload, frequency, headers)
+        elif mod_ver == 2:
             # Treat as v. 2 which does get a frequency
-            mod.handle_part(data, ctype, filename, payload, frequency)
-        else:
+            mod.handle_part(data, content_type, filename,
+                            payload, frequency)
+        elif mod_ver == 1:
             # Treat as v. 1 which gets no frequency
-            mod.handle_part(data, ctype, filename, payload)
+            mod.handle_part(data, content_type, filename, payload)
+        else:
+            raise ValueError("Unknown module version %s" % (mod_ver))
     except:
         util.logexc(LOG, ("Failed calling handler %s (%s, %s, %s)"
                          " with frequency %s"),
-                    mod, ctype, filename,
+                    mod, content_type, filename,
                     mod_ver, frequency)
 
 
 def call_begin(mod, data, frequency):
-    run_part(mod, data, CONTENT_START, None, None, frequency)
+    # Create a fake header set
+    headers = {
+        'Content-Type': CONTENT_START,
+    }
+    run_part(mod, data, None, None, frequency, headers)
 
 
 def call_end(mod, data, frequency):
-    run_part(mod, data, CONTENT_END, None, None, frequency)
+    # Create a fake header set
+    headers = {
+        'Content-Type': CONTENT_END,
+    }
+    run_part(mod, data, None, None, frequency, headers)
 
 
 def walker_handle_handler(pdata, _ctype, _filename, payload):
@@ -173,26 +186,27 @@ def _escape_string(text):
     return text
 
 
-def walker_callback(pdata, ctype, filename, payload):
-    if ctype in PART_CONTENT_TYPES:
-        walker_handle_handler(pdata, ctype, filename, payload)
+def walker_callback(data, filename, payload, headers):
+    content_type = headers['Content-Type']
+    if content_type in PART_CONTENT_TYPES:
+        walker_handle_handler(data, content_type, filename, payload)
         return
-    handlers = pdata['handlers']
-    if ctype in pdata['handlers']:
-        run_part(handlers[ctype], pdata['data'], ctype, filename,
-                 payload, pdata['frequency'])
+    handlers = data['handlers']
+    if content_type in handlers:
+        run_part(handlers[content_type], data['data'], filename,
+                 payload, data['frequency'], headers)
     elif payload:
         # Extract the first line or 24 bytes for displaying in the log
         start = _extract_first_or_bytes(payload, 24)
         details = "'%s...'" % (_escape_string(start))
-        if ctype == NOT_MULTIPART_TYPE:
+        if content_type == NOT_MULTIPART_TYPE:
             LOG.warning("Unhandled non-multipart (%s) userdata: %s",
-                        ctype, details)
+                        content_type, details)
         else:
             LOG.warning("Unhandled unknown content-type (%s) userdata: %s",
-                        ctype, details)
+                        content_type, details)
     else:
-        LOG.debug("empty payload of type %s" % ctype)
+        LOG.debug("Empty payload of type %s", content_type)
 
 
 # Callback is a function that will be called with
@@ -212,7 +226,10 @@ def walk(msg, callback, data):
         if not filename:
             filename = PART_FN_TPL % (partnum)
 
-        callback(data, ctype, filename, part.get_payload(decode=True))
+        headers = dict(part)
+        LOG.debug(headers)
+        headers['Content-Type'] = ctype
+        callback(data, filename, part.get_payload(decode=True), headers)
         partnum = partnum + 1
 
 
diff --git a/cloudinit/handlers/boot_hook.py b/cloudinit/handlers/boot_hook.py
index 456b8020..bf313f10 100644
--- a/cloudinit/handlers/boot_hook.py
+++ b/cloudinit/handlers/boot_hook.py
@@ -56,7 +56,7 @@ class BootHookPartHandler(handlers.Handler):
         util.write_file(filepath, contents, 0700)
         return filepath
 
-    def _handle_part(self, _data, ctype, filename, payload, _frequency):
+    def handle_part(self, _data, ctype, filename, payload, _frequency):
         if ctype in handlers.CONTENT_SIGNALS:
             return
 
diff --git a/cloudinit/handlers/cloud_config.py b/cloudinit/handlers/cloud_config.py
index f6d95244..5f519f78 100644
--- a/cloudinit/handlers/cloud_config.py
+++ b/cloudinit/handlers/cloud_config.py
@@ -22,18 +22,24 @@
 
 from cloudinit import handlers
 from cloudinit import log as logging
+from cloudinit import mergers
 from cloudinit import util
 
 from cloudinit.settings import (PER_ALWAYS)
 
 LOG = logging.getLogger(__name__)
 
+MERGE_HEADER = 'Merge-Type'
+DEF_MERGERS = mergers.default_mergers()
+
 
 class CloudConfigPartHandler(handlers.Handler):
     def __init__(self, paths, **_kwargs):
-        handlers.Handler.__init__(self, PER_ALWAYS)
-        self.cloud_buf = []
+        handlers.Handler.__init__(self, PER_ALWAYS, version=3)
+        self.cloud_buf = None
         self.cloud_fn = paths.get_ipath("cloud_config")
+        self.file_names = []
+        self.mergers = [DEF_MERGERS]
 
     def list_types(self):
         return [
@@ -43,20 +49,75 @@ class CloudConfigPartHandler(handlers.Handler):
     def _write_cloud_config(self, buf):
         if not self.cloud_fn:
             return
-        lines = [str(b) for b in buf]
-        payload = "\n".join(lines)
-        util.write_file(self.cloud_fn, payload, 0600)
+        # Capture which files we merged from...
+        file_lines = []
+        if self.file_names:
+            file_lines.append("# from %s files" % (len(self.file_names)))
+            for fn in self.file_names:
+                file_lines.append("# %s" % (fn))
+            file_lines.append("")
+        if self.cloud_buf is not None:
+            # Something was actually gathered....
+            lines = [
+                "#cloud-config",
+                '',
+            ]
+            lines.extend(file_lines)
+            lines.append(util.yaml_dumps(self.cloud_buf))
+        else:
+            lines = []
+        util.write_file(self.cloud_fn, "\n".join(lines), 0600)
+
+    def _extract_mergers(self, payload, headers):
+        merge_header_headers = ''
+        for h in [MERGE_HEADER, 'X-%s' % (MERGE_HEADER)]:
+            tmp_h = headers.get(h, '')
+            if tmp_h:
+                merge_header_headers = tmp_h
+                break
+        # Select either the merge-type from the content
+        # or the merge type from the headers or default to our own set
+        # if neither exists (or is empty) from the later.
+        payload_yaml = util.load_yaml(payload)
+        mergers_yaml = mergers.dict_extract_mergers(payload_yaml)
+        mergers_header = mergers.string_extract_mergers(merge_header_headers)
+        all_mergers = []
+        all_mergers.extend(mergers_yaml)
+        all_mergers.extend(mergers_header)
+        if not all_mergers:
+            all_mergers = DEF_MERGERS
+        return all_mergers
+
+    def _merge_part(self, payload, headers):
+        next_mergers = self._extract_mergers(payload, headers)
+        # Use the merger list from the last call, since it is the one
+        # that will be defining how to merge with the next payload.
+        curr_mergers = list(self.mergers[-1])
+        LOG.debug("Merging by applying %s", curr_mergers)
+        self.mergers.append(next_mergers)
+        merger = mergers.construct(curr_mergers)
+        if self.cloud_buf is None:
+            # First time through, merge with an empty dict...
+            self.cloud_buf = {}
+        self.cloud_buf = merger.merge(self.cloud_buf,
+                                      util.load_yaml(payload))
 
-    def _handle_part(self, _data, ctype, filename, payload, _frequency):
+    def _reset(self):
+        self.file_names = []
+        self.cloud_buf = None
+        self.mergers = [DEF_MERGERS]
+
+    def handle_part(self, _data, ctype, filename, payload, _freq, headers):
         if ctype == handlers.CONTENT_START:
-            self.cloud_buf = []
+            self._reset()
             return
         if ctype == handlers.CONTENT_END:
             self._write_cloud_config(self.cloud_buf)
-            self.cloud_buf = []
+            self._reset()
             return
-
-        filename = util.clean_filename(filename)
-        if not filename:
-            filename = '??'
-        self.cloud_buf.extend(["#%s" % (filename), str(payload)])
+        try:
+            self._merge_part(payload, headers)
+            self.file_names.append(filename)
+        except:
+            util.logexc(LOG, "Failed at merging in cloud config part from %s",
+                        filename)
diff --git a/cloudinit/handlers/shell_script.py b/cloudinit/handlers/shell_script.py
index 6c5c11ca..2a87e8dd 100644
--- a/cloudinit/handlers/shell_script.py
+++ b/cloudinit/handlers/shell_script.py
@@ -41,7 +41,7 @@ class ShellScriptPartHandler(handlers.Handler):
             handlers.type_from_starts_with("#!"),
         ]
 
-    def _handle_part(self, _data, ctype, filename, payload, _frequency):
+    def handle_part(self, _data, ctype, filename, payload, _frequency):
         if ctype in handlers.CONTENT_SIGNALS:
             # TODO(harlowja): maybe delete existing things here
             return
diff --git a/cloudinit/handlers/upstart_job.py b/cloudinit/handlers/upstart_job.py
index 0aa7446e..3d8833a1 100644
--- a/cloudinit/handlers/upstart_job.py
+++ b/cloudinit/handlers/upstart_job.py
@@ -42,7 +42,7 @@ class UpstartJobPartHandler(handlers.Handler):
             handlers.type_from_starts_with("#upstart-job"),
         ]
 
-    def _handle_part(self, _data, ctype, filename, payload, frequency):
+    def handle_part(self, _data, ctype, filename, payload, frequency):
         if ctype in handlers.CONTENT_SIGNALS:
             return
 
diff --git a/cloudinit/helpers.py b/cloudinit/helpers.py
index 2077401c..a4e6fb03 100644
--- a/cloudinit/helpers.py
+++ b/cloudinit/helpers.py
@@ -32,6 +32,7 @@ from cloudinit.settings import (PER_INSTANCE, PER_ALWAYS, PER_ONCE,
                                 CFG_ENV_NAME)
 
 from cloudinit import log as logging
+from cloudinit import type_utils
 from cloudinit import util
 
 LOG = logging.getLogger(__name__)
@@ -68,7 +69,7 @@ class FileLock(object):
         self.fn = fn
 
     def __str__(self):
-        return "<%s using file %r>" % (util.obj_name(self), self.fn)
+        return "<%s using file %r>" % (type_utils.obj_name(self), self.fn)
 
 
 def canon_sem_name(name):
diff --git a/cloudinit/mergers/__init__.py b/cloudinit/mergers/__init__.py
new file mode 100644
index 00000000..45e88fb3
--- /dev/null
+++ b/cloudinit/mergers/__init__.py
@@ -0,0 +1,155 @@
+# vi: ts=4 expandtab
+#
+#    Copyright (C) 2012 Yahoo! Inc.
+#
+#    Author: Joshua Harlow <harlowja@yahoo-inc.com>
+#
+#    This program is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License version 3, as
+#    published by the Free Software Foundation.
+#
+#    This program is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+import re
+
+from cloudinit import importer
+from cloudinit import log as logging
+from cloudinit import type_utils
+
+NAME_MTCH = re.compile(r"(^[a-zA-Z_][A-Za-z0-9_]*)\((.*?)\)$")
+
+LOG = logging.getLogger(__name__)
+DEF_MERGE_TYPE = "list(extend)+dict()+str(append)"
+
+
+class UnknownMerger(object):
+    # Named differently so auto-method finding
+    # doesn't pick this up if there is ever a type
+    # named "unknown"
+    def _handle_unknown(self, meth_wanted, value, merge_with):
+        return value
+
+    # This merging will attempt to look for a '_on_X' method
+    # in our own object for a given object Y with type X,
+    # if found it will be called to perform the merge of a source
+    # object and a object to merge_with.
+    #
+    # If not found the merge will be given to a '_handle_unknown'
+    # function which can decide what to do wit the 2 values.
+    def merge(self, source, merge_with):
+        type_name = type_utils.obj_name(source)
+        type_name = type_name.lower()
+        method_name = "_on_%s" % (type_name)
+        meth = None
+        args = [source, merge_with]
+        if hasattr(self, method_name):
+            meth = getattr(self, method_name)
+        if not meth:
+            meth = self._handle_unknown
+            args.insert(0, method_name)
+        return meth(*args)
+
+
+class LookupMerger(UnknownMerger):
+    def __init__(self, lookups=None):
+        UnknownMerger.__init__(self)
+        if lookups is None:
+            self._lookups = []
+        else:
+            self._lookups = lookups
+
+    # For items which can not be merged by the parent this object
+    # will lookup in a internally maintained set of objects and
+    # find which one of those objects can perform the merge. If
+    # any of the contained objects have the needed method, they
+    # will be called to perform the merge.
+    def _handle_unknown(self, meth_wanted, value, merge_with):
+        meth = None
+        for merger in self._lookups:
+            if hasattr(merger, meth_wanted):
+                # First one that has that method/attr gets to be
+                # the one that will be called
+                meth = getattr(merger, meth_wanted)
+                break
+        if not meth:
+            return UnknownMerger._handle_unknown(self, meth_wanted,
+                                                 value, merge_with)
+        return meth(value, merge_with)
+
+
+def dict_extract_mergers(config):
+    parsed_mergers = []
+    raw_mergers = config.get('merge_how')
+    if raw_mergers is None:
+        raw_mergers = config.get('merge_type')
+    if raw_mergers is None:
+        return parsed_mergers
+    if isinstance(raw_mergers, (str, basestring)):
+        return string_extract_mergers(raw_mergers)
+    for m in raw_mergers:
+        if isinstance(m, (dict)):
+            name = m['name']
+            name = name.replace("-", "_").strip()
+            opts = m['settings']
+        else:
+            name = m[0]
+            if len(m) >= 2:
+                opts = m[1:]
+            else:
+                opts = []
+        if name:
+            parsed_mergers.append((name, opts))
+    return parsed_mergers
+
+
+def string_extract_mergers(merge_how):
+    parsed_mergers = []
+    for m_name in merge_how.split("+"):
+        # Canonicalize the name (so that it can be found
+        # even when users alter it in various ways)
+        m_name = m_name.lower().strip()
+        m_name = m_name.replace("-", "_")
+        if not m_name:
+            continue
+        match = NAME_MTCH.match(m_name)
+        if not match:
+            msg = "Matcher identifer '%s' is not in the right format" % (m_name)
+            raise ValueError(msg)
+        (m_name, m_ops) = match.groups()
+        m_ops = m_ops.strip().split(",")
+        m_ops = [m.strip().lower() for m in m_ops if m.strip()]
+        parsed_mergers.append((m_name, m_ops))
+    return parsed_mergers
+
+
+def default_mergers():
+    return tuple(string_extract_mergers(DEF_MERGE_TYPE))
+
+
+def construct(parsed_mergers):
+    mergers_to_be = []
+    for (m_name, m_ops) in parsed_mergers:
+        merger_locs = importer.find_module(m_name,
+                                           [__name__],
+                                           ['Merger'])
+        if not merger_locs:
+            msg = "Could not find merger named '%s'" % (m_name)
+            raise ImportError(msg)
+        else:
+            mod = importer.import_module(merger_locs[0])
+            mod_attr = getattr(mod, 'Merger')
+            mergers_to_be.append((mod_attr, m_ops))
+    # Now form them...
+    mergers = []
+    root = LookupMerger(mergers)
+    for (attr, opts) in mergers_to_be:
+        mergers.append(attr(root, opts))
+    return root
+
+
diff --git a/cloudinit/mergers/dict.py b/cloudinit/mergers/dict.py
new file mode 100644
index 00000000..45a7d3a5
--- /dev/null
+++ b/cloudinit/mergers/dict.py
@@ -0,0 +1,48 @@
+# vi: ts=4 expandtab
+#
+#    Copyright (C) 2012 Yahoo! Inc.
+#
+#    Author: Joshua Harlow <harlowja@yahoo-inc.com>
+#
+#    This program is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License version 3, as
+#    published by the Free Software Foundation.
+#
+#    This program is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+
+class Merger(object):
+    def __init__(self, merger, opts):
+        self._merger = merger
+        self._overwrite = 'overwrite' in opts
+
+    # This merging algorithm will attempt to merge with
+    # another dictionary, on encountering any other type of object
+    # it will not merge with said object, but will instead return
+    # the original value
+    #
+    # On encountering a dictionary, it will create a new dictionary
+    # composed of the original and the one to merge with, if 'overwrite'
+    # is enabled then keys that exist in the original will be overwritten
+    # by keys in the one to merge with (and associated values). Otherwise
+    # if not in overwrite mode the 2 conflicting keys themselves will
+    # be merged.
+    def _on_dict(self, value, merge_with):
+        if not isinstance(merge_with, (dict)):
+            return value
+        merged = dict(value)
+        for (k, v) in merge_with.items():
+            if k in merged:
+                if not self._overwrite:
+                    merged[k] = self._merger.merge(merged[k], v)
+                else:
+                    merged[k] = v
+            else:
+                merged[k] = v
+        return merged
diff --git a/cloudinit/mergers/list.py b/cloudinit/mergers/list.py
new file mode 100644
index 00000000..a56ff007
--- /dev/null
+++ b/cloudinit/mergers/list.py
@@ -0,0 +1,50 @@
+# vi: ts=4 expandtab
+#
+#    Copyright (C) 2012 Yahoo! Inc.
+#
+#    Author: Joshua Harlow <harlowja@yahoo-inc.com>
+#
+#    This program is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License version 3, as
+#    published by the Free Software Foundation.
+#
+#    This program is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+
+class Merger(object):
+    def __init__(self, merger, opts):
+        self._merger = merger
+        self._discard_non = 'discard_non_list' in opts
+        self._extend = 'extend' in opts
+
+    def _on_tuple(self, value, merge_with):
+        return self._on_list(list(value), merge_with)
+
+    # On encountering a list or tuple type this action will be applied
+    # a new list will be returned, if the value to merge with is itself
+    # a list and we have been told to 'extend', then the value here will
+    # be extended with the other list. If in 'extend' mode then we will
+    # attempt to merge instead, which means that values from the list
+    # to merge with will replace values in te original list (they will
+    # also be merged recursively).
+    #
+    # If the value to merge with is not a list, and we are set to discared
+    # then no modifications will take place, otherwise we will just append
+    # the value to merge with onto the end of our own list.
+    def _on_list(self, value, merge_with):
+        new_value = list(value)
+        if isinstance(merge_with, (tuple, list)):
+            if self._extend:
+                new_value.extend(merge_with)
+            else:
+                return new_value
+        else:
+            if not self._discard_non:
+                new_value.append(merge_with)
+        return new_value
diff --git a/cloudinit/mergers/str.py b/cloudinit/mergers/str.py
new file mode 100644
index 00000000..f1534c5b
--- /dev/null
+++ b/cloudinit/mergers/str.py
@@ -0,0 +1,39 @@
+# vi: ts=4 expandtab
+#
+#    Copyright (C) 2012 Yahoo! Inc.
+#
+#    Author: Joshua Harlow <harlowja@yahoo-inc.com>
+#
+#    This program is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License version 3, as
+#    published by the Free Software Foundation.
+#
+#    This program is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+
+class Merger(object):
+    def __init__(self, merger, opts):
+        self._append = 'append' in opts
+
+    # On encountering a unicode object to merge value with
+    # we will for now just proxy into the string method to let it handle it.
+    def _on_unicode(self, value, merge_with):
+        return self._on_str(value, merge_with)
+
+    # On encountering a string object to merge with we will
+    # perform the following action, if appending we will
+    # merge them together, otherwise we will just return value.
+    def _on_str(self, value, merge_with):
+        if not self._append:
+            return value
+        else:
+            if isinstance(value, (unicode)):
+                return value + unicode(merge_with)
+            else:
+                return value + str(merge_with)
diff --git a/cloudinit/sources/DataSourceAltCloud.py b/cloudinit/sources/DataSourceAltCloud.py
index 9812bdcb..64548d43 100644
--- a/cloudinit/sources/DataSourceAltCloud.py
+++ b/cloudinit/sources/DataSourceAltCloud.py
@@ -30,6 +30,7 @@ import os.path
 from cloudinit import log as logging
 from cloudinit import sources
 from cloudinit import util
+
 from cloudinit.util import ProcessExecutionError
 
 LOG = logging.getLogger(__name__)
@@ -91,8 +92,8 @@ class DataSourceAltCloud(sources.DataSource):
         self.supported_seed_starts = ("/", "file://")
 
     def __str__(self):
-        mstr = "%s [seed=%s]" % (util.obj_name(self), self.seed)
-        return mstr
+        root = sources.DataSource.__str__(self)
+        return "%s [seed=%s]" % (root, self.seed)
 
     def get_cloud_type(self):
         '''
diff --git a/cloudinit/sources/DataSourceCloudStack.py b/cloudinit/sources/DataSourceCloudStack.py
index 275caf0d..81c8cda9 100644
--- a/cloudinit/sources/DataSourceCloudStack.py
+++ b/cloudinit/sources/DataSourceCloudStack.py
@@ -48,9 +48,6 @@ class DataSourceCloudStack(sources.DataSource):
             raise RuntimeError("No virtual router found!")
         self.metadata_address = "http://%s/" % (vr_addr)
 
-    def __str__(self):
-        return util.obj_name(self)
-
     def _get_url_settings(self):
         mcfg = self.ds_cfg
         if not mcfg:
diff --git a/cloudinit/sources/DataSourceConfigDrive.py b/cloudinit/sources/DataSourceConfigDrive.py
index ec016a1d..5f152299 100644
--- a/cloudinit/sources/DataSourceConfigDrive.py
+++ b/cloudinit/sources/DataSourceConfigDrive.py
@@ -51,7 +51,9 @@ class DataSourceConfigDrive(sources.DataSource):
         self.ec2_metadata = None
 
     def __str__(self):
-        mstr = "%s [%s,ver=%s]" % (util.obj_name(self), self.dsmode,
+        root = sources.DataSource.__str__(self)
+        mstr = "%s [%s,ver=%s]" % (root,
+                                   self.dsmode,
                                    self.version)
         mstr += "[source=%s]" % (self.source)
         return mstr
@@ -152,7 +154,7 @@ class DataSourceConfigDrive(sources.DataSource):
             return False
 
         md = results['metadata']
-        md = util.mergedict(md, DEFAULT_METADATA)
+        md = util.mergemanydict([md, DEFAULT_METADATA])
 
         # Perform some metadata 'fixups'
         #
diff --git a/cloudinit/sources/DataSourceEc2.py b/cloudinit/sources/DataSourceEc2.py
index 2db53446..f010e640 100644
--- a/cloudinit/sources/DataSourceEc2.py
+++ b/cloudinit/sources/DataSourceEc2.py
@@ -49,9 +49,6 @@ class DataSourceEc2(sources.DataSource):
         self.seed_dir = os.path.join(paths.seed_dir, "ec2")
         self.api_ver = DEF_MD_VERSION
 
-    def __str__(self):
-        return util.obj_name(self)
-
     def get_data(self):
         seed_ret = {}
         if util.read_optional_seed(seed_ret, base=(self.seed_dir + "/")):
diff --git a/cloudinit/sources/DataSourceMAAS.py b/cloudinit/sources/DataSourceMAAS.py
index b55d8a21..612d8ffa 100644
--- a/cloudinit/sources/DataSourceMAAS.py
+++ b/cloudinit/sources/DataSourceMAAS.py
@@ -50,7 +50,8 @@ class DataSourceMAAS(sources.DataSource):
         self.oauth_clockskew = None
 
     def __str__(self):
-        return "%s [%s]" % (util.obj_name(self), self.base_url)
+        root = sources.DataSource.__str__(self)
+        return "%s [%s]" % (root, self.base_url)
 
     def get_data(self):
         mcfg = self.ds_cfg
diff --git a/cloudinit/sources/DataSourceNoCloud.py b/cloudinit/sources/DataSourceNoCloud.py
index 603f0155..08a853cc 100644
--- a/cloudinit/sources/DataSourceNoCloud.py
+++ b/cloudinit/sources/DataSourceNoCloud.py
@@ -40,9 +40,8 @@ class DataSourceNoCloud(sources.DataSource):
         self.supported_seed_starts = ("/", "file://")
 
     def __str__(self):
-        mstr = "%s [seed=%s][dsmode=%s]" % (util.obj_name(self),
-                                            self.seed, self.dsmode)
-        return mstr
+        root = sources.DataSource.__str__(self)
+        return "%s [seed=%s][dsmode=%s]" % (root, self.seed, self.dsmode)
 
     def get_data(self):
         defaults = {
@@ -65,7 +64,7 @@ class DataSourceNoCloud(sources.DataSource):
         # Check to see if the seed dir has data.
         seedret = {}
         if util.read_optional_seed(seedret, base=self.seed_dir + "/"):
-            md = util.mergedict(md, seedret['meta-data'])
+            md = util.mergemanydict([md, seedret['meta-data']])
             ud = seedret['user-data']
             found.append(self.seed_dir)
             LOG.debug("Using seeded cache data from %s", self.seed_dir)
@@ -82,7 +81,7 @@ class DataSourceNoCloud(sources.DataSource):
             if self.ds_cfg['user-data']:
                 ud = self.ds_cfg['user-data']
             if self.ds_cfg['meta-data'] is not False:
-                md = util.mergedict(md, self.ds_cfg['meta-data'])
+                md = util.mergemanydict([md, self.ds_cfg['meta-data']])
             if 'ds_config' not in found:
                 found.append("ds_config")
 
@@ -100,7 +99,7 @@ class DataSourceNoCloud(sources.DataSource):
                     LOG.debug("Attempting to use data from %s", dev)
 
                     (newmd, newud) = util.mount_cb(dev, util.read_seeded)
-                    md = util.mergedict(newmd, md)
+                    md = util.mergemanydict([newmd, md])
                     ud = newud
 
                     # For seed from a device, the default mode is 'net'.
@@ -150,11 +149,11 @@ class DataSourceNoCloud(sources.DataSource):
             LOG.debug("Using seeded cache data from %s", seedfrom)
 
             # Values in the command line override those from the seed
-            md = util.mergedict(md, md_seed)
+            md = util.mergemanydict([md, md_seed])
             found.append(seedfrom)
 
         # Now that we have exhausted any other places merge in the defaults
-        md = util.mergedict(md, defaults)
+        md = util.mergemanydict([md, defaults])
 
         # Update the network-interfaces if metadata had 'network-interfaces'
         # entry and this is the local datasource, or 'seedfrom' was used
diff --git a/cloudinit/sources/DataSourceNone.py b/cloudinit/sources/DataSourceNone.py
index c2125bee..e2175e1f 100644
--- a/cloudinit/sources/DataSourceNone.py
+++ b/cloudinit/sources/DataSourceNone.py
@@ -41,9 +41,6 @@ class DataSourceNone(sources.DataSource):
     def get_instance_id(self):
         return 'iid-datasource-none'
 
-    def __str__(self):
-        return util.obj_name(self)
-
     @property
     def is_disconnected(self):
         return True
diff --git a/cloudinit/sources/DataSourceOVF.py b/cloudinit/sources/DataSourceOVF.py
index e90150c6..0530c4b7 100644
--- a/cloudinit/sources/DataSourceOVF.py
+++ b/cloudinit/sources/DataSourceOVF.py
@@ -43,7 +43,8 @@ class DataSourceOVF(sources.DataSource):
         self.supported_seed_starts = ("/", "file://")
 
     def __str__(self):
-        return "%s [seed=%s]" % (util.obj_name(self), self.seed)
+        root = sources.DataSource.__str__(self)
+        return "%s [seed=%s]" % (root, self.seed)
 
     def get_data(self):
         found = []
@@ -93,11 +94,11 @@ class DataSourceOVF(sources.DataSource):
             (md_seed, ud) = util.read_seeded(seedfrom, timeout=None)
             LOG.debug("Using seeded cache data from %s", seedfrom)
 
-            md = util.mergedict(md, md_seed)
+            md = util.mergemanydict([md, md_seed])
             found.append(seedfrom)
 
         # Now that we have exhausted any other places merge in the defaults
-        md = util.mergedict(md, defaults)
+        md = util.mergemanydict([md, defaults])
 
         self.seed = ",".join(found)
         self.metadata = md
diff --git a/cloudinit/sources/__init__.py b/cloudinit/sources/__init__.py
index 96baff90..d8fbacdd 100644
--- a/cloudinit/sources/__init__.py
+++ b/cloudinit/sources/__init__.py
@@ -25,6 +25,7 @@ import os
 
 from cloudinit import importer
 from cloudinit import log as logging
+from cloudinit import type_utils
 from cloudinit import user_data as ud
 from cloudinit import util
 
@@ -52,7 +53,7 @@ class DataSource(object):
         self.userdata = None
         self.metadata = None
         self.userdata_raw = None
-        name = util.obj_name(self)
+        name = type_utils.obj_name(self)
         if name.startswith(DS_PREFIX):
             name = name[len(DS_PREFIX):]
         self.ds_cfg = util.get_cfg_by_path(self.sys_cfg,
@@ -62,6 +63,9 @@ class DataSource(object):
         else:
             self.ud_proc = ud_proc
 
+    def __str__(self):
+        return type_utils.obj_name(self)
+
     def get_userdata(self, apply_filter=False):
         if self.userdata is None:
             self.userdata = self.ud_proc.process(self.get_userdata_raw())
@@ -214,7 +218,7 @@ def normalize_pubkey_data(pubkey_data):
 
 def find_source(sys_cfg, distro, paths, ds_deps, cfg_list, pkg_list):
     ds_list = list_sources(cfg_list, ds_deps, pkg_list)
-    ds_names = [util.obj_name(f) for f in ds_list]
+    ds_names = [type_utils.obj_name(f) for f in ds_list]
     LOG.debug("Searching for data source in: %s", ds_names)
 
     for cls in ds_list:
@@ -222,7 +226,7 @@ def find_source(sys_cfg, distro, paths, ds_deps, cfg_list, pkg_list):
             LOG.debug("Seeing if we can get any data from %s", cls)
             s = cls(sys_cfg, distro, paths)
             if s.get_data():
-                return (s, util.obj_name(cls))
+                return (s, type_utils.obj_name(cls))
         except Exception:
             util.logexc(LOG, "Getting data from %s failed", cls)
 
diff --git a/cloudinit/stages.py b/cloudinit/stages.py
index d7d1dea0..543d247f 100644
--- a/cloudinit/stages.py
+++ b/cloudinit/stages.py
@@ -43,6 +43,7 @@ from cloudinit import helpers
 from cloudinit import importer
 from cloudinit import log as logging
 from cloudinit import sources
+from cloudinit import type_utils
 from cloudinit import util
 
 LOG = logging.getLogger(__name__)
@@ -211,7 +212,7 @@ class Init(object):
         # Any config provided???
         pkg_list = self.cfg.get('datasource_pkg_list') or []
         # Add the defaults at the end
-        for n in ['', util.obj_name(sources)]:
+        for n in ['', type_utils.obj_name(sources)]:
             if n not in pkg_list:
                 pkg_list.append(n)
         cfg_list = self.cfg.get('datasource_list') or []
@@ -271,7 +272,7 @@ class Init(object):
         dp = self.paths.get_cpath('data')
 
         # Write what the datasource was and is..
-        ds = "%s: %s" % (util.obj_name(self.datasource), self.datasource)
+        ds = "%s: %s" % (type_utils.obj_name(self.datasource), self.datasource)
         previous_ds = None
         ds_fn = os.path.join(idir, 'datasource')
         try:
@@ -488,7 +489,7 @@ class Modules(object):
             else:
                 raise TypeError(("Failed to read '%s' item in config,"
                                  " unknown type %s") %
-                                 (item, util.obj_name(item)))
+                                 (item, type_utils.obj_name(item)))
         return module_list
 
     def _fixup_modules(self, raw_mods):
@@ -506,7 +507,7 @@ class Modules(object):
                 # Reset it so when ran it will get set to a known value
                 freq = None
             mod_locs = importer.find_module(mod_name,
-                                            ['', util.obj_name(config)],
+                                            ['', type_utils.obj_name(config)],
                                             ['handle'])
             if not mod_locs:
                 LOG.warn("Could not find module named %s", mod_name)
diff --git a/cloudinit/type_utils.py b/cloudinit/type_utils.py
new file mode 100644
index 00000000..2decbfc5
--- /dev/null
+++ b/cloudinit/type_utils.py
@@ -0,0 +1,34 @@
+# vi: ts=4 expandtab
+#
+#    Copyright (C) 2012 Canonical Ltd.
+#    Copyright (C) 2012 Hewlett-Packard Development Company, L.P.
+#    Copyright (C) 2012 Yahoo! Inc.
+#
+#    Author: Scott Moser <scott.moser@canonical.com>
+#    Author: Juerg Haefliger <juerg.haefliger@hp.com>
+#    Author: Joshua Harlow <harlowja@yahoo-inc.com>
+#
+#    This program is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License version 3, as
+#    published by the Free Software Foundation.
+#
+#    This program is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+# pylint: disable=C0302
+
+import types
+
+
+def obj_name(obj):
+    if isinstance(obj, (types.TypeType,
+                        types.ModuleType,
+                        types.FunctionType,
+                        types.LambdaType)):
+        return str(obj.__name__)
+    return obj_name(obj.__class__)
diff --git a/cloudinit/util.py b/cloudinit/util.py
index afde2066..709d5cca 100644
--- a/cloudinit/util.py
+++ b/cloudinit/util.py
@@ -43,14 +43,15 @@ import subprocess
 import sys
 import tempfile
 import time
-import types
 import urlparse
 
 import yaml
 
 from cloudinit import importer
 from cloudinit import log as logging
+from cloudinit import mergers
 from cloudinit import safeyaml
+from cloudinit import type_utils
 from cloudinit import url_helper as uhelp
 from cloudinit import version
 
@@ -194,11 +195,12 @@ def fork_cb(child_cb, *args):
             os._exit(0)  # pylint: disable=W0212
         except:
             logexc(LOG, ("Failed forking and"
-                         " calling callback %s"), obj_name(child_cb))
+                         " calling callback %s"),
+                   type_utils.obj_name(child_cb))
             os._exit(1)  # pylint: disable=W0212
     else:
         LOG.debug("Forked child %s who will run callback %s",
-                  fid, obj_name(child_cb))
+                  fid, type_utils.obj_name(child_cb))
 
 
 def is_true(val, addons=None):
@@ -512,40 +514,26 @@ def make_url(scheme, host, port=None,
     return urlparse.urlunparse(pieces)
 
 
-def obj_name(obj):
-    if isinstance(obj, (types.TypeType,
-                        types.ModuleType,
-                        types.FunctionType,
-                        types.LambdaType)):
-        return str(obj.__name__)
-    return obj_name(obj.__class__)
-
-
 def mergemanydict(srcs, reverse=False):
     if reverse:
         srcs = reversed(srcs)
     m_cfg = {}
+    merge_how = [mergers.default_mergers()]
     for a_cfg in srcs:
         if a_cfg:
-            m_cfg = mergedict(m_cfg, a_cfg)
+            # Take the last merger as the one that
+            # will define how to merge next...
+            mergers_to_apply = list(merge_how[-1])
+            merger = mergers.construct(mergers_to_apply)
+            m_cfg = merger.merge(m_cfg, a_cfg)
+            # If the config has now has new merger set,
+            # extract them to be used next time...
+            new_mergers = mergers.dict_extract_mergers(m_cfg)
+            if new_mergers:
+                merge_how.append(new_mergers)
     return m_cfg
 
 
-def mergedict(src, cand):
-    """
-    Merge values from C{cand} into C{src}.
-    If C{src} has a key C{cand} will not override.
-    Nested dictionaries are merged recursively.
-    """
-    if isinstance(src, dict) and isinstance(cand, dict):
-        for (k, v) in cand.iteritems():
-            if k not in src:
-                src[k] = v
-            else:
-                src[k] = mergedict(src[k], v)
-    return src
-
-
 @contextlib.contextmanager
 def chdir(ndir):
     curr = os.getcwd()
@@ -644,7 +632,7 @@ def load_yaml(blob, default=None, allowed=(dict,)):
             # Yes this will just be caught, but thats ok for now...
             raise TypeError(("Yaml load allows %s root types,"
                              " but got %s instead") %
-                            (allowed, obj_name(converted)))
+                            (allowed, type_utils.obj_name(converted)))
         loaded = converted
     except (yaml.YAMLError, TypeError, ValueError):
         if len(blob) == 0:
@@ -713,7 +701,7 @@ def read_conf_with_confd(cfgfile):
             if not isinstance(confd, (str, basestring)):
                 raise TypeError(("Config file %s contains 'conf_d' "
                                  "with non-string type %s") %
-                                 (cfgfile, obj_name(confd)))
+                                 (cfgfile, type_utils.obj_name(confd)))
             else:
                 confd = str(confd).strip()
     elif os.path.isdir("%s.d" % cfgfile):
@@ -724,7 +712,7 @@ def read_conf_with_confd(cfgfile):
 
     # Conf.d settings override input configuration
     confd_cfg = read_conf_d(confd)
-    return mergedict(confd_cfg, cfg)
+    return mergemanydict([confd_cfg, cfg])
 
 
 def read_cc_from_cmdline(cmdline=None):
@@ -1471,7 +1459,7 @@ def shellify(cmdlist, add_header=True):
         else:
             raise RuntimeError(("Unable to shellify type %s"
                                 " which is not a list or string")
-                               % (obj_name(args)))
+                               % (type_utils.obj_name(args)))
     LOG.debug("Shellified %s commands.", cmds_made)
     return content