summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog2
-rw-r--r--cloudinit/config/cc_landscape.py4
-rw-r--r--cloudinit/config/cc_mounts.py3
-rw-r--r--cloudinit/distros/__init__.py19
-rw-r--r--cloudinit/handlers/__init__.py75
-rw-r--r--cloudinit/handlers/boot_hook.py3
-rw-r--r--cloudinit/handlers/cloud_config.py92
-rw-r--r--cloudinit/handlers/shell_script.py3
-rw-r--r--cloudinit/handlers/upstart_job.py3
-rw-r--r--cloudinit/helpers.py3
-rw-r--r--cloudinit/mergers/__init__.py154
-rw-r--r--cloudinit/mergers/dict.py48
-rw-r--r--cloudinit/mergers/list.py50
-rw-r--r--cloudinit/mergers/str.py39
-rw-r--r--cloudinit/sources/DataSourceAltCloud.py5
-rw-r--r--cloudinit/sources/DataSourceCloudStack.py3
-rw-r--r--cloudinit/sources/DataSourceConfigDrive.py6
-rw-r--r--cloudinit/sources/DataSourceEc2.py3
-rw-r--r--cloudinit/sources/DataSourceMAAS.py3
-rw-r--r--cloudinit/sources/DataSourceNoCloud.py15
-rw-r--r--cloudinit/sources/DataSourceNone.py4
-rw-r--r--cloudinit/sources/DataSourceOVF.py7
-rw-r--r--cloudinit/sources/__init__.py10
-rw-r--r--cloudinit/stages.py9
-rw-r--r--cloudinit/type_utils.py34
-rw-r--r--cloudinit/util.py52
-rw-r--r--doc/merging.txt179
-rw-r--r--tests/unittests/test__init__.py26
-rw-r--r--tests/unittests/test_merging.py204
-rw-r--r--tests/unittests/test_userdata.py107
30 files changed, 949 insertions, 216 deletions
diff --git a/ChangeLog b/ChangeLog
index d035a7a3..54f49da2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -48,6 +48,8 @@
output does not get a 'killed by TERM signal' message.
- support resizing partitions via growpart or parted (LP: #1136936)
- allow specifying apt-get command in distro config ('apt_get_command')
+ - support different and user-suppliable merging algorithms for cloud-config
+ (LP: #1023179)
0.7.1:
- sysvinit: fix missing dependency in cloud-init job for RHEL 5.6
diff --git a/cloudinit/config/cc_landscape.py b/cloudinit/config/cc_landscape.py
index 2efdff79..8a709677 100644
--- a/cloudinit/config/cc_landscape.py
+++ b/cloudinit/config/cc_landscape.py
@@ -24,6 +24,7 @@ from StringIO import StringIO
from configobj import ConfigObj
+from cloudinit import type_utils
from cloudinit import util
from cloudinit.settings import PER_INSTANCE
@@ -58,7 +59,8 @@ def handle(_name, cfg, cloud, log, _args):
if not isinstance(ls_cloudcfg, (dict)):
raise RuntimeError(("'landscape' key existed in config,"
" but not a dictionary type,"
- " is a %s instead"), util.obj_name(ls_cloudcfg))
+ " is a %s instead"),
+ type_utils.obj_name(ls_cloudcfg))
if not ls_cloudcfg:
return
diff --git a/cloudinit/config/cc_mounts.py b/cloudinit/config/cc_mounts.py
index 9010d97f..390ba711 100644
--- a/cloudinit/config/cc_mounts.py
+++ b/cloudinit/config/cc_mounts.py
@@ -22,6 +22,7 @@ from string import whitespace # pylint: disable=W0402
import re
+from cloudinit import type_utils
from cloudinit import util
# Shortname matches 'sda', 'sda1', 'xvda', 'hda', 'sdb', xvdb, vda, vdd1, sr0
@@ -60,7 +61,7 @@ def handle(_name, cfg, cloud, log, _args):
# skip something that wasn't a list
if not isinstance(cfgmnt[i], list):
log.warn("Mount option %s not a list, got a %s instead",
- (i + 1), util.obj_name(cfgmnt[i]))
+ (i + 1), type_utils.obj_name(cfgmnt[i]))
continue
startname = str(cfgmnt[i][0])
diff --git a/cloudinit/distros/__init__.py b/cloudinit/distros/__init__.py
index 2a2d8216..50d52594 100644
--- a/cloudinit/distros/__init__.py
+++ b/cloudinit/distros/__init__.py
@@ -31,6 +31,7 @@ import re
from cloudinit import importer
from cloudinit import log as logging
from cloudinit import ssh_util
+from cloudinit import type_utils
from cloudinit import util
from cloudinit.distros.parsers import hosts
@@ -445,7 +446,7 @@ class Distro(object):
lines.append("%s %s" % (user, rules))
else:
msg = "Can not create sudoers rule addition with type %r"
- raise TypeError(msg % (util.obj_name(rules)))
+ raise TypeError(msg % (type_utils.obj_name(rules)))
content = "\n".join(lines)
content += "\n" # trailing newline
@@ -568,7 +569,7 @@ def _normalize_groups(grp_cfg):
c_grp_cfg[k] = [v]
else:
raise TypeError("Bad group member type %s" %
- util.obj_name(v))
+ type_utils.obj_name(v))
else:
if isinstance(v, (list)):
c_grp_cfg[k].extend(v)
@@ -576,13 +577,13 @@ def _normalize_groups(grp_cfg):
c_grp_cfg[k].append(v)
else:
raise TypeError("Bad group member type %s" %
- util.obj_name(v))
+ type_utils.obj_name(v))
elif isinstance(i, (str, basestring)):
if i not in c_grp_cfg:
c_grp_cfg[i] = []
else:
raise TypeError("Unknown group name type %s" %
- util.obj_name(i))
+ type_utils.obj_name(i))
grp_cfg = c_grp_cfg
groups = {}
if isinstance(grp_cfg, (dict)):
@@ -591,7 +592,7 @@ def _normalize_groups(grp_cfg):
else:
raise TypeError(("Group config must be list, dict "
" or string types only and not %s") %
- util.obj_name(grp_cfg))
+ type_utils.obj_name(grp_cfg))
return groups
@@ -622,7 +623,7 @@ def _normalize_users(u_cfg, def_user_cfg=None):
ad_ucfg.append(v)
else:
raise TypeError(("Unmappable user value type %s"
- " for key %s") % (util.obj_name(v), k))
+ " for key %s") % (type_utils.obj_name(v), k))
u_cfg = ad_ucfg
elif isinstance(u_cfg, (str, basestring)):
u_cfg = util.uniq_merge_sorted(u_cfg)
@@ -647,7 +648,7 @@ def _normalize_users(u_cfg, def_user_cfg=None):
else:
raise TypeError(("User config must be dictionary/list "
" or string types only and not %s") %
- util.obj_name(user_config))
+ type_utils.obj_name(user_config))
# Ensure user options are in the right python friendly format
if users:
@@ -740,7 +741,7 @@ def normalize_users_groups(cfg, distro):
}
if not isinstance(old_user, (dict)):
LOG.warn(("Format for 'user' key must be a string or "
- "dictionary and not %s"), util.obj_name(old_user))
+ "dictionary and not %s"), type_utils.obj_name(old_user))
old_user = {}
# If no old user format, then assume the distro
@@ -766,7 +767,7 @@ def normalize_users_groups(cfg, distro):
if not isinstance(base_users, (list, dict, str, basestring)):
LOG.warn(("Format for 'users' key must be a comma separated string"
" or a dictionary or a list and not %s"),
- util.obj_name(base_users))
+ type_utils.obj_name(base_users))
base_users = []
if old_user:
diff --git a/cloudinit/handlers/__init__.py b/cloudinit/handlers/__init__.py
index 8d6dcd4d..924463ce 100644
--- a/cloudinit/handlers/__init__.py
+++ b/cloudinit/handlers/__init__.py
@@ -27,6 +27,7 @@ from cloudinit.settings import (PER_ALWAYS, PER_INSTANCE, FREQUENCIES)
from cloudinit import importer
from cloudinit import log as logging
+from cloudinit import type_utils
from cloudinit import util
LOG = logging.getLogger(__name__)
@@ -69,7 +70,6 @@ INCLUSION_SRCH = sorted(list(INCLUSION_TYPES_MAP.keys()),
class Handler(object):
-
__metaclass__ = abc.ABCMeta
def __init__(self, frequency, version=2):
@@ -77,53 +77,66 @@ class Handler(object):
self.frequency = frequency
def __repr__(self):
- return "%s: [%s]" % (util.obj_name(self), self.list_types())
+ return "%s: [%s]" % (type_utils.obj_name(self), self.list_types())
@abc.abstractmethod
def list_types(self):
raise NotImplementedError()
- def handle_part(self, data, ctype, filename, payload, frequency):
- return self._handle_part(data, ctype, filename, payload, frequency)
-
@abc.abstractmethod
- def _handle_part(self, data, ctype, filename, payload, frequency):
+ def handle_part(self, *args, **kwargs):
raise NotImplementedError()
-def run_part(mod, data, ctype, filename, payload, frequency):
+def run_part(mod, data, filename, payload, frequency, headers):
mod_freq = mod.frequency
if not (mod_freq == PER_ALWAYS or
(frequency == PER_INSTANCE and mod_freq == PER_INSTANCE)):
return
- mod_ver = mod.handler_version
# Sanity checks on version (should be an int convertable)
try:
+ mod_ver = mod.handler_version
mod_ver = int(mod_ver)
- except:
+ except (TypeError, ValueError, AttributeError):
mod_ver = 1
+ content_type = headers['Content-Type']
try:
LOG.debug("Calling handler %s (%s, %s, %s) with frequency %s",
- mod, ctype, filename, mod_ver, frequency)
- if mod_ver >= 2:
+ mod, content_type, filename, mod_ver, frequency)
+ if mod_ver == 3:
+ # Treat as v. 3 which does get a frequency + headers
+ mod.handle_part(data, content_type, filename,
+ payload, frequency, headers)
+ elif mod_ver == 2:
# Treat as v. 2 which does get a frequency
- mod.handle_part(data, ctype, filename, payload, frequency)
- else:
+ mod.handle_part(data, content_type, filename,
+ payload, frequency)
+ elif mod_ver == 1:
# Treat as v. 1 which gets no frequency
- mod.handle_part(data, ctype, filename, payload)
+ mod.handle_part(data, content_type, filename, payload)
+ else:
+ raise ValueError("Unknown module version %s" % (mod_ver))
except:
util.logexc(LOG, ("Failed calling handler %s (%s, %s, %s)"
" with frequency %s"),
- mod, ctype, filename,
+ mod, content_type, filename,
mod_ver, frequency)
def call_begin(mod, data, frequency):
- run_part(mod, data, CONTENT_START, None, None, frequency)
+ # Create a fake header set
+ headers = {
+ 'Content-Type': CONTENT_START,
+ }
+ run_part(mod, data, None, None, frequency, headers)
def call_end(mod, data, frequency):
- run_part(mod, data, CONTENT_END, None, None, frequency)
+ # Create a fake header set
+ headers = {
+ 'Content-Type': CONTENT_END,
+ }
+ run_part(mod, data, None, None, frequency, headers)
def walker_handle_handler(pdata, _ctype, _filename, payload):
@@ -173,26 +186,27 @@ def _escape_string(text):
return text
-def walker_callback(pdata, ctype, filename, payload):
- if ctype in PART_CONTENT_TYPES:
- walker_handle_handler(pdata, ctype, filename, payload)
+def walker_callback(data, filename, payload, headers):
+ content_type = headers['Content-Type']
+ if content_type in PART_CONTENT_TYPES:
+ walker_handle_handler(data, content_type, filename, payload)
return
- handlers = pdata['handlers']
- if ctype in pdata['handlers']:
- run_part(handlers[ctype], pdata['data'], ctype, filename,
- payload, pdata['frequency'])
+ handlers = data['handlers']
+ if content_type in handlers:
+ run_part(handlers[content_type], data['data'], filename,
+ payload, data['frequency'], headers)
elif payload:
# Extract the first line or 24 bytes for displaying in the log
start = _extract_first_or_bytes(payload, 24)
details = "'%s...'" % (_escape_string(start))
- if ctype == NOT_MULTIPART_TYPE:
+ if content_type == NOT_MULTIPART_TYPE:
LOG.warning("Unhandled non-multipart (%s) userdata: %s",
- ctype, details)
+ content_type, details)
else:
LOG.warning("Unhandled unknown content-type (%s) userdata: %s",
- ctype, details)
+ content_type, details)
else:
- LOG.debug("empty payload of type %s" % ctype)
+ LOG.debug("Empty payload of type %s", content_type)
# Callback is a function that will be called with
@@ -212,7 +226,10 @@ def walk(msg, callback, data):
if not filename:
filename = PART_FN_TPL % (partnum)
- callback(data, ctype, filename, part.get_payload(decode=True))
+ headers = dict(part)
+ LOG.debug(headers)
+ headers['Content-Type'] = ctype
+ callback(data, filename, part.get_payload(decode=True), headers)
partnum = partnum + 1
diff --git a/cloudinit/handlers/boot_hook.py b/cloudinit/handlers/boot_hook.py
index 456b8020..bf2899ab 100644
--- a/cloudinit/handlers/boot_hook.py
+++ b/cloudinit/handlers/boot_hook.py
@@ -56,7 +56,8 @@ class BootHookPartHandler(handlers.Handler):
util.write_file(filepath, contents, 0700)
return filepath
- def _handle_part(self, _data, ctype, filename, payload, _frequency):
+ def handle_part(self, _data, ctype, filename, # pylint: disable=W0221
+ payload, frequency): # pylint: disable=W0613
if ctype in handlers.CONTENT_SIGNALS:
return
diff --git a/cloudinit/handlers/cloud_config.py b/cloudinit/handlers/cloud_config.py
index f6d95244..d30d6338 100644
--- a/cloudinit/handlers/cloud_config.py
+++ b/cloudinit/handlers/cloud_config.py
@@ -22,41 +22,103 @@
from cloudinit import handlers
from cloudinit import log as logging
+from cloudinit import mergers
from cloudinit import util
from cloudinit.settings import (PER_ALWAYS)
LOG = logging.getLogger(__name__)
+MERGE_HEADER = 'Merge-Type'
+DEF_MERGERS = mergers.default_mergers()
+
class CloudConfigPartHandler(handlers.Handler):
def __init__(self, paths, **_kwargs):
- handlers.Handler.__init__(self, PER_ALWAYS)
- self.cloud_buf = []
+ handlers.Handler.__init__(self, PER_ALWAYS, version=3)
+ self.cloud_buf = None
self.cloud_fn = paths.get_ipath("cloud_config")
+ self.file_names = []
+ self.mergers = [DEF_MERGERS]
def list_types(self):
return [
handlers.type_from_starts_with("#cloud-config"),
]
- def _write_cloud_config(self, buf):
+ def _write_cloud_config(self):
if not self.cloud_fn:
return
- lines = [str(b) for b in buf]
- payload = "\n".join(lines)
- util.write_file(self.cloud_fn, payload, 0600)
+ # Capture which files we merged from...
+ file_lines = []
+ if self.file_names:
+ file_lines.append("# from %s files" % (len(self.file_names)))
+ for fn in self.file_names:
+ file_lines.append("# %s" % (fn))
+ file_lines.append("")
+ if self.cloud_buf is not None:
+ # Something was actually gathered....
+ lines = [
+ "#cloud-config",
+ '',
+ ]
+ lines.extend(file_lines)
+ lines.append(util.yaml_dumps(self.cloud_buf))
+ else:
+ lines = []
+ util.write_file(self.cloud_fn, "\n".join(lines), 0600)
+
+ def _extract_mergers(self, payload, headers):
+ merge_header_headers = ''
+ for h in [MERGE_HEADER, 'X-%s' % (MERGE_HEADER)]:
+ tmp_h = headers.get(h, '')
+ if tmp_h:
+ merge_header_headers = tmp_h
+ break
+ # Select either the merge-type from the content
+ # or the merge type from the headers or default to our own set
+ # if neither exists (or is empty) from the later.
+ payload_yaml = util.load_yaml(payload)
+ mergers_yaml = mergers.dict_extract_mergers(payload_yaml)
+ mergers_header = mergers.string_extract_mergers(merge_header_headers)
+ all_mergers = []
+ all_mergers.extend(mergers_yaml)
+ all_mergers.extend(mergers_header)
+ if not all_mergers:
+ all_mergers = DEF_MERGERS
+ return all_mergers
+
+ def _merge_part(self, payload, headers):
+ next_mergers = self._extract_mergers(payload, headers)
+ # Use the merger list from the last call, since it is the one
+ # that will be defining how to merge with the next payload.
+ curr_mergers = list(self.mergers[-1])
+ LOG.debug("Merging by applying %s", curr_mergers)
+ self.mergers.append(next_mergers)
+ merger = mergers.construct(curr_mergers)
+ if self.cloud_buf is None:
+ # First time through, merge with an empty dict...
+ self.cloud_buf = {}
+ self.cloud_buf = merger.merge(self.cloud_buf,
+ util.load_yaml(payload))
- def _handle_part(self, _data, ctype, filename, payload, _frequency):
+ def _reset(self):
+ self.file_names = []
+ self.cloud_buf = None
+ self.mergers = [DEF_MERGERS]
+
+ def handle_part(self, _data, ctype, filename, # pylint: disable=W0221
+ payload, _frequency, headers): # pylint: disable=W0613
if ctype == handlers.CONTENT_START:
- self.cloud_buf = []
+ self._reset()
return
if ctype == handlers.CONTENT_END:
- self._write_cloud_config(self.cloud_buf)
- self.cloud_buf = []
+ self._write_cloud_config()
+ self._reset()
return
-
- filename = util.clean_filename(filename)
- if not filename:
- filename = '??'
- self.cloud_buf.extend(["#%s" % (filename), str(payload)])
+ try:
+ self._merge_part(payload, headers)
+ self.file_names.append(filename)
+ except:
+ util.logexc(LOG, "Failed at merging in cloud config part from %s",
+ filename)
diff --git a/cloudinit/handlers/shell_script.py b/cloudinit/handlers/shell_script.py
index 6c5c11ca..b185c374 100644
--- a/cloudinit/handlers/shell_script.py
+++ b/cloudinit/handlers/shell_script.py
@@ -41,7 +41,8 @@ class ShellScriptPartHandler(handlers.Handler):
handlers.type_from_starts_with("#!"),
]
- def _handle_part(self, _data, ctype, filename, payload, _frequency):
+ def handle_part(self, _data, ctype, filename, # pylint: disable=W0221
+ payload, frequency): # pylint: disable=W0613
if ctype in handlers.CONTENT_SIGNALS:
# TODO(harlowja): maybe delete existing things here
return
diff --git a/cloudinit/handlers/upstart_job.py b/cloudinit/handlers/upstart_job.py
index 0aa7446e..edd56527 100644
--- a/cloudinit/handlers/upstart_job.py
+++ b/cloudinit/handlers/upstart_job.py
@@ -42,7 +42,8 @@ class UpstartJobPartHandler(handlers.Handler):
handlers.type_from_starts_with("#upstart-job"),
]
- def _handle_part(self, _data, ctype, filename, payload, frequency):
+ def handle_part(self, _data, ctype, filename, # pylint: disable=W0221
+ payload, frequency):
if ctype in handlers.CONTENT_SIGNALS:
return
diff --git a/cloudinit/helpers.py b/cloudinit/helpers.py
index 2077401c..a4e6fb03 100644
--- a/cloudinit/helpers.py
+++ b/cloudinit/helpers.py
@@ -32,6 +32,7 @@ from cloudinit.settings import (PER_INSTANCE, PER_ALWAYS, PER_ONCE,
CFG_ENV_NAME)
from cloudinit import log as logging
+from cloudinit import type_utils
from cloudinit import util
LOG = logging.getLogger(__name__)
@@ -68,7 +69,7 @@ class FileLock(object):
self.fn = fn
def __str__(self):
- return "<%s using file %r>" % (util.obj_name(self), self.fn)
+ return "<%s using file %r>" % (type_utils.obj_name(self), self.fn)
def canon_sem_name(name):
diff --git a/cloudinit/mergers/__init__.py b/cloudinit/mergers/__init__.py
new file mode 100644
index 00000000..e1ff57ba
--- /dev/null
+++ b/cloudinit/mergers/__init__.py
@@ -0,0 +1,154 @@
+# vi: ts=4 expandtab
+#
+# Copyright (C) 2012 Yahoo! Inc.
+#
+# Author: Joshua Harlow <harlowja@yahoo-inc.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 3, as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import re
+
+from cloudinit import importer
+from cloudinit import log as logging
+from cloudinit import type_utils
+
+NAME_MTCH = re.compile(r"(^[a-zA-Z_][A-Za-z0-9_]*)\((.*?)\)$")
+
+LOG = logging.getLogger(__name__)
+DEF_MERGE_TYPE = "list()+dict()+str()"
+
+
+class UnknownMerger(object):
+ # Named differently so auto-method finding
+ # doesn't pick this up if there is ever a type
+ # named "unknown"
+ def _handle_unknown(self, _meth_wanted, value, _merge_with):
+ return value
+
+ # This merging will attempt to look for a '_on_X' method
+ # in our own object for a given object Y with type X,
+ # if found it will be called to perform the merge of a source
+ # object and a object to merge_with.
+ #
+ # If not found the merge will be given to a '_handle_unknown'
+ # function which can decide what to do wit the 2 values.
+ def merge(self, source, merge_with):
+ type_name = type_utils.obj_name(source)
+ type_name = type_name.lower()
+ method_name = "_on_%s" % (type_name)
+ meth = None
+ args = [source, merge_with]
+ if hasattr(self, method_name):
+ meth = getattr(self, method_name)
+ if not meth:
+ meth = self._handle_unknown
+ args.insert(0, method_name)
+ return meth(*args)
+
+
+class LookupMerger(UnknownMerger):
+ def __init__(self, lookups=None):
+ UnknownMerger.__init__(self)
+ if lookups is None:
+ self._lookups = []
+ else:
+ self._lookups = lookups
+
+ # For items which can not be merged by the parent this object
+ # will lookup in a internally maintained set of objects and
+ # find which one of those objects can perform the merge. If
+ # any of the contained objects have the needed method, they
+ # will be called to perform the merge.
+ def _handle_unknown(self, meth_wanted, value, merge_with):
+ meth = None
+ for merger in self._lookups:
+ if hasattr(merger, meth_wanted):
+ # First one that has that method/attr gets to be
+ # the one that will be called
+ meth = getattr(merger, meth_wanted)
+ break
+ if not meth:
+ return UnknownMerger._handle_unknown(self, meth_wanted,
+ value, merge_with)
+ return meth(value, merge_with)
+
+
+def dict_extract_mergers(config):
+ parsed_mergers = []
+ raw_mergers = config.get('merge_how')
+ if raw_mergers is None:
+ raw_mergers = config.get('merge_type')
+ if raw_mergers is None:
+ return parsed_mergers
+ if isinstance(raw_mergers, (str, basestring)):
+ return string_extract_mergers(raw_mergers)
+ for m in raw_mergers:
+ if isinstance(m, (dict)):
+ name = m['name']
+ name = name.replace("-", "_").strip()
+ opts = m['settings']
+ else:
+ name = m[0]
+ if len(m) >= 2:
+ opts = m[1:]
+ else:
+ opts = []
+ if name:
+ parsed_mergers.append((name, opts))
+ return parsed_mergers
+
+
+def string_extract_mergers(merge_how):
+ parsed_mergers = []
+ for m_name in merge_how.split("+"):
+ # Canonicalize the name (so that it can be found
+ # even when users alter it in various ways)
+ m_name = m_name.lower().strip()
+ m_name = m_name.replace("-", "_")
+ if not m_name:
+ continue
+ match = NAME_MTCH.match(m_name)
+ if not match:
+ msg = ("Matcher identifer '%s' is not in the right format" %
+ (m_name))
+ raise ValueError(msg)
+ (m_name, m_ops) = match.groups()
+ m_ops = m_ops.strip().split(",")
+ m_ops = [m.strip().lower() for m in m_ops if m.strip()]
+ parsed_mergers.append((m_name, m_ops))
+ return parsed_mergers
+
+
+def default_mergers():
+ return tuple(string_extract_mergers(DEF_MERGE_TYPE))
+
+
+def construct(parsed_mergers):
+ mergers_to_be = []
+ for (m_name, m_ops) in parsed_mergers:
+ merger_locs = importer.find_module(m_name,
+ [__name__],
+ ['Merger'])
+ if not merger_locs:
+ msg = "Could not find merger named '%s'" % (m_name)
+ raise ImportError(msg)
+ else:
+ mod = importer.import_module(merger_locs[0])
+ mod_attr = getattr(mod, 'Merger')
+ mergers_to_be.append((mod_attr, m_ops))
+ # Now form them...
+ mergers = []
+ root = LookupMerger(mergers)
+ for (attr, opts) in mergers_to_be:
+ mergers.append(attr(root, opts))
+ return root
diff --git a/cloudinit/mergers/dict.py b/cloudinit/mergers/dict.py
new file mode 100644
index 00000000..45a7d3a5
--- /dev/null
+++ b/cloudinit/mergers/dict.py
@@ -0,0 +1,48 @@
+# vi: ts=4 expandtab
+#
+# Copyright (C) 2012 Yahoo! Inc.
+#
+# Author: Joshua Harlow <harlowja@yahoo-inc.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 3, as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+class Merger(object):
+ def __init__(self, merger, opts):
+ self._merger = merger
+ self._overwrite = 'overwrite' in opts
+
+ # This merging algorithm will attempt to merge with
+ # another dictionary, on encountering any other type of object
+ # it will not merge with said object, but will instead return
+ # the original value
+ #
+ # On encountering a dictionary, it will create a new dictionary
+ # composed of the original and the one to merge with, if 'overwrite'
+ # is enabled then keys that exist in the original will be overwritten
+ # by keys in the one to merge with (and associated values). Otherwise
+ # if not in overwrite mode the 2 conflicting keys themselves will
+ # be merged.
+ def _on_dict(self, value, merge_with):
+ if not isinstance(merge_with, (dict)):
+ return value
+ merged = dict(value)
+ for (k, v) in merge_with.items():
+ if k in merged:
+ if not self._overwrite:
+ merged[k] = self._merger.merge(merged[k], v)
+ else:
+ merged[k] = v
+ else:
+ merged[k] = v
+ return merged
diff --git a/cloudinit/mergers/list.py b/cloudinit/mergers/list.py
new file mode 100644
index 00000000..a56ff007
--- /dev/null
+++ b/cloudinit/mergers/list.py
@@ -0,0 +1,50 @@
+# vi: ts=4 expandtab
+#
+# Copyright (C) 2012 Yahoo! Inc.
+#
+# Author: Joshua Harlow <harlowja@yahoo-inc.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 3, as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+class Merger(object):
+ def __init__(self, merger, opts):
+ self._merger = merger
+ self._discard_non = 'discard_non_list' in opts
+ self._extend = 'extend' in opts
+
+ def _on_tuple(self, value, merge_with):
+ return self._on_list(list(value), merge_with)
+
+ # On encountering a list or tuple type this action will be applied
+ # a new list will be returned, if the value to merge with is itself
+ # a list and we have been told to 'extend', then the value here will
+ # be extended with the other list. If in 'extend' mode then we will
+ # attempt to merge instead, which means that values from the list
+ # to merge with will replace values in te original list (they will
+ # also be merged recursively).
+ #
+ # If the value to merge with is not a list, and we are set to discared
+ # then no modifications will take place, otherwise we will just append
+ # the value to merge with onto the end of our own list.
+ def _on_list(self, value, merge_with):
+ new_value = list(value)
+ if isinstance(merge_with, (tuple, list)):
+ if self._extend:
+ new_value.extend(merge_with)
+ else:
+ return new_value
+ else:
+ if not self._discard_non:
+ new_value.append(merge_with)
+ return new_value
diff --git a/cloudinit/mergers/str.py b/cloudinit/mergers/str.py
new file mode 100644
index 00000000..291c91c2
--- /dev/null
+++ b/cloudinit/mergers/str.py
@@ -0,0 +1,39 @@
+# vi: ts=4 expandtab
+#
+# Copyright (C) 2012 Yahoo! Inc.
+#
+# Author: Joshua Harlow <harlowja@yahoo-inc.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 3, as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+class Merger(object):
+ def __init__(self, _merger, opts):
+ self._append = 'append' in opts
+
+ # On encountering a unicode object to merge value with
+ # we will for now just proxy into the string method to let it handle it.
+ def _on_unicode(self, value, merge_with):
+ return self._on_str(value, merge_with)
+
+ # On encountering a string object to merge with we will
+ # perform the following action, if appending we will
+ # merge them together, otherwise we will just return value.
+ def _on_str(self, value, merge_with):
+ if not self._append:
+ return value
+ else:
+ if isinstance(value, (unicode)):
+ return value + unicode(merge_with)
+ else:
+ return value + str(merge_with)
diff --git a/cloudinit/sources/DataSourceAltCloud.py b/cloudinit/sources/DataSourceAltCloud.py
index 9812bdcb..64548d43 100644
--- a/cloudinit/sources/DataSourceAltCloud.py
+++ b/cloudinit/sources/DataSourceAltCloud.py
@@ -30,6 +30,7 @@ import os.path
from cloudinit import log as logging
from cloudinit import sources
from cloudinit import util
+
from cloudinit.util import ProcessExecutionError
LOG = logging.getLogger(__name__)
@@ -91,8 +92,8 @@ class DataSourceAltCloud(sources.DataSource):
self.supported_seed_starts = ("/", "file://")
def __str__(self):
- mstr = "%s [seed=%s]" % (util.obj_name(self), self.seed)
- return mstr
+ root = sources.DataSource.__str__(self)
+ return "%s [seed=%s]" % (root, self.seed)
def get_cloud_type(self):
'''
diff --git a/cloudinit/sources/DataSourceCloudStack.py b/cloudinit/sources/DataSourceCloudStack.py
index 275caf0d..81c8cda9 100644
--- a/cloudinit/sources/DataSourceCloudStack.py
+++ b/cloudinit/sources/DataSourceCloudStack.py
@@ -48,9 +48,6 @@ class DataSourceCloudStack(sources.DataSource):
raise RuntimeError("No virtual router found!")
self.metadata_address = "http://%s/" % (vr_addr)
- def __str__(self):
- return util.obj_name(self)
-
def _get_url_settings(self):
mcfg = self.ds_cfg
if not mcfg:
diff --git a/cloudinit/sources/DataSourceConfigDrive.py b/cloudinit/sources/DataSourceConfigDrive.py
index ec016a1d..5f152299 100644
--- a/cloudinit/sources/DataSourceConfigDrive.py
+++ b/cloudinit/sources/DataSourceConfigDrive.py
@@ -51,7 +51,9 @@ class DataSourceConfigDrive(sources.DataSource):
self.ec2_metadata = None
def __str__(self):
- mstr = "%s [%s,ver=%s]" % (util.obj_name(self), self.dsmode,
+ root = sources.DataSource.__str__(self)
+ mstr = "%s [%s,ver=%s]" % (root,
+ self.dsmode,
self.version)
mstr += "[source=%s]" % (self.source)
return mstr
@@ -152,7 +154,7 @@ class DataSourceConfigDrive(sources.DataSource):
return False
md = results['metadata']
- md = util.mergedict(md, DEFAULT_METADATA)
+ md = util.mergemanydict([md, DEFAULT_METADATA])
# Perform some metadata 'fixups'
#
diff --git a/cloudinit/sources/DataSourceEc2.py b/cloudinit/sources/DataSourceEc2.py
index 2db53446..f010e640 100644
--- a/cloudinit/sources/DataSourceEc2.py
+++ b/cloudinit/sources/DataSourceEc2.py
@@ -49,9 +49,6 @@ class DataSourceEc2(sources.DataSource):
self.seed_dir = os.path.join(paths.seed_dir, "ec2")
self.api_ver = DEF_MD_VERSION
- def __str__(self):
- return util.obj_name(self)
-
def get_data(self):
seed_ret = {}
if util.read_optional_seed(seed_ret, base=(self.seed_dir + "/")):
diff --git a/cloudinit/sources/DataSourceMAAS.py b/cloudinit/sources/DataSourceMAAS.py
index b55d8a21..612d8ffa 100644
--- a/cloudinit/sources/DataSourceMAAS.py
+++ b/cloudinit/sources/DataSourceMAAS.py
@@ -50,7 +50,8 @@ class DataSourceMAAS(sources.DataSource):
self.oauth_clockskew = None
def __str__(self):
- return "%s [%s]" % (util.obj_name(self), self.base_url)
+ root = sources.DataSource.__str__(self)
+ return "%s [%s]" % (root, self.base_url)
def get_data(self):
mcfg = self.ds_cfg
diff --git a/cloudinit/sources/DataSourceNoCloud.py b/cloudinit/sources/DataSourceNoCloud.py
index 603f0155..08a853cc 100644
--- a/cloudinit/sources/DataSourceNoCloud.py
+++ b/cloudinit/sources/DataSourceNoCloud.py
@@ -40,9 +40,8 @@ class DataSourceNoCloud(sources.DataSource):
self.supported_seed_starts = ("/", "file://")
def __str__(self):
- mstr = "%s [seed=%s][dsmode=%s]" % (util.obj_name(self),
- self.seed, self.dsmode)
- return mstr
+ root = sources.DataSource.__str__(self)
+ return "%s [seed=%s][dsmode=%s]" % (root, self.seed, self.dsmode)
def get_data(self):
defaults = {
@@ -65,7 +64,7 @@ class DataSourceNoCloud(sources.DataSource):
# Check to see if the seed dir has data.
seedret = {}
if util.read_optional_seed(seedret, base=self.seed_dir + "/"):
- md = util.mergedict(md, seedret['meta-data'])
+ md = util.mergemanydict([md, seedret['meta-data']])
ud = seedret['user-data']
found.append(self.seed_dir)
LOG.debug("Using seeded cache data from %s", self.seed_dir)
@@ -82,7 +81,7 @@ class DataSourceNoCloud(sources.DataSource):
if self.ds_cfg['user-data']:
ud = self.ds_cfg['user-data']
if self.ds_cfg['meta-data'] is not False:
- md = util.mergedict(md, self.ds_cfg['meta-data'])
+ md = util.mergemanydict([md, self.ds_cfg['meta-data']])
if 'ds_config' not in found:
found.append("ds_config")
@@ -100,7 +99,7 @@ class DataSourceNoCloud(sources.DataSource):
LOG.debug("Attempting to use data from %s", dev)
(newmd, newud) = util.mount_cb(dev, util.read_seeded)
- md = util.mergedict(newmd, md)
+ md = util.mergemanydict([newmd, md])
ud = newud
# For seed from a device, the default mode is 'net'.
@@ -150,11 +149,11 @@ class DataSourceNoCloud(sources.DataSource):
LOG.debug("Using seeded cache data from %s", seedfrom)
# Values in the command line override those from the seed
- md = util.mergedict(md, md_seed)
+ md = util.mergemanydict([md, md_seed])
found.append(seedfrom)
# Now that we have exhausted any other places merge in the defaults
- md = util.mergedict(md, defaults)
+ md = util.mergemanydict([md, defaults])
# Update the network-interfaces if metadata had 'network-interfaces'
# entry and this is the local datasource, or 'seedfrom' was used
diff --git a/cloudinit/sources/DataSourceNone.py b/cloudinit/sources/DataSourceNone.py
index c2125bee..12a8a992 100644
--- a/cloudinit/sources/DataSourceNone.py
+++ b/cloudinit/sources/DataSourceNone.py
@@ -18,7 +18,6 @@
from cloudinit import log as logging
from cloudinit import sources
-from cloudinit import util
LOG = logging.getLogger(__name__)
@@ -41,9 +40,6 @@ class DataSourceNone(sources.DataSource):
def get_instance_id(self):
return 'iid-datasource-none'
- def __str__(self):
- return util.obj_name(self)
-
@property
def is_disconnected(self):
return True
diff --git a/cloudinit/sources/DataSourceOVF.py b/cloudinit/sources/DataSourceOVF.py
index e90150c6..0530c4b7 100644
--- a/cloudinit/sources/DataSourceOVF.py
+++ b/cloudinit/sources/DataSourceOVF.py
@@ -43,7 +43,8 @@ class DataSourceOVF(sources.DataSource):
self.supported_seed_starts = ("/", "file://")
def __str__(self):
- return "%s [seed=%s]" % (util.obj_name(self), self.seed)
+ root = sources.DataSource.__str__(self)
+ return "%s [seed=%s]" % (root, self.seed)
def get_data(self):
found = []
@@ -93,11 +94,11 @@ class DataSourceOVF(sources.DataSource):
(md_seed, ud) = util.read_seeded(seedfrom, timeout=None)
LOG.debug("Using seeded cache data from %s", seedfrom)
- md = util.mergedict(md, md_seed)
+ md = util.mergemanydict([md, md_seed])
found.append(seedfrom)
# Now that we have exhausted any other places merge in the defaults
- md = util.mergedict(md, defaults)
+ md = util.mergemanydict([md, defaults])
self.seed = ",".join(found)
self.metadata = md
diff --git a/cloudinit/sources/__init__.py b/cloudinit/sources/__init__.py
index 96baff90..d8fbacdd 100644
--- a/cloudinit/sources/__init__.py
+++ b/cloudinit/sources/__init__.py
@@ -25,6 +25,7 @@ import os
from cloudinit import importer
from cloudinit import log as logging
+from cloudinit import type_utils
from cloudinit import user_data as ud
from cloudinit import util
@@ -52,7 +53,7 @@ class DataSource(object):
self.userdata = None
self.metadata = None
self.userdata_raw = None
- name = util.obj_name(self)
+ name = type_utils.obj_name(self)
if name.startswith(DS_PREFIX):
name = name[len(DS_PREFIX):]
self.ds_cfg = util.get_cfg_by_path(self.sys_cfg,
@@ -62,6 +63,9 @@ class DataSource(object):
else:
self.ud_proc = ud_proc
+ def __str__(self):
+ return type_utils.obj_name(self)
+
def get_userdata(self, apply_filter=False):
if self.userdata is None:
self.userdata = self.ud_proc.process(self.get_userdata_raw())
@@ -214,7 +218,7 @@ def normalize_pubkey_data(pubkey_data):
def find_source(sys_cfg, distro, paths, ds_deps, cfg_list, pkg_list):
ds_list = list_sources(cfg_list, ds_deps, pkg_list)
- ds_names = [util.obj_name(f) for f in ds_list]
+ ds_names = [type_utils.obj_name(f) for f in ds_list]
LOG.debug("Searching for data source in: %s", ds_names)
for cls in ds_list:
@@ -222,7 +226,7 @@ def find_source(sys_cfg, distro, paths, ds_deps, cfg_list, pkg_list):
LOG.debug("Seeing if we can get any data from %s", cls)
s = cls(sys_cfg, distro, paths)
if s.get_data():
- return (s, util.obj_name(cls))
+ return (s, type_utils.obj_name(cls))
except Exception:
util.logexc(LOG, "Getting data from %s failed", cls)
diff --git a/cloudinit/stages.py b/cloudinit/stages.py
index d7d1dea0..543d247f 100644
--- a/cloudinit/stages.py
+++ b/cloudinit/stages.py
@@ -43,6 +43,7 @@ from cloudinit import helpers
from cloudinit import importer
from cloudinit import log as logging
from cloudinit import sources
+from cloudinit import type_utils
from cloudinit import util
LOG = logging.getLogger(__name__)
@@ -211,7 +212,7 @@ class Init(object):
# Any config provided???
pkg_list = self.cfg.get('datasource_pkg_list') or []
# Add the defaults at the end
- for n in ['', util.obj_name(sources)]:
+ for n in ['', type_utils.obj_name(sources)]:
if n not in pkg_list:
pkg_list.append(n)
cfg_list = self.cfg.get('datasource_list') or []
@@ -271,7 +272,7 @@ class Init(object):
dp = self.paths.get_cpath('data')
# Write what the datasource was and is..
- ds = "%s: %s" % (util.obj_name(self.datasource), self.datasource)
+ ds = "%s: %s" % (type_utils.obj_name(self.datasource), self.datasource)
previous_ds = None
ds_fn = os.path.join(idir, 'datasource')
try:
@@ -488,7 +489,7 @@ class Modules(object):
else:
raise TypeError(("Failed to read '%s' item in config,"
" unknown type %s") %
- (item, util.obj_name(item)))
+ (item, type_utils.obj_name(item)))
return module_list
def _fixup_modules(self, raw_mods):
@@ -506,7 +507,7 @@ class Modules(object):
# Reset it so when ran it will get set to a known value
freq = None
mod_locs = importer.find_module(mod_name,
- ['', util.obj_name(config)],
+ ['', type_utils.obj_name(config)],
['handle'])
if not mod_locs:
LOG.warn("Could not find module named %s", mod_name)
diff --git a/cloudinit/type_utils.py b/cloudinit/type_utils.py
new file mode 100644
index 00000000..2decbfc5
--- /dev/null
+++ b/cloudinit/type_utils.py
@@ -0,0 +1,34 @@
+# vi: ts=4 expandtab
+#
+# Copyright (C) 2012 Canonical Ltd.
+# Copyright (C) 2012 Hewlett-Packard Development Company, L.P.
+# Copyright (C) 2012 Yahoo! Inc.
+#
+# Author: Scott Moser <scott.moser@canonical.com>
+# Author: Juerg Haefliger <juerg.haefliger@hp.com>
+# Author: Joshua Harlow <harlowja@yahoo-inc.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 3, as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# pylint: disable=C0302
+
+import types
+
+
+def obj_name(obj):
+ if isinstance(obj, (types.TypeType,
+ types.ModuleType,
+ types.FunctionType,
+ types.LambdaType)):
+ return str(obj.__name__)
+ return obj_name(obj.__class__)
diff --git a/cloudinit/util.py b/cloudinit/util.py
index afde2066..709d5cca 100644
--- a/cloudinit/util.py
+++ b/cloudinit/util.py
@@ -43,14 +43,15 @@ import subprocess
import sys
import tempfile
import time
-import types
import urlparse
import yaml
from cloudinit import importer
from cloudinit import log as logging
+from cloudinit import mergers
from cloudinit import safeyaml
+from cloudinit import type_utils
from cloudinit import url_helper as uhelp
from cloudinit import version
@@ -194,11 +195,12 @@ def fork_cb(child_cb, *args):
os._exit(0) # pylint: disable=W0212
except:
logexc(LOG, ("Failed forking and"
- " calling callback %s"), obj_name(child_cb))
+ " calling callback %s"),
+ type_utils.obj_name(child_cb))
os._exit(1) # pylint: disable=W0212
else:
LOG.debug("Forked child %s who will run callback %s",
- fid, obj_name(child_cb))
+ fid, type_utils.obj_name(child_cb))
def is_true(val, addons=None):
@@ -512,40 +514,26 @@ def make_url(scheme, host, port=None,
return urlparse.urlunparse(pieces)
-def obj_name(obj):
- if isinstance(obj, (types.TypeType,
- types.ModuleType,
- types.FunctionType,
- types.LambdaType)):
- return str(obj.__name__)
- return obj_name(obj.__class__)
-
-
def mergemanydict(srcs, reverse=False):
if reverse:
srcs = reversed(srcs)
m_cfg = {}
+ merge_how = [mergers.default_mergers()]
for a_cfg in srcs:
if a_cfg:
- m_cfg = mergedict(m_cfg, a_cfg)
+ # Take the last merger as the one that
+ # will define how to merge next...
+ mergers_to_apply = list(merge_how[-1])
+ merger = mergers.construct(mergers_to_apply)
+ m_cfg = merger.merge(m_cfg, a_cfg)
+ # If the config has now has new merger set,
+ # extract them to be used next time...
+ new_mergers = mergers.dict_extract_mergers(m_cfg)
+ if new_mergers:
+ merge_how.append(new_mergers)
return m_cfg
-def mergedict(src, cand):
- """
- Merge values from C{cand} into C{src}.
- If C{src} has a key C{cand} will not override.
- Nested dictionaries are merged recursively.
- """
- if isinstance(src, dict) and isinstance(cand, dict):
- for (k, v) in cand.iteritems():
- if k not in src:
- src[k] = v
- else:
- src[k] = mergedict(src[k], v)
- return src
-
-
@contextlib.contextmanager
def chdir(ndir):
curr = os.getcwd()
@@ -644,7 +632,7 @@ def load_yaml(blob, default=None, allowed=(dict,)):
# Yes this will just be caught, but thats ok for now...
raise TypeError(("Yaml load allows %s root types,"
" but got %s instead") %
- (allowed, obj_name(converted)))
+ (allowed, type_utils.obj_name(converted)))
loaded = converted
except (yaml.YAMLError, TypeError, ValueError):
if len(blob) == 0:
@@ -713,7 +701,7 @@ def read_conf_with_confd(cfgfile):
if not isinstance(confd, (str, basestring)):
raise TypeError(("Config file %s contains 'conf_d' "
"with non-string type %s") %
- (cfgfile, obj_name(confd)))
+ (cfgfile, type_utils.obj_name(confd)))
else:
confd = str(confd).strip()
elif os.path.isdir("%s.d" % cfgfile):
@@ -724,7 +712,7 @@ def read_conf_with_confd(cfgfile):
# Conf.d settings override input configuration
confd_cfg = read_conf_d(confd)
- return mergedict(confd_cfg, cfg)
+ return mergemanydict([confd_cfg, cfg])
def read_cc_from_cmdline(cmdline=None):
@@ -1471,7 +1459,7 @@ def shellify(cmdlist, add_header=True):
else:
raise RuntimeError(("Unable to shellify type %s"
" which is not a list or string")
- % (obj_name(args)))
+ % (type_utils.obj_name(args)))
LOG.debug("Shellified %s commands.", cmds_made)
return content
diff --git a/doc/merging.txt b/doc/merging.txt
new file mode 100644
index 00000000..f719aec8
--- /dev/null
+++ b/doc/merging.txt
@@ -0,0 +1,179 @@
+Arriving in 0.7.2 is a new way to handle dictionary merging in cloud-init.
+---
+
+Overview
+--------
+
+This was done because it has been a common feature request that there be a
+way to specify how cloud-config yaml "dictionaries" are merged together when
+there are multiple yamls to merge together (say when performing an #include).
+
+Since previously the merging algorithm was very simple and would only overwrite
+and not append lists, or strings, and so on it was decided to create a new and
+improved way to merge dictionaries (and there contained objects) together in a
+way that is customizable, thus allowing for users who provide cloud-config data
+to determine exactly how there objects will be merged.
+
+For example.
+
+#cloud-config (1)
+run_cmd:
+ - bash1
+ - bash2
+
+#cloud-config (2)
+run_cmd:
+ - bash3
+ - bash4
+
+The previous way of merging the following 2 objects would result in a final
+cloud-config object that contains the following.
+
+#cloud-config (merged)
+run_cmd:
+ - bash3
+ - bash4
+
+Typically this is not what users want, instead they would likely prefer:
+
+#cloud-config (merged)
+run_cmd:
+ - bash1
+ - bash2
+ - bash3
+ - bash4
+
+This way makes it easier to combine the various cloud-config objects you have
+into a more useful list, thus reducing duplication that would have had to
+occur in the previous method to accomplish the same result.
+
+Customizability
+---------------
+
+Since the above merging algorithm may not always be the desired merging
+algorithm (like how the merging algorithm in < 0.7.2 was not always the preferred
+one) the concept of customizing how merging can be done was introduced through
+a new concept call 'merge classes'.
+
+A merge class is a class defintion which provides functions that can be used
+to merge a given type with another given type.
+
+An example of one of these merging classes is the following:
+
+class Merger(object):
+ def __init__(self, merger, opts):
+ self._merger = merger
+ self._overwrite = 'overwrite' in opts
+
+ # This merging algorithm will attempt to merge with
+ # another dictionary, on encountering any other type of object
+ # it will not merge with said object, but will instead return
+ # the original value
+ #
+ # On encountering a dictionary, it will create a new dictionary
+ # composed of the original and the one to merge with, if 'overwrite'
+ # is enabled then keys that exist in the original will be overwritten
+ # by keys in the one to merge with (and associated values). Otherwise
+ # if not in overwrite mode the 2 conflicting keys themselves will
+ # be merged.
+ def _on_dict(self, value, merge_with):
+ if not isinstance(merge_with, (dict)):
+ return value
+ merged = dict(value)
+ for (k, v) in merge_with.items():
+ if k in merged:
+ if not self._overwrite:
+ merged[k] = self._merger.merge(merged[k], v)
+ else:
+ merged[k] = v
+ else:
+ merged[k] = v
+ return merged
+
+As you can see there is a '_on_dict' method here that will be given a source value
+and a value to merge with. The result will be the merged object. This code itself
+is called by another merging class which 'directs' the merging to happen by
+analyzing the types of the objects to merge and attempting to find a know object
+that will merge that type. I will avoid pasting that here, but it can be found
+in the mergers/__init__.py file (see LookupMerger and UnknownMerger).
+
+So following the typical cloud-init way of allowing source code to be downloaded
+and used dynamically, it is possible for users to inject there own merging files
+to handle specific types of merging as they choose (the basic ones included will
+handle lists, dicts, and strings). Note how each merge can have options associated
+with it which affect how the merging is performed, for example a dictionary merger
+can be told to overwrite instead of attempt to merge, or a string merger can be
+told to append strings instead of discarding other strings to merge with.
+
+How to activate
+---------------
+
+There are a few ways to activate the merging algorithms, and to customize them
+for your own usage.
+
+1. The first way involves the usage of MIME messages in cloud-init to specify
+ multipart documents (this is one way in which multiple cloud-config is joined
+ together into a single cloud-config). Two new headers are looked for, both
+ of which can define the way merging is done (the first header to exist wins).
+ These new headers (in lookup order) are 'Merge-Type' and 'X-Merge-Type'. The value
+ should be a string which will satisfy the new merging format defintion (see
+ below for this format).
+2. The second way is actually specifying the merge-type in the body of the
+ cloud-config dictionary. There are 2 ways to specify this, either as a string
+ or as a dictionary (see format below). The keys that are looked up for this
+ definition are the following (in order), 'merge_how', 'merge_type'.
+
+*String format*
+
+The string format that is expected is the following.
+
+"classname(option1,option2)+classname2(option3,option4)" (and so on)
+
+The class name there will be connected to class names used when looking for the
+class that can be used to merge and options provided will be given to the class
+on construction of that class.
+
+For example, the default string that is used when none is provided is the following:
+
+"list(extend)+dict()+str(append)"
+
+*Dictionary format*
+
+In cases where a dictionary can be used to specify the same information as the
+string format (ie option #2 of above) it can be used, for example.
+
+merge_how:
+ - name: list
+ settings: [extend]
+ - name: dict
+ settings: []
+ - name: str
+ settings: [append]
+
+This would be the equivalent format for default string format but in dictionary
+form instead of string form.
+
+Specifying multiple types and its effect
+----------------------------------------
+
+Now you may be asking yourself, if I specify a merge-type header or dictionary
+for every cloud-config that I provide, what exactly happens?
+
+The answer is that when merging, a stack of 'merging classes' is kept, the
+first one on that stack is the default merging classes, this set of mergers
+will be used when the first cloud-config is merged with the initial empty
+cloud-config dictionary. If the cloud-config that was just merged provided a
+set of merging classes (via the above formats) then those merging classes will
+be pushed onto the stack. Now if there is a second cloud-config to be merged then
+the merging classes from the cloud-config before the first will be used (not the
+default) and so on. This way a cloud-config can decide how it will merge with a
+cloud-config dictionary coming after it.
+
+Other uses
+----------
+
+The default merging algorithm for merging conf.d yaml files (which form a initial
+yaml config for cloud-init) was also changed to use this mechanism so its full
+benefits (and customization) can also be used there as well. Other places that
+used the previous merging are also similar now extensible (metadata merging for
+example).
diff --git a/tests/unittests/test__init__.py b/tests/unittests/test__init__.py
index ac082076..2c0abfbc 100644
--- a/tests/unittests/test__init__.py
+++ b/tests/unittests/test__init__.py
@@ -22,7 +22,8 @@ class FakeModule(handlers.Handler):
def list_types(self):
return self.types
- def _handle_part(self, data, ctype, filename, payload, frequency):
+ def handle_part(self, _data, ctype, filename, # pylint: disable=W0221
+ payload, frequency):
pass
@@ -103,6 +104,9 @@ class TestHandlerHandlePart(MockerTestCase):
self.filename = "fake filename"
self.payload = "fake payload"
self.frequency = settings.PER_INSTANCE
+ self.headers = {
+ 'Content-Type': self.ctype,
+ }
def test_normal_version_1(self):
"""
@@ -118,8 +122,8 @@ class TestHandlerHandlePart(MockerTestCase):
self.payload)
self.mocker.replay()
- handlers.run_part(mod_mock, self.data, self.ctype, self.filename,
- self.payload, self.frequency)
+ handlers.run_part(mod_mock, self.data, self.filename,
+ self.payload, self.frequency, self.headers)
def test_normal_version_2(self):
"""
@@ -135,8 +139,8 @@ class TestHandlerHandlePart(MockerTestCase):
self.payload, self.frequency)
self.mocker.replay()
- handlers.run_part(mod_mock, self.data, self.ctype, self.filename,
- self.payload, self.frequency)
+ handlers.run_part(mod_mock, self.data, self.filename,
+ self.payload, self.frequency, self.headers)
def test_modfreq_per_always(self):
"""
@@ -152,8 +156,8 @@ class TestHandlerHandlePart(MockerTestCase):
self.payload)
self.mocker.replay()
- handlers.run_part(mod_mock, self.data, self.ctype, self.filename,
- self.payload, self.frequency)
+ handlers.run_part(mod_mock, self.data, self.filename,
+ self.payload, self.frequency, self.headers)
def test_no_handle_when_modfreq_once(self):
"""C{handle_part} is not called if frequency is once."""
@@ -163,8 +167,8 @@ class TestHandlerHandlePart(MockerTestCase):
self.mocker.result(settings.PER_ONCE)
self.mocker.replay()
- handlers.run_part(mod_mock, self.data, self.ctype, self.filename,
- self.payload, self.frequency)
+ handlers.run_part(mod_mock, self.data, self.filename,
+ self.payload, self.frequency, self.headers)
def test_exception_is_caught(self):
"""Exceptions within C{handle_part} are caught and logged."""
@@ -178,8 +182,8 @@ class TestHandlerHandlePart(MockerTestCase):
self.mocker.throw(Exception())
self.mocker.replay()
- handlers.run_part(mod_mock, self.data, self.ctype, self.filename,
- self.payload, self.frequency)
+ handlers.run_part(mod_mock, self.data, self.filename,
+ self.payload, self.frequency, self.headers)
class TestCmdlineUrl(MockerTestCase):
diff --git a/tests/unittests/test_merging.py b/tests/unittests/test_merging.py
index 0037b966..ad137e85 100644
--- a/tests/unittests/test_merging.py
+++ b/tests/unittests/test_merging.py
@@ -1,62 +1,142 @@
-from mocker import MockerTestCase
-
-from cloudinit import util
-
-
-class TestMergeDict(MockerTestCase):
- def test_simple_merge(self):
- """Test simple non-conflict merge."""
- source = {"key1": "value1"}
- candidate = {"key2": "value2"}
- result = util.mergedict(source, candidate)
- self.assertEqual({"key1": "value1", "key2": "value2"}, result)
-
- def test_nested_merge(self):
- """Test nested merge."""
- source = {"key1": {"key1.1": "value1.1"}}
- candidate = {"key1": {"key1.2": "value1.2"}}
- result = util.mergedict(source, candidate)
- self.assertEqual(
- {"key1": {"key1.1": "value1.1", "key1.2": "value1.2"}}, result)
-
- def test_merge_does_not_override(self):
- """Test that candidate doesn't override source."""
- source = {"key1": "value1", "key2": "value2"}
- candidate = {"key1": "value2", "key2": "NEW VALUE"}
- result = util.mergedict(source, candidate)
- self.assertEqual(source, result)
-
- def test_empty_candidate(self):
- """Test empty candidate doesn't change source."""
- source = {"key": "value"}
- candidate = {}
- result = util.mergedict(source, candidate)
- self.assertEqual(source, result)
-
- def test_empty_source(self):
- """Test empty source is replaced by candidate."""
- source = {}
- candidate = {"key": "value"}
- result = util.mergedict(source, candidate)
- self.assertEqual(candidate, result)
-
- def test_non_dict_candidate(self):
- """Test non-dict candidate is discarded."""
- source = {"key": "value"}
- candidate = "not a dict"
- result = util.mergedict(source, candidate)
- self.assertEqual(source, result)
-
- def test_non_dict_source(self):
- """Test non-dict source is not modified with a dict candidate."""
- source = "not a dict"
- candidate = {"key": "value"}
- result = util.mergedict(source, candidate)
- self.assertEqual(source, result)
-
- def test_neither_dict(self):
- """Test if neither candidate or source is dict source wins."""
- source = "source"
- candidate = "candidate"
- result = util.mergedict(source, candidate)
- self.assertEqual(source, result)
+from tests.unittests import helpers
+
+from cloudinit import mergers
+
+
+class TestSimpleRun(helpers.MockerTestCase):
+ def test_basic_merge(self):
+ source = {
+ 'Blah': ['blah2'],
+ 'Blah3': 'c',
+ }
+ merge_with = {
+ 'Blah2': ['blah3'],
+ 'Blah3': 'b',
+ 'Blah': ['123'],
+ }
+ # Basic merge should not do thing special
+ merge_how = "list()+dict()+str()"
+ merger_set = mergers.string_extract_mergers(merge_how)
+ self.assertEquals(3, len(merger_set))
+ merger = mergers.construct(merger_set)
+ merged = merger.merge(source, merge_with)
+ self.assertEquals(merged['Blah'], ['blah2'])
+ self.assertEquals(merged['Blah2'], ['blah3'])
+ self.assertEquals(merged['Blah3'], 'c')
+
+ def test_dict_overwrite(self):
+ source = {
+ 'Blah': ['blah2'],
+ }
+ merge_with = {
+ 'Blah': ['123'],
+ }
+ # Now lets try a dict overwrite
+ merge_how = "list()+dict(overwrite)+str()"
+ merger_set = mergers.string_extract_mergers(merge_how)
+ self.assertEquals(3, len(merger_set))
+ merger = mergers.construct(merger_set)
+ merged = merger.merge(source, merge_with)
+ self.assertEquals(merged['Blah'], ['123'])
+
+ def test_string_append(self):
+ source = {
+ 'Blah': 'blah2',
+ }
+ merge_with = {
+ 'Blah': '345',
+ }
+ merge_how = "list()+dict()+str(append)"
+ merger_set = mergers.string_extract_mergers(merge_how)
+ self.assertEquals(3, len(merger_set))
+ merger = mergers.construct(merger_set)
+ merged = merger.merge(source, merge_with)
+ self.assertEquals(merged['Blah'], 'blah2345')
+
+ def test_list_extend(self):
+ source = ['abc']
+ merge_with = ['123']
+ merge_how = "list(extend)+dict()+str()"
+ merger_set = mergers.string_extract_mergers(merge_how)
+ self.assertEquals(3, len(merger_set))
+ merger = mergers.construct(merger_set)
+ merged = merger.merge(source, merge_with)
+ self.assertEquals(merged, ['abc', '123'])
+
+ def test_deep_merge(self):
+ source = {
+ 'a': [1, 'b', 2],
+ 'b': 'blahblah',
+ 'c': {
+ 'e': [1, 2, 3],
+ 'f': 'bigblobof',
+ 'iamadict': {
+ 'ok': 'ok',
+ }
+ },
+ 'run': [
+ 'runme',
+ 'runme2',
+ ],
+ 'runmereally': [
+ 'e', ['a'], 'd',
+ ],
+ }
+ merge_with = {
+ 'a': ['e', 'f', 'g'],
+ 'b': 'more',
+ 'c': {
+ 'a': 'b',
+ 'f': 'stuff',
+ },
+ 'run': [
+ 'morecmd',
+ 'moremoremore',
+ ],
+ 'runmereally': [
+ 'blah', ['b'], 'e',
+ ],
+ }
+ merge_how = "list(extend)+dict()+str(append)"
+ merger_set = mergers.string_extract_mergers(merge_how)
+ self.assertEquals(3, len(merger_set))
+ merger = mergers.construct(merger_set)
+ merged = merger.merge(source, merge_with)
+ self.assertEquals(merged['a'], [1, 'b', 2, 'e', 'f', 'g'])
+ self.assertEquals(merged['b'], 'blahblahmore')
+ self.assertEquals(merged['c']['f'], 'bigblobofstuff')
+ self.assertEquals(merged['run'], ['runme', 'runme2', 'morecmd',
+ 'moremoremore'])
+ self.assertEquals(merged['runmereally'], ['e', ['a'], 'd', 'blah',
+ ['b'], 'e'])
+
+ def test_dict_overwrite_layered(self):
+ source = {
+ 'Blah3': {
+ 'f': '3',
+ 'g': {
+ 'a': 'b',
+ }
+ }
+ }
+ merge_with = {
+ 'Blah3': {
+ 'e': '2',
+ 'g': {
+ 'e': 'f',
+ }
+ }
+ }
+ merge_how = "list()+dict()+str()"
+ merger_set = mergers.string_extract_mergers(merge_how)
+ self.assertEquals(3, len(merger_set))
+ merger = mergers.construct(merger_set)
+ merged = merger.merge(source, merge_with)
+ self.assertEquals(merged['Blah3'], {
+ 'e': '2',
+ 'f': '3',
+ 'g': {
+ 'a': 'b',
+ 'e': 'f',
+ }
+ })
diff --git a/tests/unittests/test_userdata.py b/tests/unittests/test_userdata.py
index 82a4c555..fdfe2542 100644
--- a/tests/unittests/test_userdata.py
+++ b/tests/unittests/test_userdata.py
@@ -7,14 +7,17 @@ import os
from email.mime.base import MIMEBase
-from mocker import MockerTestCase
-
+from cloudinit import handlers
+from cloudinit import helpers as c_helpers
from cloudinit import log
from cloudinit import sources
from cloudinit import stages
+from cloudinit import util
INSTANCE_ID = "i-testing"
+from tests.unittests import helpers
+
class FakeDataSource(sources.DataSource):
@@ -26,22 +29,16 @@ class FakeDataSource(sources.DataSource):
# FIXME: these tests shouldn't be checking log output??
# Weirddddd...
-
-
-class TestConsumeUserData(MockerTestCase):
+class TestConsumeUserData(helpers.FilesystemMockingTestCase):
def setUp(self):
- MockerTestCase.setUp(self)
- # Replace the write so no actual files
- # get written out...
- self.mock_write = self.mocker.replace("cloudinit.util.write_file",
- passthrough=False)
+ helpers.FilesystemMockingTestCase.setUp(self)
self._log = None
self._log_file = None
self._log_handler = None
def tearDown(self):
- MockerTestCase.tearDown(self)
+ helpers.FilesystemMockingTestCase.tearDown(self)
if self._log_handler and self._log:
self._log.removeHandler(self._log_handler)
@@ -53,13 +50,77 @@ class TestConsumeUserData(MockerTestCase):
self._log.addHandler(self._log_handler)
return log_file
+ def test_merging_cloud_config(self):
+ blob = '''
+#cloud-config
+a: b
+e: f
+run:
+ - b
+ - c
+'''
+ message1 = MIMEBase("text", "cloud-config")
+ message1['Merge-Type'] = 'dict()+list(extend)+str(append)'
+ message1.set_payload(blob)
+
+ blob2 = '''
+#cloud-config
+a: e
+e: g
+run:
+ - stuff
+ - morestuff
+'''
+ message2 = MIMEBase("text", "cloud-config")
+ message2['X-Merge-Type'] = 'dict()+list(extend)+str()'
+ message2.set_payload(blob2)
+
+ blob3 = '''
+#cloud-config
+e:
+ - 1
+ - 2
+ - 3
+p: 1
+'''
+ message3 = MIMEBase("text", "cloud-config")
+ message3['Merge-Type'] = 'dict()+list()+str()'
+ message3.set_payload(blob3)
+
+ messages = [message1, message2, message3]
+
+ paths = c_helpers.Paths({}, ds=FakeDataSource(''))
+ cloud_cfg = handlers.cloud_config.CloudConfigPartHandler(paths)
+
+ new_root = self.makeDir()
+ self.patchUtils(new_root)
+ self.patchOS(new_root)
+ cloud_cfg.handle_part(None, handlers.CONTENT_START, None, None, None,
+ None)
+ for i, m in enumerate(messages):
+ headers = dict(m)
+ fn = "part-%s" % (i + 1)
+ payload = m.get_payload(decode=True)
+ cloud_cfg.handle_part(None, headers['Content-Type'],
+ fn, payload, None, headers)
+ cloud_cfg.handle_part(None, handlers.CONTENT_END, None, None, None,
+ None)
+ contents = util.load_file(paths.get_ipath('cloud_config'))
+ contents = util.load_yaml(contents)
+ self.assertEquals(contents['run'], ['b', 'c', 'stuff', 'morestuff'])
+ self.assertEquals(contents['a'], 'be')
+ self.assertEquals(contents['e'], 'fg')
+ self.assertEquals(contents['p'], 1)
+
def test_unhandled_type_warning(self):
"""Raw text without magic is ignored but shows warning."""
ci = stages.Init()
data = "arbitrary text\n"
ci.datasource = FakeDataSource(data)
- self.mock_write(ci.paths.get_ipath("cloud_config"), "", 0600)
+ mock_write = self.mocker.replace("cloudinit.util.write_file",
+ passthrough=False)
+ mock_write(ci.paths.get_ipath("cloud_config"), "", 0600)
self.mocker.replay()
log_file = self.capture_log(logging.WARNING)
@@ -76,7 +137,9 @@ class TestConsumeUserData(MockerTestCase):
message.set_payload("Just text")
ci.datasource = FakeDataSource(message.as_string())
- self.mock_write(ci.paths.get_ipath("cloud_config"), "", 0600)
+ mock_write = self.mocker.replace("cloudinit.util.write_file",
+ passthrough=False)
+ mock_write(ci.paths.get_ipath("cloud_config"), "", 0600)
self.mocker.replay()
log_file = self.capture_log(logging.WARNING)
@@ -93,8 +156,10 @@ class TestConsumeUserData(MockerTestCase):
ci.datasource = FakeDataSource(script)
outpath = os.path.join(ci.paths.get_ipath_cur("scripts"), "part-001")
- self.mock_write(ci.paths.get_ipath("cloud_config"), "", 0600)
- self.mock_write(outpath, script, 0700)
+ mock_write = self.mocker.replace("cloudinit.util.write_file",
+ passthrough=False)
+ mock_write(ci.paths.get_ipath("cloud_config"), "", 0600)
+ mock_write(outpath, script, 0700)
self.mocker.replay()
log_file = self.capture_log(logging.WARNING)
@@ -111,8 +176,10 @@ class TestConsumeUserData(MockerTestCase):
ci.datasource = FakeDataSource(message.as_string())
outpath = os.path.join(ci.paths.get_ipath_cur("scripts"), "part-001")
- self.mock_write(ci.paths.get_ipath("cloud_config"), "", 0600)
- self.mock_write(outpath, script, 0700)
+ mock_write = self.mocker.replace("cloudinit.util.write_file",
+ passthrough=False)
+ mock_write(ci.paths.get_ipath("cloud_config"), "", 0600)
+ mock_write(outpath, script, 0700)
self.mocker.replay()
log_file = self.capture_log(logging.WARNING)
@@ -129,8 +196,10 @@ class TestConsumeUserData(MockerTestCase):
ci.datasource = FakeDataSource(message.as_string())
outpath = os.path.join(ci.paths.get_ipath_cur("scripts"), "part-001")
- self.mock_write(outpath, script, 0700)
- self.mock_write(ci.paths.get_ipath("cloud_config"), "", 0600)
+ mock_write = self.mocker.replace("cloudinit.util.write_file",
+ passthrough=False)
+ mock_write(outpath, script, 0700)
+ mock_write(ci.paths.get_ipath("cloud_config"), "", 0600)
self.mocker.replay()
log_file = self.capture_log(logging.WARNING)