7 files changed, 169 insertions, 83 deletions
diff --git a/ChangeLog b/ChangeLog
index dcc67626..72f72a3c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -5,6 +5,10 @@
    'initctl reload-configuration' (LP: #1124384).  If so, then invoke it.
  - add Azure datasource.
  - add support for SuSE / SLES [Juerg Haefliger]
+ - add a trailing newline to chpasswd input, which reportedly
+   caused a problem on rhel5 if missing.
+ - support individual MIME segments to be gzip compressed (LP: #1203203)
+ - always finalize handlers even if processing failed (LP: #1203368)
 0.7.2:
  - add a debian watch file
  - add 'sudo' entry to ubuntu's default user (LP: #1080717)
diff --git a/cloudinit/config/cc_set_passwords.py b/cloudinit/config/cc_set_passwords.py
index e93c8c6f..56a36906 100644
--- a/cloudinit/config/cc_set_passwords.py
+++ b/cloudinit/config/cc_set_passwords.py
@@ -75,7 +75,7 @@ def handle(_name, cfg, cloud, log, args):
             plist_in.append("%s:%s" % (u, p))
             users.append(u)
 
-        ch_in = '\n'.join(plist_in)
+        ch_in = '\n'.join(plist_in) + '\n'
         try:
             log.debug("Changing password for %s:", users)
             util.subp(['chpasswd'], ch_in)
diff --git a/cloudinit/handlers/__init__.py b/cloudinit/handlers/__init__.py
index 497d68c5..1d450061 100644
--- a/cloudinit/handlers/__init__.py
+++ b/cloudinit/handlers/__init__.py
@@ -151,10 +151,9 @@ def walker_handle_handler(pdata, _ctype, _filename, payload):
     try:
         mod = fixup_handler(importer.import_module(modname))
         call_begin(mod, pdata['data'], frequency)
-        # Only register and increment
-        # after the above have worked (so we don't if it
-        # fails)
-        handlers.register(mod)
+        # Only register and increment after the above have worked, so we don't
+        # register if it fails starting.
+        handlers.register(mod, initialized=True)
         pdata['handlercount'] = curcount + 1
     except:
         util.logexc(LOG, "Failed at registering python file: %s (part "
diff --git a/cloudinit/helpers.py b/cloudinit/helpers.py
index b91c1290..1c46efde 100644
--- a/cloudinit/helpers.py
+++ b/cloudinit/helpers.py
@@ -281,6 +281,7 @@ class ContentHandlers(object):
 
     def __init__(self):
         self.registered = {}
+        self.initialized = []
 
     def __contains__(self, item):
         return self.is_registered(item)
@@ -291,11 +292,13 @@ class ContentHandlers(object):
     def is_registered(self, content_type):
         return content_type in self.registered
 
-    def register(self, mod):
+    def register(self, mod, initialized=False):
         types = set()
         for t in mod.list_types():
             self.registered[t] = mod
             types.add(t)
+        if initialized and mod not in self.initialized:
+            self.initialized.append(mod)
         return types
 
     def _get_handler(self, content_type):
diff --git a/cloudinit/stages.py b/cloudinit/stages.py
index df49cabb..3e49e8c5 100644
--- a/cloudinit/stages.py
+++ b/cloudinit/stages.py
@@ -344,12 +344,13 @@ class Init(object):
         cdir = self.paths.get_cpath("handlers")
         idir = self._get_ipath("handlers")
 
-        # Add the path to the plugins dir to the top of our list for import
-        # instance dir should be read before cloud-dir
-        if cdir and cdir not in sys.path:
-            sys.path.insert(0, cdir)
-        if idir and idir not in sys.path:
-            sys.path.insert(0, idir)
+        # Add the path to the plugins dir to the top of our list for importing
+        # new handlers.
+        #
+        # Note(harlowja): instance dir should be read before cloud-dir
+        for d in [cdir, idir]:
+            if d and d not in sys.path:
+                sys.path.insert(0, d)
 
         # Ensure datasource fetched before activation (just incase)
         user_data_msg = self.datasource.get_userdata(True)
@@ -357,24 +358,34 @@ class Init(object):
         # This keeps track of all the active handlers
         c_handlers = helpers.ContentHandlers()
 
-        # Add handlers in cdir
-        potential_handlers = util.find_modules(cdir)
-        for (fname, mod_name) in potential_handlers.iteritems():
-            try:
-                mod_locs = importer.find_module(mod_name, [''],
-                                                ['list_types',
-                                                 'handle_part'])
-                if not mod_locs:
-                    LOG.warn(("Could not find a valid user-data handler"
-                              " named %s in file %s"), mod_name, fname)
-                    continue
-                mod = importer.import_module(mod_locs[0])
-                mod = handlers.fixup_handler(mod)
-                types = c_handlers.register(mod)
-                LOG.debug("Added handler for %s from %s", types, fname)
-            except:
-                util.logexc(LOG, "Failed to register handler from %s", fname)
-
+        def register_handlers_in_dir(path):
+            # Attempts to register any handler modules under the given path.
+            if not path or not os.path.isdir(path):
+                return
+            potential_handlers = util.find_modules(path)
+            for (fname, mod_name) in potential_handlers.iteritems():
+                try:
+                    mod_locs = importer.find_module(mod_name, [''],
+                                                    ['list_types',
+                                                     'handle_part'])
+                    if not mod_locs:
+                        LOG.warn(("Could not find a valid user-data handler"
+                                  " named %s in file %s"), mod_name, fname)
+                        continue
+                    mod = importer.import_module(mod_locs[0])
+                    mod = handlers.fixup_handler(mod)
+                    types = c_handlers.register(mod)
+                    LOG.debug("Added handler for %s from %s", types, fname)
+                except Exception:
+                    util.logexc(LOG, "Failed to register handler from %s",
+                                fname)
+
+        # Add any handlers in the cloud-dir
+        register_handlers_in_dir(cdir)
+
+        # Register any other handlers that come from the default set. This
+        # is done after the cloud-dir handlers so that the cdir modules can
+        # take over the default user-data handler content-types.
         def_handlers = self._default_userdata_handlers()
         applied_def_handlers = c_handlers.register_defaults(def_handlers)
         if applied_def_handlers:
@@ -383,36 +394,51 @@ class Init(object):
         # Form our cloud interface
         data = self.cloudify()
 
-        # Init the handlers first
-        called = []
-        for (_ctype, mod) in c_handlers.iteritems():
-            if mod in called:
-                continue
-            handlers.call_begin(mod, data, frequency)
-            called.append(mod)
-
-        # Walk the user data
-        part_data = {
-            'handlers': c_handlers,
-            # Any new handlers that are encountered get writen here
-            'handlerdir': idir,
-            'data': data,
-            # The default frequency if handlers don't have one
-            'frequency': frequency,
-            # This will be used when new handlers are found
-            # to help write there contents to files with numbered
-            # names...
-            'handlercount': 0,
-        }
-        handlers.walk(user_data_msg, handlers.walker_callback, data=part_data)
+        def init_handlers():
+            # Init the handlers first
+            for (_ctype, mod) in c_handlers.iteritems():
+                if mod in c_handlers.initialized:
+                    # Avoid initing the same module twice (if said module
+                    # is registered to more than one content-type).
+                    continue
+                handlers.call_begin(mod, data, frequency)
+                c_handlers.initialized.append(mod)
+
+        def walk_handlers():
+            # Walk the user data
+            part_data = {
+                'handlers': c_handlers,
+                # Any new handlers that are encountered get written here
+                'handlerdir': idir,
+                'data': data,
+                # The default frequency if handlers don't have one
+                'frequency': frequency,
+                # This will be used when new handlers are found
+                # to help write their contents to files with numbered
+                # names...
+                'handlercount': 0,
+            }
+            handlers.walk(user_data_msg, handlers.walker_callback,
+                          data=part_data)
+
+        def finalize_handlers():
+            # Finalize every initialized handler; log failures and keep going.
+            for (_ctype, mod) in c_handlers.iteritems():
+                if mod not in c_handlers.initialized:
+                    # Said module was never inited in the first place, so let's
+                    # not attempt to finalize those that never got called.
+                    continue
+                c_handlers.initialized.remove(mod)
+                try:
+                    handlers.call_end(mod, data, frequency)
+                except Exception:
+                    util.logexc(LOG, "Failed to finalize handler: %s", mod)
 
-        # Give callbacks opportunity to finalize
-        called = []
-        for (_ctype, mod) in c_handlers.iteritems():
-            if mod in called:
-                continue
-            handlers.call_end(mod, data, frequency)
-            called.append(mod)
+        try:
+            init_handlers()
+            walk_handlers()
+        finally:
+            finalize_handlers()
 
         # Perform post-consumption adjustments so that
         # modules that run during the init stage reflect
diff --git a/cloudinit/user_data.py b/cloudinit/user_data.py
index df069ff8..d49ea094 100644
--- a/cloudinit/user_data.py
+++ b/cloudinit/user_data.py
@@ -23,8 +23,10 @@
 import os
 
 import email
+
 from email.mime.base import MIMEBase
 from email.mime.multipart import MIMEMultipart
+from email.mime.nonmultipart import MIMENonMultipart
 from email.mime.text import MIMEText
 
 from cloudinit import handlers
@@ -48,6 +50,18 @@ ARCHIVE_TYPES = ["text/cloud-config-archive"]
 UNDEF_TYPE = "text/plain"
 ARCHIVE_UNDEF_TYPE = "text/cloud-config"
 
+# This seems to hit most of the gzip possible content types.
+DECOMP_TYPES = [
+    'application/gzip',
+    'application/gzip-compressed',
+    'application/gzipped',
+    'application/x-compress',
+    'application/x-compressed',
+    'application/x-gunzip',
+    'application/x-gzip',
+    'application/x-gzip-compressed',
+]
+
 # Msg header used to track attachments
 ATTACHMENT_FIELD = 'Number-Attachments'
 
@@ -56,6 +70,17 @@ ATTACHMENT_FIELD = 'Number-Attachments'
 EXAMINE_FOR_LAUNCH_INDEX = ["text/cloud-config"]
 
 
+def _replace_header(msg, key, value):
+    del msg[key]
+    msg[key] = value
+
+
+def _set_filename(msg, filename):
+    del msg['Content-Disposition']
+    msg.add_header('Content-Disposition',
+                   'attachment', filename=str(filename))
+
+
 class UserDataProcessor(object):
     def __init__(self, paths):
         self.paths = paths
@@ -67,6 +92,10 @@ class UserDataProcessor(object):
         return accumulating_msg
 
     def _process_msg(self, base_msg, append_msg):
+
+        def find_ctype(payload):
+            return handlers.type_from_starts_with(payload)
+
         for part in base_msg.walk():
             if is_skippable(part):
                 continue
@@ -74,21 +103,51 @@ class UserDataProcessor(object):
             ctype = None
             ctype_orig = part.get_content_type()
             payload = part.get_payload(decode=True)
+            was_compressed = False
+
+            # When the message states it is of a gzipped content type ensure
+            # that we attempt to decode said payload so that the decompressed
+            # data can be examined (instead of the compressed data).
+            if ctype_orig in DECOMP_TYPES:
+                try:
+                    payload = util.decomp_gzip(payload, quiet=False)
+                    # At this point we don't know what the content-type is
+                    # since we just decompressed it.
+                    ctype_orig = None
+                    was_compressed = True
+                except util.DecompressionError as e:
+                    LOG.warn("Failed decompressing payload from %s of length"
+                             " %s due to: %s", ctype_orig, len(payload), e)
+                    continue
 
+            # Attempt to figure out the payload's content-type
             if not ctype_orig:
                 ctype_orig = UNDEF_TYPE
-
             if ctype_orig in TYPE_NEEDED:
-                ctype = handlers.type_from_starts_with(payload)
-
+                ctype = find_ctype(payload)
             if ctype is None:
                 ctype = ctype_orig
 
+            # In the case where the data was compressed, we want to make sure
+            # that we create a new message that contains the found content
+            # type with the uncompressed content since later traversals of the
+            # messages will expect a part not compressed.
+            if was_compressed:
+                maintype, subtype = ctype.split("/", 1)
+                n_part = MIMENonMultipart(maintype, subtype)
+                n_part.set_payload(payload)
+                # Copy various headers from the old part to the new one,
+                # but don't include all the headers since some are not useful
+                # after decoding and decompression.
+                if part.get_filename():
+                    _set_filename(n_part, part.get_filename())
+                for h in ('Launch-Index',):
+                    if h in part:
+                        _replace_header(n_part, h, str(part[h]))
+                part = n_part
+
             if ctype != ctype_orig:
-                if CONTENT_TYPE in part:
-                    part.replace_header(CONTENT_TYPE, ctype)
-                else:
-                    part[CONTENT_TYPE] = ctype
+                _replace_header(part, CONTENT_TYPE, ctype)
 
             if ctype in INCLUDE_TYPES:
                 self._do_include(payload, append_msg)
@@ -98,12 +157,9 @@ class UserDataProcessor(object):
                 self._explode_archive(payload, append_msg)
                 continue
 
-            # Should this be happening, shouldn't
+            # TODO(harlowja): Should this be happening, shouldn't
             # the part header be modified and not the base?
-            if CONTENT_TYPE in base_msg:
-                base_msg.replace_header(CONTENT_TYPE, ctype)
-            else:
-                base_msg[CONTENT_TYPE] = ctype
+            _replace_header(base_msg, CONTENT_TYPE, ctype)
 
             self._attach_part(append_msg, part)
 
@@ -138,8 +194,7 @@ class UserDataProcessor(object):
 
     def _process_before_attach(self, msg, attached_id):
         if not msg.get_filename():
-            msg.add_header('Content-Disposition',
-                           'attachment', filename=PART_FN_TPL % (attached_id))
+            _set_filename(msg, PART_FN_TPL % (attached_id))
         self._attach_launch_index(msg)
 
     def _do_include(self, content, append_msg):
@@ -217,13 +272,15 @@ class UserDataProcessor(object):
                 msg.set_payload(content)
 
             if 'filename' in ent:
-                msg.add_header('Content-Disposition',
-                               'attachment', filename=ent['filename'])
+                _set_filename(msg, ent['filename'])
             if 'launch-index' in ent:
                 msg.add_header('Launch-Index', str(ent['launch-index']))
 
             for header in list(ent.keys()):
-                if header in ('content', 'filename', 'type', 'launch-index'):
+                if header.lower() in ('content', 'filename', 'type',
+                                      'launch-index', 'content-disposition',
+                                      ATTACHMENT_FIELD.lower(),
+                                      CONTENT_TYPE.lower()):
                     continue
                 msg.add_header(header, ent[header])
 
@@ -238,13 +295,13 @@ class UserDataProcessor(object):
             outer_msg[ATTACHMENT_FIELD] = '0'
 
         if new_count is not None:
-            outer_msg.replace_header(ATTACHMENT_FIELD, str(new_count))
+            _replace_header(outer_msg, ATTACHMENT_FIELD, str(new_count))
 
         fetched_count = 0
         try:
             fetched_count = int(outer_msg.get(ATTACHMENT_FIELD))
         except (ValueError, TypeError):
-            outer_msg.replace_header(ATTACHMENT_FIELD, str(fetched_count))
+            _replace_header(outer_msg, ATTACHMENT_FIELD, str(fetched_count))
         return fetched_count
 
     def _attach_part(self, outer_msg, part):
@@ -276,10 +333,7 @@ def convert_string(raw_data, headers=None):
     if "mime-version:" in data[0:4096].lower():
         msg = email.message_from_string(data)
         for (key, val) in headers.iteritems():
-            if key in msg:
-                msg.replace_header(key, val)
-            else:
-                msg[key] = val
+            _replace_header(msg, key, val)
     else:
         mtype = headers.get(CONTENT_TYPE, NOT_MULTIPART_TYPE)
         maintype, subtype = mtype.split("/", 1)
diff --git a/cloudinit/version.py b/cloudinit/version.py
index 024d5118..4b29a587 100644
--- a/cloudinit/version.py
+++ b/cloudinit/version.py
@@ -20,7 +20,7 @@ from distutils import version as vr
 
 
 def version():
-    return vr.StrictVersion("0.7.2")
+    return vr.StrictVersion("0.7.3")
 
 
 def version_string():