15 files changed, 444 insertions, 96 deletions
diff --git a/cloudinit/config/cc_set_passwords.py b/cloudinit/config/cc_set_passwords.py
index e93c8c6f..56a36906 100644
--- a/cloudinit/config/cc_set_passwords.py
+++ b/cloudinit/config/cc_set_passwords.py
@@ -75,7 +75,7 @@ def handle(_name, cfg, cloud, log, args):
             plist_in.append("%s:%s" % (u, p))
             users.append(u)
 
-        ch_in = '\n'.join(plist_in)
+        ch_in = '\n'.join(plist_in) + '\n'
         try:
             log.debug("Changing password for %s:", users)
             util.subp(['chpasswd'], ch_in)
diff --git a/cloudinit/handlers/__init__.py b/cloudinit/handlers/__init__.py
index 297e7451..4c7c9295 100644
--- a/cloudinit/handlers/__init__.py
+++ b/cloudinit/handlers/__init__.py
@@ -152,10 +152,9 @@ def walker_handle_handler(pdata, _ctype, _filename, payload):
     try:
         mod = fixup_handler(importer.import_module(modname))
         call_begin(mod, pdata['data'], frequency)
-        # Only register and increment
-        # after the above have worked (so we don't if it
-        # fails)
-        handlers.register(mod)
+        # Only register and increment after the above have worked, so we don't
+        # register if it fails starting.
+        handlers.register(mod, initialized=True)
         pdata['handlercount'] = curcount + 1
     except:
         util.logexc(LOG, "Failed at registering python file: %s (part "
diff --git a/cloudinit/handlers/boot_hook.py b/cloudinit/handlers/boot_hook.py
index 11ac4fe5..1848ce2c 100644
--- a/cloudinit/handlers/boot_hook.py
+++ b/cloudinit/handlers/boot_hook.py
@@ -29,6 +29,7 @@ from cloudinit import util
 from cloudinit.settings import (PER_ALWAYS)
 
 LOG = logging.getLogger(__name__)
+BOOTHOOK_PREFIX = "#cloud-boothook"
 
 
 class BootHookPartHandler(handlers.Handler):
@@ -41,19 +42,15 @@ class BootHookPartHandler(handlers.Handler):
 
     def list_types(self):
         return [
-            handlers.type_from_starts_with("#cloud-boothook"),
+            handlers.type_from_starts_with(BOOTHOOK_PREFIX),
         ]
 
     def _write_part(self, payload, filename):
         filename = util.clean_filename(filename)
-        payload = util.dos2unix(payload)
-        prefix = "#cloud-boothook"
-        start = 0
-        if payload.startswith(prefix):
-            start = len(prefix) + 1
         filepath = os.path.join(self.boothook_dir, filename)
-        contents = payload[start:]
-        util.write_file(filepath, contents, 0700)
+        contents = util.strip_prefix_suffix(util.dos2unix(payload),
+                                            prefix=BOOTHOOK_PREFIX)
+        util.write_file(filepath, contents.lstrip(), 0700)
         return filepath
 
     def handle_part(self, _data, ctype, filename,  # pylint: disable=W0221
diff --git a/cloudinit/handlers/cloud_config.py b/cloudinit/handlers/cloud_config.py
index 4dcdbe8b..0f080e66 100644
--- a/cloudinit/handlers/cloud_config.py
+++ b/cloudinit/handlers/cloud_config.py
@@ -51,6 +51,7 @@ MERGE_HEADER = 'Merge-Type'
 #
 # This gets loaded into yaml with final result {'a': 22}
 DEF_MERGERS = mergers.string_extract_mergers('dict(replace)+list()+str()')
+CLOUD_PREFIX = "#cloud-config"
 
 # The file header -> content types this module will handle.
 CC_TYPES = {
@@ -84,7 +85,7 @@ class CloudConfigPartHandler(handlers.Handler):
         if self.cloud_buf is not None:
             # Something was actually gathered....
             lines = [
-                "#cloud-config",
+                CLOUD_PREFIX,
                 '',
             ]
             lines.extend(file_lines)
diff --git a/cloudinit/handlers/shell_script.py b/cloudinit/handlers/shell_script.py
index b185c374..62289d98 100644
--- a/cloudinit/handlers/shell_script.py
+++ b/cloudinit/handlers/shell_script.py
@@ -29,6 +29,7 @@ from cloudinit import util
 from cloudinit.settings import (PER_ALWAYS)
 
 LOG = logging.getLogger(__name__)
+SHELL_PREFIX = "#!"
 
 
 class ShellScriptPartHandler(handlers.Handler):
@@ -38,7 +39,7 @@ class ShellScriptPartHandler(handlers.Handler):
 
     def list_types(self):
         return [
-            handlers.type_from_starts_with("#!"),
+            handlers.type_from_starts_with(SHELL_PREFIX),
         ]
 
     def handle_part(self, _data, ctype, filename,  # pylint: disable=W0221
diff --git a/cloudinit/handlers/upstart_job.py b/cloudinit/handlers/upstart_job.py
index 7a73d1b2..bac4cad2 100644
--- a/cloudinit/handlers/upstart_job.py
+++ b/cloudinit/handlers/upstart_job.py
@@ -31,6 +31,7 @@ from cloudinit import util
 from cloudinit.settings import (PER_INSTANCE)
 
 LOG = logging.getLogger(__name__)
+UPSTART_PREFIX = "#upstart-job"
 
 
 class UpstartJobPartHandler(handlers.Handler):
@@ -40,7 +41,7 @@ class UpstartJobPartHandler(handlers.Handler):
 
     def list_types(self):
         return [
-            handlers.type_from_starts_with("#upstart-job"),
+            handlers.type_from_starts_with(UPSTART_PREFIX),
         ]
 
     def handle_part(self, _data, ctype, filename,  # pylint: disable=W0221
diff --git a/cloudinit/helpers.py b/cloudinit/helpers.py
index b91c1290..1c46efde 100644
--- a/cloudinit/helpers.py
+++ b/cloudinit/helpers.py
@@ -281,6 +281,7 @@ class ContentHandlers(object):
 
     def __init__(self):
         self.registered = {}
+        self.initialized = []
 
     def __contains__(self, item):
         return self.is_registered(item)
@@ -291,11 +292,13 @@ class ContentHandlers(object):
     def is_registered(self, content_type):
         return content_type in self.registered
 
-    def register(self, mod):
+    def register(self, mod, initialized=False):
         types = set()
         for t in mod.list_types():
             self.registered[t] = mod
             types.add(t)
+        if initialized and mod not in self.initialized:
+            self.initialized.append(mod)
         return types
 
     def _get_handler(self, content_type):
diff --git a/cloudinit/settings.py b/cloudinit/settings.py
index dc371cd2..9f6badae 100644
--- a/cloudinit/settings.py
+++ b/cloudinit/settings.py
@@ -37,6 +37,7 @@ CFG_BUILTIN = {
         'MAAS',
         'Ec2',
         'CloudStack',
+        'SmartOS',
         # At the end to act as a 'catch' when none of the above work...
         'None',
     ],
diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py
index c90d7b07..0a5caebe 100644
--- a/cloudinit/sources/DataSourceAzure.py
+++ b/cloudinit/sources/DataSourceAzure.py
@@ -114,7 +114,8 @@ class DataSourceAzureNet(sources.DataSource):
             # claim the datasource even if the command failed
             util.logexc(LOG, "agent command '%s' failed.", mycfg['cmd'])
 
-        wait_for = [os.path.join(mycfg['datadir'], "SharedConfig.xml")]
+        shcfgxml = os.path.join(mycfg['datadir'], "SharedConfig.xml")
+        wait_for = [shcfgxml]
 
         fp_files = []
         for pk in self.cfg.get('_pubkeys', []):
@@ -129,6 +130,14 @@ class DataSourceAzureNet(sources.DataSource):
             LOG.debug("waited %.3f seconds for %d files to appear",
                       time.time() - start, len(wait_for))
 
+        if shcfgxml in missing:
+            LOG.warn("SharedConfig.xml missing, using static instance-id")
+        else:
+            try:
+                self.metadata['instance-id'] = iid_from_shared_config(shcfgxml)
+            except ValueError as e:
+                LOG.warn("failed to get instance id in %s: %s" % (shcfgxml, e))
+
         pubkeys = pubkeys_from_crt_files(fp_files)
 
         self.metadata['public-keys'] = pubkeys
@@ -252,6 +261,20 @@ def load_azure_ovf_pubkeys(sshnode):
     return found
 
 
+def single_node_at_path(node, pathlist):
+    """Follow pathlist down from node, needing exactly one match per step."""
+    for tok in pathlist:
+        matches = find_child(node, lambda n: n.localName == tok)
+        if len(matches) == 0:
+            raise ValueError("missing %s token in %s" % (tok, str(pathlist)))
+        if len(matches) > 1:
+            raise ValueError("found %s nodes of type %s looking for %s" %
+                             (len(matches), tok, str(pathlist)))
+        node = matches[0]
+
+    return node
+
+
 def read_azure_ovf(contents):
     try:
         dom = minidom.parseString(contents)
@@ -362,6 +385,25 @@ def load_azure_ds_dir(source_dir):
     return (md, ud, cfg, {'ovf-env.xml': contents})
 
 
+def iid_from_shared_config(path):
+    """Return the instance id found in the SharedConfig.xml file at path."""
+    with open(path, "rb") as fp:
+        return iid_from_shared_config_content(fp.read())
+
+
+def iid_from_shared_config_content(content):
+    """Return INSTANCE_ID from SharedConfig xml content shaped like
+    <SharedConfig ...><Deployment name="INSTANCE_ID" ...>...
+    Raises ValueError if that node or a non-empty name is missing.
+    """
+    dom = minidom.parseString(content)
+    depnode = single_node_at_path(dom, ["SharedConfig", "Deployment"])
+    name = depnode.getAttribute('name')
+    if not name:
+        raise ValueError("missing name attribute on Deployment node")
+    return name
+
+
 class BrokenAzureDataSource(Exception):
     pass
 
diff --git a/cloudinit/sources/DataSourceSmartOS.py b/cloudinit/sources/DataSourceSmartOS.py
new file mode 100644
index 00000000..1ce20c10
--- /dev/null
+++ b/cloudinit/sources/DataSourceSmartOS.py
@@ -0,0 +1,195 @@
+# vi: ts=4 expandtab
+#
+#    Copyright (C) 2013 Canonical Ltd.
+#
+#    Author: Ben Howard <ben.howard@canonical.com>
+#
+#    This program is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License version 3, as
+#    published by the Free Software Foundation.
+#
+#    This program is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+#    Datasource for provisioning on SmartOS. This works on Joyent
+#        and public/private Clouds using SmartOS.
+#
+#    SmartOS hosts use a serial console (/dev/ttyS1) on Linux Guests.
+#        The meta-data is transmitted via key/value pairs made by
+#        requests on the console. For example, to get the hostname, you
+#        would send "GET hostname" on /dev/ttyS1.
+#
+
+
+from cloudinit import log as logging
+from cloudinit import sources
+from cloudinit import util
+import os
+import os.path
+import serial
+
+
+DEF_TTY_LOC = '/dev/ttyS1'
+DEF_TTY_TIMEOUT = 60
+LOG = logging.getLogger(__name__)
+
+SMARTOS_ATTRIB_MAP = {
+    #Cloud-init Key : (SmartOS Key, Strip line endings)
+    'local-hostname': ('hostname', True),
+    'public-keys': ('root_authorized_keys', True),
+    'user-script': ('user-script', False),
+    'user-data': ('user-data', False),
+    'iptables_disable': ('iptables_disable', True),
+    'motd_sys_info': ('motd_sys_info', True),
+}
+
+
+class DataSourceSmartOS(sources.DataSource):
+    """Datasource that reads SmartOS metadata over a serial device."""
+
+    def __init__(self, sys_cfg, distro, paths):
+        sources.DataSource.__init__(self, sys_cfg, distro, paths)
+        self.seed_dir = os.path.join(paths.seed_dir, 'sdc')
+        self.is_smartdc = None
+        # serial device and timeout may be overridden in the system config
+        self.seed = self.sys_cfg.get("serial_device", DEF_TTY_LOC)
+        self.seed_timeout = self.sys_cfg.get("serial_timeout",
+                                             DEF_TTY_TIMEOUT)
+
+    def __str__(self):
+        root = sources.DataSource.__str__(self)
+        return "%s [seed=%s]" % (root, self.seed)
+
+    def get_data(self):
+        """Populate metadata and userdata_raw from the serial console.
+
+        Returns False when the host does not look like SmartOS."""
+        if not os.path.exists(self.seed):
+            LOG.debug("Host does not appear to be on SmartOS")
+            return False
+
+        dmi_info = dmi_data()
+        if dmi_info is False:
+            LOG.debug("No dmidata utility found")
+            return False
+
+        system_uuid, system_type = dmi_info
+        # guard against an empty/missing product name before lowercasing it
+        if not system_type or 'smartdc' not in system_type.lower():
+            LOG.debug("Host is not on SmartOS")
+            return False
+        self.is_smartdc = True
+
+        md = {'instance-id': system_uuid}
+        for ci_noun, attribute in SMARTOS_ATTRIB_MAP.iteritems():
+            smartos_noun, strip = attribute
+            md[ci_noun] = query_data(smartos_noun, self.seed,
+                                     self.seed_timeout, strip=strip)
+
+        # fall back to the system uuid when no hostname was provided
+        if not md['local-hostname']:
+            md['local-hostname'] = system_uuid
+
+        self.metadata = md
+        # user-data takes precedence over user-script as the raw userdata
+        self.userdata_raw = md['user-data'] or md['user-script']
+        return True
+
+    def get_instance_id(self):
+        return self.metadata['instance-id']
+
+
+def get_serial(seed_device, seed_timeout):
+    """Open seed_device with timeout seed_timeout and return the handle.
+
+        This is replaced in unit testing so that serial.Serial can be
+        mocked. The default timeout of 60 seconds comes from SmartOS's own
+        provisioning tools and should never be hit: each line is read
+        individually up to the single ".", so responses are usually fast.
+        Raises AttributeError for an unset device, SystemError if not open.
+    """
+    if not seed_device:
+        raise AttributeError("seed_device value is not set")
+
+    ser = serial.Serial(seed_device, timeout=seed_timeout)
+    if not ser.isOpen():
+        raise SystemError("Unable to open %s" % seed_device)
+
+    return ser
+
+
+def query_data(noun, seed_device, seed_timeout, strip=False):
+    """Request a value over the serial console by sending "GET <noun>".
+
+        The first line of the response is the status and the following
+        lines are the value; a line holding only "." ends the response.
+
+        Returns False for an empty noun, None when the status is not
+        SUCCESS or the response ends (read timeout) before the "."
+        terminator, and otherwise the value (right-stripped if strip).
+    """
+
+    if not noun:
+        return False
+
+    ser = get_serial(seed_device, seed_timeout)
+    try:
+        ser.write("GET %s\n" % noun.rstrip())
+        status = str(ser.readline()).rstrip()
+        if 'SUCCESS' not in status:
+            return None
+
+        response = []
+        while True:
+            m = ser.readline()
+            if not m:
+                # readline() gave nothing back (timeout): never loop forever
+                return None
+            if m.rstrip() == ".":
+                break
+            response.append(m)
+    finally:
+        # always release the serial device, even when a read/write fails
+        ser.close()
+
+    return "".join(response).rstrip() if strip else "".join(response)
+
+
+def dmi_data():
+    """Return (lowercased system-uuid, system-product-name) from dmidecode.
+
+    Returns False when dmidecode is missing or either query fails, rather
+    than handing back None values that callers would crash on.
+    """
+    dmidecode_path = util.which('dmidecode')
+    if not dmidecode_path:
+        return False
+
+    found = []
+    for key in ("system-uuid", "system-product-name"):
+        try:
+            LOG.debug("Getting %s from dmidecode", key)
+            (out, _err) = util.subp([dmidecode_path, "-s", key])
+        except Exception:
+            util.logexc(LOG, "Failed to get %s via dmidecode", key)
+            return False
+        found.append(out)
+
+    return found[0].lower(), found[1]
+
+
+# Used to match classes to dependencies
+datasources = [
+    (DataSourceSmartOS, (sources.DEP_FILESYSTEM, sources.DEP_NETWORK)),
+]
+
+
+# Return a list of data sources that match this set of dependencies
+def get_datasource_list(depends):
+    return sources.list_from_depends(depends, datasources)
diff --git a/cloudinit/sources/__init__.py b/cloudinit/sources/__init__.py
index d8fbacdd..974c0407 100644
--- a/cloudinit/sources/__init__.py
+++ b/cloudinit/sources/__init__.py
@@ -135,7 +135,8 @@ class DataSource(object):
 
     @property
     def availability_zone(self):
-        return self.metadata.get('availability-zone')
+        return self.metadata.get('availability-zone',
+                                 self.metadata.get('availability_zone'))
 
     def get_instance_id(self):
         if not self.metadata or 'instance-id' not in self.metadata:
diff --git a/cloudinit/stages.py b/cloudinit/stages.py
index df49cabb..3e49e8c5 100644
--- a/cloudinit/stages.py
+++ b/cloudinit/stages.py
@@ -344,12 +344,13 @@ class Init(object):
         cdir = self.paths.get_cpath("handlers")
         idir = self._get_ipath("handlers")
 
-        # Add the path to the plugins dir to the top of our list for import
-        # instance dir should be read before cloud-dir
-        if cdir and cdir not in sys.path:
-            sys.path.insert(0, cdir)
-        if idir and idir not in sys.path:
-            sys.path.insert(0, idir)
+        # Add the path to the plugins dir to the top of our list for importing
+        # new handlers.
+        #
+        # Note(harlowja): instance dir should be read before cloud-dir
+        for d in [cdir, idir]:
+            if d and d not in sys.path:
+                sys.path.insert(0, d)
 
         # Ensure datasource fetched before activation (just incase)
         user_data_msg = self.datasource.get_userdata(True)
@@ -357,24 +358,34 @@ class Init(object):
         # This keeps track of all the active handlers
         c_handlers = helpers.ContentHandlers()
 
-        # Add handlers in cdir
-        potential_handlers = util.find_modules(cdir)
-        for (fname, mod_name) in potential_handlers.iteritems():
-            try:
-                mod_locs = importer.find_module(mod_name, [''],
-                                                ['list_types',
-                                                 'handle_part'])
-                if not mod_locs:
-                    LOG.warn(("Could not find a valid user-data handler"
-                              " named %s in file %s"), mod_name, fname)
-                    continue
-                mod = importer.import_module(mod_locs[0])
-                mod = handlers.fixup_handler(mod)
-                types = c_handlers.register(mod)
-                LOG.debug("Added handler for %s from %s", types, fname)
-            except:
-                util.logexc(LOG, "Failed to register handler from %s", fname)
-
+        def register_handlers_in_dir(path):
+            # Attempts to register any handler modules under the given path.
+            if not path or not os.path.isdir(path):
+                return
+            potential_handlers = util.find_modules(path)
+            for (fname, mod_name) in potential_handlers.iteritems():
+                try:
+                    mod_locs = importer.find_module(mod_name, [''],
+                                                    ['list_types',
+                                                     'handle_part'])
+                    if not mod_locs:
+                        LOG.warn(("Could not find a valid user-data handler"
+                                  " named %s in file %s"), mod_name, fname)
+                        continue
+                    mod = importer.import_module(mod_locs[0])
+                    mod = handlers.fixup_handler(mod)
+                    types = c_handlers.register(mod)
+                    LOG.debug("Added handler for %s from %s", types, fname)
+                except Exception:
+                    util.logexc(LOG, "Failed to register handler from %s",
+                                fname)
+
+        # Add any handlers in the cloud-dir
+        register_handlers_in_dir(cdir)
+
+        # Register any other handlers that come from the default set. This
+        # is done after the cloud-dir handlers so that the cdir modules can
+        # take over the default user-data handler content-types.
         def_handlers = self._default_userdata_handlers()
         applied_def_handlers = c_handlers.register_defaults(def_handlers)
         if applied_def_handlers:
@@ -383,36 +394,51 @@ class Init(object):
         # Form our cloud interface
         data = self.cloudify()
 
-        # Init the handlers first
-        called = []
-        for (_ctype, mod) in c_handlers.iteritems():
-            if mod in called:
-                continue
-            handlers.call_begin(mod, data, frequency)
-            called.append(mod)
-
-        # Walk the user data
-        part_data = {
-            'handlers': c_handlers,
-            # Any new handlers that are encountered get writen here
-            'handlerdir': idir,
-            'data': data,
-            # The default frequency if handlers don't have one
-            'frequency': frequency,
-            # This will be used when new handlers are found
-            # to help write there contents to files with numbered
-            # names...
-            'handlercount': 0,
-        }
-        handlers.walk(user_data_msg, handlers.walker_callback, data=part_data)
+        def init_handlers():
+            # Init the handlers first
+            for (_ctype, mod) in c_handlers.iteritems():
+                if mod in c_handlers.initialized:
+                    # Avoid initing the same module twice (if said module
+                    # is registered to more than one content-type).
+                    continue
+                handlers.call_begin(mod, data, frequency)
+                c_handlers.initialized.append(mod)
+
+        def walk_handlers():
+            # Walk the user data
+            part_data = {
+                'handlers': c_handlers,
+                # Any new handlers that are encountered get written here
+                'handlerdir': idir,
+                'data': data,
+                # The default frequency if handlers don't have one
+                'frequency': frequency,
+                # This will be used when new handlers are found
+                # to help write their contents to files with numbered
+                # names...
+                'handlercount': 0,
+            }
+            handlers.walk(user_data_msg, handlers.walker_callback,
+                          data=part_data)
+
+        def finalize_handlers():
+            # Give each initialized handler the opportunity to finalize.
+            for (_ctype, mod) in c_handlers.iteritems():
+                if mod not in c_handlers.initialized:
+                    # Said module was never inited in the first place, so
+                    # do not attempt to finalize those that never got called.
+                    continue
+                c_handlers.initialized.remove(mod)
+                try:
+                    handlers.call_end(mod, data, frequency)
+                except Exception:
+                    util.logexc(LOG, "Failed to finalize handler: %s", mod)
 
-        # Give callbacks opportunity to finalize
-        called = []
-        for (_ctype, mod) in c_handlers.iteritems():
-            if mod in called:
-                continue
-            handlers.call_end(mod, data, frequency)
-            called.append(mod)
+        try:
+            init_handlers()
+            walk_handlers()
+        finally:
+            finalize_handlers()
 
         # Perform post-consumption adjustments so that
         # modules that run during the init stage reflect
diff --git a/cloudinit/user_data.py b/cloudinit/user_data.py
index df069ff8..d49ea094 100644
--- a/cloudinit/user_data.py
+++ b/cloudinit/user_data.py
@@ -23,8 +23,10 @@
 import os
 
 import email
+
 from email.mime.base import MIMEBase
 from email.mime.multipart import MIMEMultipart
+from email.mime.nonmultipart import MIMENonMultipart
 from email.mime.text import MIMEText
 
 from cloudinit import handlers
@@ -48,6 +50,18 @@ ARCHIVE_TYPES = ["text/cloud-config-archive"]
 UNDEF_TYPE = "text/plain"
 ARCHIVE_UNDEF_TYPE = "text/cloud-config"
 
+# This seems to hit most of the gzip possible content types.
+DECOMP_TYPES = [
+    'application/gzip',
+    'application/gzip-compressed',
+    'application/gzipped',
+    'application/x-compress',
+    'application/x-compressed',
+    'application/x-gunzip',
+    'application/x-gzip',
+    'application/x-gzip-compressed',
+]
+
 # Msg header used to track attachments
 ATTACHMENT_FIELD = 'Number-Attachments'
 
@@ -56,6 +70,17 @@ ATTACHMENT_FIELD = 'Number-Attachments'
 EXAMINE_FOR_LAUNCH_INDEX = ["text/cloud-config"]
 
 
+def _replace_header(msg, key, value):
+    del msg[key]
+    msg[key] = value
+
+
+def _set_filename(msg, filename):
+    del msg['Content-Disposition']
+    msg.add_header('Content-Disposition',
+                   'attachment', filename=str(filename))
+
+
 class UserDataProcessor(object):
     def __init__(self, paths):
         self.paths = paths
@@ -67,6 +92,10 @@ class UserDataProcessor(object):
         return accumulating_msg
 
     def _process_msg(self, base_msg, append_msg):
+
+        def find_ctype(payload):
+            return handlers.type_from_starts_with(payload)
+
         for part in base_msg.walk():
             if is_skippable(part):
                 continue
@@ -74,21 +103,51 @@ class UserDataProcessor(object):
             ctype = None
             ctype_orig = part.get_content_type()
             payload = part.get_payload(decode=True)
+            was_compressed = False
+
+            # When the message states it is of a gzipped content type ensure
+            # that we attempt to decode said payload so that the decompressed
+            # data can be examined (instead of the compressed data).
+            if ctype_orig in DECOMP_TYPES:
+                try:
+                    payload = util.decomp_gzip(payload, quiet=False)
+                    # At this point we don't know what the content-type is
+                    # since we just decompressed it.
+                    ctype_orig = None
+                    was_compressed = True
+                except util.DecompressionError as e:
+                    LOG.warn("Failed decompressing payload from %s of length"
+                             " %s due to: %s", ctype_orig, len(payload), e)
+                    continue
 
+            # Attempt to figure out the payloads content-type
             if not ctype_orig:
                 ctype_orig = UNDEF_TYPE
-
             if ctype_orig in TYPE_NEEDED:
-                ctype = handlers.type_from_starts_with(payload)
-
+                ctype = find_ctype(payload)
             if ctype is None:
                 ctype = ctype_orig
 
+            # In the case where the data was compressed, we want to make sure
+            # that we create a new message that contains the found content
+            # type with the uncompressed content since later traversals of the
+            # messages will expect a part not compressed.
+            if was_compressed:
+                maintype, subtype = ctype.split("/", 1)
+                n_part = MIMENonMultipart(maintype, subtype)
+                n_part.set_payload(payload)
+                # Copy various headers from the old part to the new one,
+                # but don't include all the headers since some are not useful
+                # after decoding and decompression.
+                if part.get_filename():
+                    _set_filename(n_part, part.get_filename())
+                for h in ('Launch-Index',):
+                    if h in part:
+                        _replace_header(n_part, h, str(part[h]))
+                part = n_part
+
             if ctype != ctype_orig:
-                if CONTENT_TYPE in part:
-                    part.replace_header(CONTENT_TYPE, ctype)
-                else:
-                    part[CONTENT_TYPE] = ctype
+                _replace_header(part, CONTENT_TYPE, ctype)
 
             if ctype in INCLUDE_TYPES:
                 self._do_include(payload, append_msg)
@@ -98,12 +157,9 @@ class UserDataProcessor(object):
                 self._explode_archive(payload, append_msg)
                 continue
 
-            # Should this be happening, shouldn't
+            # TODO(harlowja): Should this be happening, shouldn't
             # the part header be modified and not the base?
-            if CONTENT_TYPE in base_msg:
-                base_msg.replace_header(CONTENT_TYPE, ctype)
-            else:
-                base_msg[CONTENT_TYPE] = ctype
+            _replace_header(base_msg, CONTENT_TYPE, ctype)
 
             self._attach_part(append_msg, part)
 
@@ -138,8 +194,7 @@ class UserDataProcessor(object):
 
     def _process_before_attach(self, msg, attached_id):
         if not msg.get_filename():
-            msg.add_header('Content-Disposition',
-                           'attachment', filename=PART_FN_TPL % (attached_id))
+            _set_filename(msg, PART_FN_TPL % (attached_id))
         self._attach_launch_index(msg)
 
     def _do_include(self, content, append_msg):
@@ -217,13 +272,15 @@ class UserDataProcessor(object):
                 msg.set_payload(content)
 
             if 'filename' in ent:
-                msg.add_header('Content-Disposition',
-                               'attachment', filename=ent['filename'])
+                _set_filename(msg, ent['filename'])
             if 'launch-index' in ent:
                 msg.add_header('Launch-Index', str(ent['launch-index']))
 
             for header in list(ent.keys()):
-                if header in ('content', 'filename', 'type', 'launch-index'):
+                if header.lower() in ('content', 'filename', 'type',
+                                      'launch-index', 'content-disposition',
+                                      ATTACHMENT_FIELD.lower(),
+                                      CONTENT_TYPE.lower()):
                     continue
                 msg.add_header(header, ent[header])
 
@@ -238,13 +295,13 @@ class UserDataProcessor(object):
             outer_msg[ATTACHMENT_FIELD] = '0'
 
         if new_count is not None:
-            outer_msg.replace_header(ATTACHMENT_FIELD, str(new_count))
+            _replace_header(outer_msg, ATTACHMENT_FIELD, str(new_count))
 
         fetched_count = 0
         try:
             fetched_count = int(outer_msg.get(ATTACHMENT_FIELD))
         except (ValueError, TypeError):
-            outer_msg.replace_header(ATTACHMENT_FIELD, str(fetched_count))
+            _replace_header(outer_msg, ATTACHMENT_FIELD, str(fetched_count))
         return fetched_count
 
     def _attach_part(self, outer_msg, part):
@@ -276,10 +333,7 @@ def convert_string(raw_data, headers=None):
     if "mime-version:" in data[0:4096].lower():
         msg = email.message_from_string(data)
         for (key, val) in headers.iteritems():
-            if key in msg:
-                msg.replace_header(key, val)
-            else:
-                msg[key] = val
+            _replace_header(msg, key, val)
     else:
         mtype = headers.get(CONTENT_TYPE, NOT_MULTIPART_TYPE)
         maintype, subtype = mtype.split("/", 1)
diff --git a/cloudinit/util.py b/cloudinit/util.py
index c45aae06..8542fe27 100644
--- a/cloudinit/util.py
+++ b/cloudinit/util.py
@@ -1530,6 +1530,14 @@ def shellify(cmdlist, add_header=True):
     return content
 
 
+def strip_prefix_suffix(line, prefix=None, suffix=None):
+    if prefix and line.startswith(prefix):
+        line = line[len(prefix):]
+    if suffix and line.endswith(suffix):
+        line = line[:-len(suffix)]
+    return line
+
+
 def is_container():
     """
     Checks to see if this code running in a container of some sort
@@ -1743,3 +1751,22 @@ def get_mount_info(path, log=LOG):
     mountinfo_path = '/proc/%s/mountinfo' % os.getpid()
     lines = load_file(mountinfo_path).splitlines()
     return parse_mount_info(path, lines, log)
+
+
+def which(program):
+    """Return the path to executable program, or None if it is not found.
+
+    A program given with a directory part is checked as-is; a bare name is
+    searched for in $PATH (os.defpath is used when PATH is unset).
+    """
+    def is_exe(fpath):
+        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)
+
+    if os.path.dirname(program):
+        return program if is_exe(program) else None
+    for path in os.environ.get("PATH", os.defpath).split(os.pathsep):
+        exe_file = os.path.join(path.strip('"'), program)
+        if is_exe(exe_file):
+            return exe_file
+
+    return None
diff --git a/cloudinit/version.py b/cloudinit/version.py
index 024d5118..4b29a587 100644
--- a/cloudinit/version.py
+++ b/cloudinit/version.py
@@ -20,7 +20,7 @@ from distutils import version as vr
 
 
 def version():
-    return vr.StrictVersion("0.7.2")
+    return vr.StrictVersion("0.7.3")
 
 
 def version_string():