2 files changed, 78 insertions, 23 deletions
diff --git a/ChangeLog b/ChangeLog
index 4cdd20ad..6f2d1c66 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -7,6 +7,7 @@
  - add support for SuSE / SLES [Juerg Haefliger]
  - add a trailing carriage return to chpasswd input, which reportedly
    caused a problem on rhel5 if missing.
+ - support individual MIME segments to be gzip compressed (LP: #1203203)
 0.7.2:
  - add a debian watch file
  - add 'sudo' entry to ubuntu's default user (LP: #1080717)
diff --git a/cloudinit/user_data.py b/cloudinit/user_data.py
index df069ff8..d49ea094 100644
--- a/cloudinit/user_data.py
+++ b/cloudinit/user_data.py
@@ -23,8 +23,10 @@
 import os
 
 import email
+
 from email.mime.base import MIMEBase
 from email.mime.multipart import MIMEMultipart
+from email.mime.nonmultipart import MIMENonMultipart
 from email.mime.text import MIMEText
 
 from cloudinit import handlers
@@ -48,6 +50,18 @@ ARCHIVE_TYPES = ["text/cloud-config-archive"]
 UNDEF_TYPE = "text/plain"
 ARCHIVE_UNDEF_TYPE = "text/cloud-config"
 
+# This seems to hit most of the gzip possible content types.
+DECOMP_TYPES = [
+    'application/gzip',
+    'application/gzip-compressed',
+    'application/gzipped',
+    'application/x-compress',
+    'application/x-compressed',
+    'application/x-gunzip',
+    'application/x-gzip',
+    'application/x-gzip-compressed',
+]
+
 # Msg header used to track attachments
 ATTACHMENT_FIELD = 'Number-Attachments'
 
@@ -56,6 +70,17 @@ ATTACHMENT_FIELD = 'Number-Attachments'
 EXAMINE_FOR_LAUNCH_INDEX = ["text/cloud-config"]
 
 
+def _replace_header(msg, key, value):
+    del msg[key]
+    msg[key] = value
+
+
+def _set_filename(msg, filename):
+    del msg['Content-Disposition']
+    msg.add_header('Content-Disposition',
+                   'attachment', filename=str(filename))
+
+
 class UserDataProcessor(object):
     def __init__(self, paths):
         self.paths = paths
@@ -67,6 +92,10 @@ class UserDataProcessor(object):
         return accumulating_msg
 
     def _process_msg(self, base_msg, append_msg):
+
+        def find_ctype(payload):
+            return handlers.type_from_starts_with(payload)
+
         for part in base_msg.walk():
             if is_skippable(part):
                 continue
@@ -74,21 +103,51 @@ class UserDataProcessor(object):
             ctype = None
             ctype_orig = part.get_content_type()
             payload = part.get_payload(decode=True)
+            was_compressed = False
+
+            # When the message states it is of a gzipped content type ensure
+            # that we attempt to decode said payload so that the decompressed
+            # data can be examined (instead of the compressed data).
+            if ctype_orig in DECOMP_TYPES:
+                try:
+                    payload = util.decomp_gzip(payload, quiet=False)
+                    # At this point we don't know what the content-type is
+                    # since we just decompressed it.
+                    ctype_orig = None
+                    was_compressed = True
+                except util.DecompressionError as e:
+                    LOG.warn("Failed decompressing payload from %s of length"
+                             " %s due to: %s", ctype_orig, len(payload), e)
+                    continue
 
+            # Attempt to figure out the payloads content-type
             if not ctype_orig:
                 ctype_orig = UNDEF_TYPE
-
             if ctype_orig in TYPE_NEEDED:
-                ctype = handlers.type_from_starts_with(payload)
-
+                ctype = find_ctype(payload)
             if ctype is None:
                 ctype = ctype_orig
 
+            # In the case where the data was compressed, we want to make sure
+            # that we create a new message that contains the found content
+            # type with the uncompressed content since later traversals of the
+            # messages will expect a part not compressed.
+            if was_compressed:
+                maintype, subtype = ctype.split("/", 1)
+                n_part = MIMENonMultipart(maintype, subtype)
+                n_part.set_payload(payload)
+                # Copy various headers from the old part to the new one,
+                # but don't include all the headers since some are not useful
+                # after decoding and decompression.
+                if part.get_filename():
+                    _set_filename(n_part, part.get_filename())
+                for h in ('Launch-Index',):
+                    if h in part:
+                        _replace_header(n_part, h, str(part[h]))
+                part = n_part
+
             if ctype != ctype_orig:
-                if CONTENT_TYPE in part:
-                    part.replace_header(CONTENT_TYPE, ctype)
-                else:
-                    part[CONTENT_TYPE] = ctype
+                _replace_header(part, CONTENT_TYPE, ctype)
 
             if ctype in INCLUDE_TYPES:
                 self._do_include(payload, append_msg)
@@ -98,12 +157,9 @@ class UserDataProcessor(object):
                 self._explode_archive(payload, append_msg)
                 continue
 
-            # Should this be happening, shouldn't
+            # TODO(harlowja): Should this be happening, shouldn't
             # the part header be modified and not the base?
-            if CONTENT_TYPE in base_msg:
-                base_msg.replace_header(CONTENT_TYPE, ctype)
-            else:
-                base_msg[CONTENT_TYPE] = ctype
+            _replace_header(base_msg, CONTENT_TYPE, ctype)
 
             self._attach_part(append_msg, part)
 
@@ -138,8 +194,7 @@ class UserDataProcessor(object):
 
     def _process_before_attach(self, msg, attached_id):
         if not msg.get_filename():
-            msg.add_header('Content-Disposition',
-                           'attachment', filename=PART_FN_TPL % (attached_id))
+            _set_filename(msg, PART_FN_TPL % (attached_id))
         self._attach_launch_index(msg)
 
     def _do_include(self, content, append_msg):
@@ -217,13 +272,15 @@ class UserDataProcessor(object):
                 msg.set_payload(content)
 
             if 'filename' in ent:
-                msg.add_header('Content-Disposition',
-                               'attachment', filename=ent['filename'])
+                _set_filename(msg, ent['filename'])
             if 'launch-index' in ent:
                 msg.add_header('Launch-Index', str(ent['launch-index']))
 
             for header in list(ent.keys()):
-                if header in ('content', 'filename', 'type', 'launch-index'):
+                if header.lower() in ('content', 'filename', 'type',
+                                      'launch-index', 'content-disposition',
+                                      ATTACHMENT_FIELD.lower(),
+                                      CONTENT_TYPE.lower()):
                     continue
                 msg.add_header(header, ent[header])
 
@@ -238,13 +295,13 @@ class UserDataProcessor(object):
             outer_msg[ATTACHMENT_FIELD] = '0'
 
         if new_count is not None:
-            outer_msg.replace_header(ATTACHMENT_FIELD, str(new_count))
+            _replace_header(outer_msg, ATTACHMENT_FIELD, str(new_count))
 
         fetched_count = 0
         try:
             fetched_count = int(outer_msg.get(ATTACHMENT_FIELD))
         except (ValueError, TypeError):
-            outer_msg.replace_header(ATTACHMENT_FIELD, str(fetched_count))
+            _replace_header(outer_msg, ATTACHMENT_FIELD, str(fetched_count))
         return fetched_count
 
     def _attach_part(self, outer_msg, part):
@@ -276,10 +333,7 @@ def convert_string(raw_data, headers=None):
     if "mime-version:" in data[0:4096].lower():
         msg = email.message_from_string(data)
         for (key, val) in headers.iteritems():
-            if key in msg:
-                msg.replace_header(key, val)
-            else:
-                msg[key] = val
+            _replace_header(msg, key, val)
     else:
         mtype = headers.get(CONTENT_TYPE, NOT_MULTIPART_TYPE)
         maintype, subtype = mtype.split("/", 1)