fix issue with part-handlers and base64 encoding (LP: #874342)

Previously, * if content came into cloud-init for processing came in via a multipart mime file, and was already base64 encoded, it would get base64 encoded again before being handed to a part-handler. * if it came in via a '#include'd file then it would not be encoded at all. This drops the internal 'parts' array, that was just converted to and then from. Instead, we keep MIME format throughout and keep headers along the way. That means that a message that comes in with 'Content-Transfer-Encoding' set to 'base64' will be decoded before being handed to a part-handler. It also reduces the chance of failure due to content appearing to be an actual email. Previously if content contained colon separated fields, it might be read as headers (email.message_from_string(open("/etc/passwd","r")) would come back as all headers, no payload) The weak point right now is that '#include'd data cannot have mime types associated with it (unless it is a mime formatted content). I had hoped to read user headers and possibly set 'Content-Type' from that. LP: #874342
author: Scott Moser <smoser@ubuntu.com> 2011-12-22 03:45:48 -0500
committer: Scott Moser <smoser@ubuntu.com> 2011-12-22 03:45:48 -0500
commit: 7292e8aabf196492d391e73db8bd7346d11b0e63 (patch)
tree: 088f118fcdc136d1bfd4d26973a121880fc981d0 /cloudinit
parent: 354854c3a84b586f80da3da7691c3831fa3d7204 (diff)
parent: 585284eca9e9219c6517eebc02d45edc800cf72d (diff)
download: vyos-cloud-init-7292e8aabf196492d391e73db8bd7346d11b0e63.tar.gz
vyos-cloud-init-7292e8aabf196492d391e73db8bd7346d11b0e63.zip
1 files changed, 97 insertions, 73 deletions
diff --git a/cloudinit/UserDataHandler.py b/cloudinit/UserDataHandler.py
index 982e57f0..541ee87a 100644
--- a/cloudinit/UserDataHandler.py
+++ b/cloudinit/UserDataHandler.py
@@ -25,6 +25,8 @@ import yaml
 import cloudinit
 import cloudinit.util as util
 import hashlib
+import os
+import urllib
 
 starts_with_mappings={
     '#include' : 'text/x-include-url',
@@ -47,13 +49,12 @@ def decomp_str(str):
     except:
         return(str)
 
-def do_include(str,parts):
-    import urllib
+def do_include(content, appendmsg):
     import os
     # is just a list of urls, one per line
     # also support '#include <url here>'
     includeonce = False
-    for line in str.splitlines():
+    for line in content.splitlines():
         if line == "#include": continue
         if line == "#include-once":
             includeonce = True
@@ -81,31 +82,70 @@ def do_include(str,parts):
         except Exception as e:
             raise
 
-        process_includes(email.message_from_string(decomp_str(content)),parts)
+        process_includes(message_from_string(decomp_str(content)), appendmsg)
 
 
-def explode_cc_archive(archive,parts):
+def explode_cc_archive(archive, appendmsg):
     for ent in yaml.load(archive):
         # ent can be one of:
         #  dict { 'filename' : 'value' , 'content' : 'value', 'type' : 'value' }
         #    filename and type not be present
         # or
         #  scalar(payload)
-        filename = 'part-%03d' % len(parts['content'])
+        
         def_type = "text/cloud-config"
         if isinstance(ent,str):
-            content = ent
-            mtype = type_from_startswith(content,def_type)
+            ent = { 'content': ent }
+
+        content = ent.get('content', '')
+        mtype = ent.get('type', None)
+        if mtype == None:
+            mtype = type_from_startswith(content, def_type)
+
+        maintype, subtype = mtype.split('/', 1)
+        if maintype == "text":
+            msg = MIMEText(content, _subtype=subtype)
         else:
-            content = ent.get('content', '')
-            filename = ent.get('filename', filename)
-            mtype = ent.get('type', None)
-            if mtype == None:
-                mtype = type_from_startswith(payload,def_type)
-
-        parts['content'].append(content)
-        parts['names'].append(filename)
-        parts['types'].append(mtype)
+            msg = MIMEBase(maintype, subtype)
+            msg.set_payload(content)
+
+        if 'filename' in ent:
+            msg.add_header('Content-Disposition', 'attachment',
+                           filename=ent['filename'])
+
+        for header in ent.keys():
+            if header in ('content', 'filename', 'type'):
+                continue
+            msg.add_header(header, ent['header'])
+
+        _attach_part(appendmsg,msg)
+
+
+def multi_part_count(outermsg, newcount=None):
+    """
+    Return the number of attachments to this MIMEMultipart by looking
+    at its 'Number-Attachments' header.
+    """
+    nfield = 'Number-Attachments'
+    if nfield not in outermsg:
+        outermsg[nfield] = "0"
+
+    if newcount != None:
+        outermsg.replace_header(nfield, str(newcount))
+
+    return(int(outermsg.get('Number-Attachments', 0)))
+
+def _attach_part(outermsg, part):
+    """
+    Attach an part to an outer message. outermsg must be a MIMEMultipart.
+    Modifies a header in outermsg to keep track of number of attachments.
+    """
+    cur = multi_part_count(outermsg)
+    if not part.get_filename(None):
+        part.add_header('Content-Disposition', 'attachment',
+            filename = 'part-%03d' % (cur+1))
+    outermsg.attach(part)
+    multi_part_count(outermsg, cur+1)
     
 def type_from_startswith(payload, default=None):
     # slist is sorted longest first
@@ -115,85 +155,69 @@ def type_from_startswith(payload, default=None):
             return(starts_with_mappings[sstr])
     return default
 
-def process_includes(msg,parts):
-    # parts is a dictionary of arrays
-    # parts['content']
-    # parts['names']
-    # parts['types']
-    for t in ( 'content', 'names', 'types' ):
-        if not parts.has_key(t):
-            parts[t]=[ ]
+def process_includes(msg, appendmsg=None):
+    if appendmsg == None:
+        appendmsg = MIMEMultipart()
+
     for part in msg.walk():
         # multipart/* are just containers
         if part.get_content_maintype() == 'multipart':
             continue
 
-        payload = part.get_payload()
-
         ctype = None
         ctype_orig = part.get_content_type()
+
+        payload = part.get_payload(decode=True)
+
         if ctype_orig == "text/plain":
             ctype = type_from_startswith(payload)
 
         if ctype is None:
             ctype = ctype_orig
 
-        if ctype == 'text/x-include-url':
-            do_include(payload,parts)
-            continue
-
-        if ctype == 'text/x-include-once-url':
-            do_include(payload,parts)
+        if ctype in ('text/x-include-url', 'text/x-include-once-url'):
+            do_include(payload, appendmsg)
             continue
 
         if ctype == "text/cloud-config-archive":
-            explode_cc_archive(payload,parts)
+            explode_cc_archive(payload, appendmsg)
             continue
 
-        filename = part.get_filename()
-        if not filename:
-            filename = 'part-%03d' % len(parts['content'])
-
-        parts['content'].append(payload)
-        parts['types'].append(ctype)
-        parts['names'].append(filename)
+        if 'Content-Type' in msg:
+            msg.replace_header('Content-Type', ctype)
+        else:
+            msg['Content-Type'] = ctype
 
-def parts2mime(parts):
-    outer = MIMEMultipart()
+        _attach_part(appendmsg, part)
 
-    i = 0
-    while i < len(parts['content']):
-        if parts['types'][i] is None:
-            # No guess could be made, or the file is encoded (compressed), so
-            # use a generic bag-of-bits type.
-            ctype = 'application/octet-stream'
-        else: ctype = parts['types'][i]
-        maintype, subtype = ctype.split('/', 1)
-        if maintype == 'text':
-            msg = MIMEText(parts['content'][i], _subtype=subtype)
-        else:
-            msg = MIMEBase(maintype, subtype)
-            msg.set_payload(parts['content'][i])
-            # Encode the payload using Base64
-            encoders.encode_base64(msg)
-        # Set the filename parameter
-        msg.add_header('Content-Disposition', 'attachment', 
-            filename=parts['names'][i])
-        outer.attach(msg)
+def message_from_string(data, headers={}):
+    if "mime-version:" in data[0:4096].lower():
+        was_mime = True
+        msg = email.message_from_string(data)
+        for (key,val) in headers.items():
+            if key in msg:
+                msg.replace_header(key,val)
+            else:
+                msg[key] = val
+    else:
+        was_mime = False
+        mtype = headers.get("Content-Type","text/plain")
+        maintype, subtype = mtype.split("/", 1)
+        msg = MIMEBase(maintype, subtype, *headers)
+        msg.set_payload(data)
 
-        i=i+1
-    return(outer.as_string())
+    return(msg)
 
 # this is heavily wasteful, reads through userdata string input
 def preprocess_userdata(data):
-    parts = { }
-    process_includes(email.message_from_string(decomp_str(data)),parts)
-    return(parts2mime(parts))
+    newmsg = MIMEMultipart()
+    process_includes(message_from_string(decomp_str(data)), newmsg)
+    return(newmsg.as_string())
 
 # callback is a function that will be called with (data, content_type, filename, payload)
 def walk_userdata(istr, callback, data = None):
     partnum = 0
-    for part in email.message_from_string(istr).walk():
+    for part in message_from_string(istr).walk():
         # multipart/* are just containers
         if part.get_content_maintype() == 'multipart':
             continue
@@ -206,14 +230,14 @@ def walk_userdata(istr, callback, data = None):
         if not filename:
             filename = 'part-%03d' % partnum
 
-        callback(data, ctype, filename, part.get_payload())
+        callback(data, ctype, filename, part.get_payload(decode=True))
 
         partnum = partnum+1
 
 if __name__ == "__main__":
     import sys
     data = decomp_str(file(sys.argv[1]).read())
-    parts = { }
-    process_includes(email.message_from_string(data),parts)
-    print "#found %s parts" % len(parts['content'])
-    print parts2mime(parts)
+    newmsg = MIMEMultipart()
+    process_includes(message_from_string(data), newmsg)
+    print newmsg
+    print "#found %s parts" % multi_part_count(newmsg)
author	Scott Moser <smoser@ubuntu.com>	2011-12-22 03:45:48 -0500
committer	Scott Moser <smoser@ubuntu.com>	2011-12-22 03:45:48 -0500
commit	7292e8aabf196492d391e73db8bd7346d11b0e63 (patch)
tree	088f118fcdc136d1bfd4d26973a121880fc981d0 /cloudinit
parent	354854c3a84b586f80da3da7691c3831fa3d7204 (diff)
parent	585284eca9e9219c6517eebc02d45edc800cf72d (diff)
download	vyos-cloud-init-7292e8aabf196492d391e73db8bd7346d11b0e63.tar.gz vyos-cloud-init-7292e8aabf196492d391e73db8bd7346d11b0e63.zip