1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
|
import email
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
# Maps a payload's leading marker to the content type that marker implies.
# process_includes() tests each part's payload against these prefixes
# before falling back to the part's own Content-Type.
starts_with_mappings = {
    '#include': 'text/x-include-url',
    '#!': 'text/x-shellscript',
    '#cloud-config': 'text/cloud-config',
}
# if 'str' is gzip-compressed return the decompressed data, otherwise
# return 'str' unchanged
def decomp_str(str):
    """Return the gunzipped contents of *str*, or *str* itself.

    This is a best-effort probe: the input is handed to gzip, and if
    anything goes wrong (not gzip data, truncated or corrupt stream,
    input that is not a byte string) the original object is returned
    untouched.

    Args:
        str: the data to probe (the name shadows the builtin, but is kept
            because it is part of the function's calling interface).

    Returns:
        The decompressed bytes when *str* is a valid gzip stream,
        otherwise *str* as passed in.
    """
    import gzip
    import io

    try:
        stream = gzip.GzipFile(fileobj=io.BytesIO(str), mode="rb")
        try:
            return stream.read()
        finally:
            # Release the gzip reader even when read() fails part-way.
            stream.close()
    except Exception:
        # Deliberately broad: any failure means "treat as not compressed".
        # Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate.
        return str
def do_include(str, parts):
    """Fetch every URL listed in an include payload and collect its parts.

    *str* is a list of URLs, one per line, optionally introduced by an
    ``#include`` marker line.  Each URL is downloaded, decompressed if
    needed via decomp_str(), parsed as an email message and handed to
    process_includes(), which appends what it finds to *parts*.

    Args:
        str: the include payload (the name shadows the builtin, but is
            kept because it is part of the calling interface).
        parts: dict of parallel lists that process_includes() fills in.

    Raises:
        Whatever urllib.urlopen() or read() raise for an unreachable URL;
        fetch errors are not caught here.
    """
    import urllib

    # NOTE(review): the URLs come straight from the payload and are fetched
    # without validation, and an included document may itself contain
    # includes -- there is no cycle detection or depth limit on that
    # recursion.
    for line in str.splitlines():
        url = line.strip()
        # Skip blank lines and the "#include" marker (also when it carries
        # stray surrounding whitespace) instead of trying to open them.
        if not url or url == "#include":
            continue
        response = urllib.urlopen(url)
        try:
            content = response.read()
        finally:
            # Close the connection even if the read fails.
            response.close()
        process_includes(email.message_from_string(decomp_str(content)), parts)
def process_includes(msg, parts):
    """Walk *msg* and append every leaf part to the lists in *parts*.

    *parts* is a dictionary of parallel lists, created here when missing:

        parts['content']  -- payload of each part
        parts['names']    -- filename of each part
        parts['types']    -- content type of each part

    A part's type is taken from starts_with_mappings when its payload
    begins with one of the known markers, otherwise from the part's own
    Content-Type.  Parts typed 'text/x-include-url' are not stored; they
    are expanded through do_include(), which adds the fetched parts
    instead.

    Args:
        msg: an email message object to walk.
        parts: dict that is updated in place.

    Returns:
        None; results are delivered through *parts*.
    """
    for key in ('content', 'names', 'types'):
        parts.setdefault(key, [])

    for part in msg.walk():
        # multipart/* are just containers.  The same holds for any other
        # part whose payload is a list of sub-messages (e.g. message/rfc822):
        # it has no string payload to inspect, and walk() yields its
        # children separately.
        if part.get_content_maintype() == 'multipart' or part.is_multipart():
            continue

        payload = part.get_payload()

        # A recognised leading marker wins over the declared Content-Type.
        ctype = None
        for prefix, mapped_type in starts_with_mappings.items():
            if payload.startswith(prefix):
                ctype = mapped_type
                break
        if ctype is None:
            ctype = part.get_content_type()

        if ctype == 'text/x-include-url':
            do_include(payload, parts)
            continue

        filename = part.get_filename()
        if not filename:
            # Unnamed parts are numbered by their position in the output.
            filename = 'part-%03d' % len(parts['content'])

        parts['content'].append(payload)
        parts['types'].append(ctype)
        parts['names'].append(filename)
def parts2mime(parts):
    """Render collected parts as a single multipart MIME document.

    Args:
        parts: dict of parallel lists 'content', 'types' and 'names', one
            entry per part.  A type of None is treated as
            'application/octet-stream'.

    Returns:
        The multipart message serialized with as_string().  'text/*'
        parts are attached as MIMEText; everything else is attached as a
        Base64-encoded MIMEBase part.  Every part carries a
        Content-Disposition header with its filename.

    Raises:
        IndexError: if 'types' or 'names' is shorter than 'content'.
        ValueError: if a type does not contain a '/'.
    """
    # The module-level imports visible in this file only provide
    # MIMEMultipart and MIMEText, so the names used by the non-text branch
    # are imported here; without them that branch raises NameError.
    from email import encoders
    from email.mime.base import MIMEBase

    outer = MIMEMultipart()
    for index, content in enumerate(parts['content']):
        ctype = parts['types'][index]
        if ctype is None:
            # No type is known for this part, so use a generic
            # bag-of-bits type.
            ctype = 'application/octet-stream'

        maintype, subtype = ctype.split('/', 1)
        if maintype == 'text':
            msg = MIMEText(content, _subtype=subtype)
        else:
            msg = MIMEBase(maintype, subtype)
            msg.set_payload(content)
            # Encode the payload using Base64
            encoders.encode_base64(msg)

        # Set the filename parameter
        msg.add_header('Content-Disposition', 'attachment',
                       filename=parts['names'][index])
        outer.attach(msg)

    return outer.as_string()
# this is heavily wasteful: the user-data string is decompressed, parsed,
# walked and then serialized all over again
def preprocess_userdata(data):
    """Expand *data* into a single multipart MIME string.

    The input is passed through decomp_str(), parsed as an email message,
    flattened by process_includes() into a fresh parts dictionary, and
    that dictionary is rendered by parts2mime().

    Args:
        data: the raw user-data string.

    Returns:
        The string produced by parts2mime().
    """
    collected = {}
    message = email.message_from_string(decomp_str(data))
    process_includes(message, collected)
    return parts2mime(collected)
# callbacks is a dictionary with:
#  { 'content-type': handler(data,content_type,filename,payload) }
def walk_userdata(str, callbacks, data = None):
    """Dispatch each leaf part of a MIME document to a per-type handler.

    Args:
        str: the MIME document as a string (the name shadows the builtin,
            but is kept because it is part of the calling interface).
        callbacks: dict mapping a content type to a callable invoked as
            handler(data, content_type, filename, payload).  Parts whose
            type has no entry are skipped.
        data: opaque value passed through to every handler unchanged.

    Returns:
        None.

    Parts without a filename are named 'part-NNN', where NNN counts every
    non-multipart part seen so far, including parts that had no handler.
    """
    partnum = 0
    for part in email.message_from_string(str).walk():
        # multipart/* are just containers
        if part.get_content_maintype() == 'multipart':
            continue

        # get_content_type() always returns a string (it falls back to the
        # message's default type), so no None check is needed here.
        ctype = part.get_content_type()

        filename = part.get_filename()
        if not filename:
            filename = 'part-%03d' % partnum

        if ctype in callbacks:
            callbacks[ctype](data, ctype, filename, part.get_payload())

        partnum += 1
# Command-line entry point: read a user-data file, expand it, and print
# the number of parts found followed by the combined MIME document.
if __name__ == "__main__":
    import sys

    if len(sys.argv) < 2:
        # Fail with a usage hint rather than an IndexError traceback.
        sys.stderr.write("usage: %s <user-data-file>\n" % sys.argv[0])
        sys.exit(1)

    source = open(sys.argv[1])
    try:
        data = decomp_str(source.read())
    finally:
        # Close the input file explicitly instead of leaving it to the
        # garbage collector.
        source.close()

    parts = { }
    process_includes(email.message_from_string(data), parts)
    # Parenthesized single-argument print emits the same text under the
    # Python 2 print statement and the print() function.
    print("#found %s parts" % len(parts['content']))
    print(parts2mime(parts))
|