diff options
Diffstat (limited to 'cloudinit/user_data.py')
-rw-r--r-- | cloudinit/user_data.py | 262 |
1 files changed, 262 insertions, 0 deletions
# vi: ts=4 expandtab
#
# Copyright (C) 2009-2010 Canonical Ltd.
# Copyright (C) 2012 Hewlett-Packard Development Company, L.P.
#
# Author: Scott Moser <scott.moser@canonical.com>
# Author: Juerg Hafliger <juerg.haefliger@hp.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 3, as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import email

from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.base import MIMEBase
import yaml
import cloudinit
import cloudinit.util as util
import hashlib
import urllib


# Maps the leading marker of a user-data payload to the MIME content type
# that marker implies. Looked up by type_from_startswith().
starts_with_mappings = {
    '#include': 'text/x-include-url',
    '#include-once': 'text/x-include-once-url',
    '#!': 'text/x-shellscript',
    '#cloud-config': 'text/cloud-config',
    '#upstart-job': 'text/upstart-job',
    '#part-handler': 'text/part-handler',
    '#cloud-boothook': 'text/cloud-boothook',
    '#cloud-config-archive': 'text/cloud-config-archive',
}


def decomp_str(string):
    """
    Return 'string' gunzipped if it is gzip compressed, otherwise
    return it unchanged.
    """
    import StringIO
    import gzip
    try:
        return gzip.GzipFile(None, "rb", 1, StringIO.StringIO(string)).read()
    except Exception:
        # Best effort: anything that does not read as gzip is treated as
        # plain data. 'Exception' (rather than a bare except) lets
        # KeyboardInterrupt and SystemExit propagate.
        return string


def do_include(content, appendmsg):
    """
    Fetch every url listed in 'content' and process what was fetched.

    'content' is a list of urls, one per line.  A line may also have the
    form '#include <url here>' or '#include-once <url here>'.  Other lines
    starting with '#' and blank lines are skipped.  Once an
    '#include-once' marker has been seen, each fetched url is cached
    under the 'urlcache' directory and read from there if already present.

    Each fetched document is decompressed if needed and handed to
    process_includes(), which attaches the resulting parts to 'appendmsg'.
    """
    import os
    includeonce = False
    for line in content.splitlines():
        if line == "#include":
            continue
        if line == "#include-once":
            includeonce = True
            continue
        if line.startswith("#include-once"):
            line = line[len("#include-once"):].lstrip()
            includeonce = True
        elif line.startswith("#include"):
            line = line[len("#include"):].lstrip()
        if line.startswith("#"):
            continue

        # urls cannot have leading or trailing white space
        url = line.strip()
        if url == "":
            continue

        msum = hashlib.md5()  # pylint: disable=E1101
        msum.update(url)
        includeonce_filename = "%s/urlcache/%s" % (
            cloudinit.get_ipath_cur("data"), msum.hexdigest())

        # 'included' is kept separate from the 'content' parameter so the
        # argument being iterated over is never rebound mid-loop.
        if includeonce and os.path.isfile(includeonce_filename):
            with open(includeonce_filename, "r") as fp:
                included = fp.read()
        else:
            resp = urllib.urlopen(url)
            try:
                included = resp.read()
            finally:
                resp.close()
            if includeonce:
                util.write_file(includeonce_filename, included, mode=0o600)

        process_includes(message_from_string(decomp_str(included)),
                         appendmsg)


def explode_cc_archive(archive, appendmsg):
    """
    Turn each entry of a cloud-config-archive into a MIME part and attach
    it to 'appendmsg'.

    'archive' is yaml text holding a list.  Each entry can be one of:
      dict { 'filename' : 'value', 'content' : 'value', 'type' : 'value' }
        filename and type may not be present
      or
      scalar(payload)

    Any other key of a dict entry is added to the part as a header.
    """
    def_type = "text/cloud-config"

    # NOTE(review): yaml.load can construct arbitrary python objects from
    # tagged input.  If user-data is not fully trusted, consider
    # yaml.safe_load here.
    # An archive holding no entries loads as None; treat that as empty.
    entries = yaml.load(archive) or []

    for ent in entries:
        if isinstance(ent, str):
            ent = {'content': ent}

        content = ent.get('content', '')
        mtype = ent.get('type', None)
        if mtype is None:
            mtype = type_from_startswith(content, def_type)

        maintype, subtype = mtype.split('/', 1)
        if maintype == "text":
            msg = MIMEText(content, _subtype=subtype)
        else:
            msg = MIMEBase(maintype, subtype)
            msg.set_payload(content)

        if 'filename' in ent:
            msg.add_header('Content-Disposition', 'attachment',
                           filename=ent['filename'])

        for header in ent.keys():
            if header in ('content', 'filename', 'type'):
                continue
            # look up the value of this key, not a literal 'header' key
            msg.add_header(header, ent[header])

        _attach_part(appendmsg, msg)


def multi_part_count(outermsg, newcount=None):
    """
    Return the number of attachments to this MIMEMultipart by looking
    at its 'Number-Attachments' header.

    The header is created with a value of "0" if it is missing.  If
    'newcount' is given, the header is set to it before being read back.
    """
    nfield = 'Number-Attachments'
    if nfield not in outermsg:
        outermsg[nfield] = "0"

    if newcount is not None:
        outermsg.replace_header(nfield, str(newcount))

    return int(outermsg.get(nfield, 0))


def _attach_part(outermsg, part):
    """
    Attach a part to an outer message. outermsg must be a MIMEMultipart.
    Modifies a header in outermsg to keep track of number of attachments.

    A part without a filename is given one of the form 'part-NNN'.
    """
    cur = multi_part_count(outermsg)
    if not part.get_filename(None):
        part.add_header('Content-Disposition', 'attachment',
                        filename='part-%03d' % (cur + 1))
    outermsg.attach(part)
    multi_part_count(outermsg, cur + 1)


def type_from_startswith(payload, default=None):
    """
    Return the content type implied by the marker 'payload' starts with,
    or 'default' if it starts with none of the known markers.
    """
    # longest marker first, so that '#include-once' wins over '#include'
    # and '#cloud-config-archive' wins over '#cloud-config'
    slist = sorted(starts_with_mappings.keys(), key=len, reverse=True)
    for sstr in slist:
        if payload.startswith(sstr):
            return starts_with_mappings[sstr]
    return default


def process_includes(msg, appendmsg=None):
    """
    Walk 'msg' and attach each of its non-container parts to 'appendmsg'.

    Parts typed 'text/plain' or 'text/x-not-multipart' are re-typed from
    their leading marker when one is recognized.  Include parts are
    fetched via do_include() and cloud-config-archive parts are expanded
    via explode_cc_archive() instead of being attached themselves.

    If 'appendmsg' is None a new MIMEMultipart is created.  The message
    that parts were attached to is returned.
    """
    if appendmsg is None:
        appendmsg = MIMEMultipart()

    for part in msg.walk():
        # multipart/* are just containers
        if part.get_content_maintype() == 'multipart':
            continue

        ctype = None
        ctype_orig = part.get_content_type()

        payload = part.get_payload(decode=True)

        if ctype_orig in ("text/plain", "text/x-not-multipart"):
            ctype = type_from_startswith(payload)

        if ctype is None:
            ctype = ctype_orig

        if ctype in ('text/x-include-url', 'text/x-include-once-url'):
            do_include(payload, appendmsg)
            continue

        if ctype == "text/cloud-config-archive":
            explode_cc_archive(payload, appendmsg)
            continue

        # The detected type belongs on the part being attached, not on
        # the message being walked (which is a container whenever the
        # input is multipart).
        if 'Content-Type' in part:
            part.replace_header('Content-Type', ctype)
        else:
            part['Content-Type'] = ctype

        _attach_part(appendmsg, part)

    return appendmsg


def message_from_string(data, headers=None):
    """
    Build an email message object from the string 'data'.

    If 'mime-version:' appears (case insensitively) in the first 4096
    characters, 'data' is parsed as a MIME message.  Otherwise it becomes
    the payload of a single part whose type is headers['Content-Type'],
    defaulting to 'text/x-not-multipart'.

    Every entry of 'headers' is then set on the message, replacing any
    header of the same name.
    """
    if headers is None:
        headers = {}

    if "mime-version:" in data[0:4096].lower():
        msg = email.message_from_string(data)
    else:
        mtype = headers.get("Content-Type", "text/x-not-multipart")
        maintype, subtype = mtype.split("/", 1)
        # headers are applied below; MIMEBase takes no positional
        # arguments beyond the main type and sub type
        msg = MIMEBase(maintype, subtype)
        msg.set_payload(data)

    for (key, val) in headers.items():
        if key in msg:
            msg.replace_header(key, val)
        else:
            msg[key] = val

    return msg


# this is heavily wasteful, reads through userdata string input
def preprocess_userdata(data):
    """
    Return 'data' (decompressed if needed) as the string form of a single
    MIME multipart message, with includes and archives expanded.
    """
    newmsg = MIMEMultipart()
    process_includes(message_from_string(decomp_str(data)), newmsg)
    return newmsg.as_string()


# callback is a function that will be called with (data, content_type,
# filename, payload)
def walk_userdata(istr, callback, data=None):
    """
    Call 'callback' once for each non-container part of the message
    parsed from 'istr'.

    'data' is passed through to the callback untouched.  A part without
    a filename is reported as 'part-NNN', numbered by its position among
    the non-container parts, starting at 0.
    """
    partnum = 0
    for part in message_from_string(istr).walk():
        # multipart/* are just containers
        if part.get_content_maintype() == 'multipart':
            continue

        ctype = part.get_content_type()
        if ctype is None:
            ctype = 'application/octet-stream'

        filename = part.get_filename()
        if not filename:
            filename = 'part-%03d' % partnum

        callback(data, ctype, filename, part.get_payload(decode=True))

        partnum = partnum + 1


if __name__ == "__main__":
    def main():
        """Process the user-data file named by argv[1] and print it."""
        import sys
        with open(sys.argv[1]) as fp:
            data = decomp_str(fp.read())
        newmsg = MIMEMultipart()
        process_includes(message_from_string(data), newmsg)
        # single-argument parenthesized form prints the same under the
        # print statement
        print(newmsg)
        print("#found %s parts" % multi_part_count(newmsg))

    main()