# vi: ts=4 expandtab # # Copyright (C) 2012 Canonical Ltd. # Copyright (C) 2012 Hewlett-Packard Development Company, L.P. # Copyright (C) 2012 Yahoo! Inc. # # Author: Scott Moser # Author: Juerg Haefliger # Author: Joshua Harlow # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License version 3, as # published by the Free Software Foundation. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . import abc import os import email from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText from email.mime.base import MIMEBase from cloudinit import importer from cloudinit import log as logging from cloudinit import url_helper from cloudinit import util from cloudinit.settings import (PER_ALWAYS, PER_INSTANCE, FREQUENCIES) LOG = logging.getLogger(__name__) # Special content types that signal the start and end of processing CONTENT_END = "__end__" CONTENT_START = "__begin__" CONTENT_SIGNALS = [CONTENT_START, CONTENT_END] # Used when a part-handler type is encountered # to allow for registration of new types. PART_CONTENT_TYPES = ["text/part-handler"] PART_HANDLER_FN_TMPL = 'part-handler-%03d' # For parts without filenames PART_FN_TPL = 'part-%03d' # Used as the content type when a message is not multipart # and it doesn't contain its own content-type NOT_MULTIPART_TYPE = "text/x-not-multipart" OCTET_TYPE = 'application/octet-stream' # Different file beginnings to there content type INCLUSION_TYPES_MAP = { '#include': 'text/x-include-url', '#include-once': 'text/x-include-once-url', '#!': 'text/x-shellscript', '#cloud-config': 'text/cloud-config', '#upstart-job': 'text/upstart-job', '#part-handler': 'text/part-handler', '#cloud-boothook': 'text/cloud-boothook', '#cloud-config-archive': 'text/cloud-config-archive', } # Sorted longest first INCLUSION_SRCH = sorted(list(INCLUSION_TYPES_MAP.keys()), key=(lambda e: 0 - len(e))) # Various special content types TYPE_NEEDED = ["text/plain", "text/x-not-multipart"] INCLUDE_TYPES = ['text/x-include-url', 'text/x-include-once-url'] ARCHIVE_TYPES = ["text/cloud-config-archive"] UNDEF_TYPE = "text/plain" ARCHIVE_UNDEF_TYPE = "text/cloud-config" OCTET_TYPE = 'application/octet-stream' # Msg header used to track attachments ATTACHMENT_FIELD = 'Number-Attachments' class UserDataProcessor(object): def __init__(self, paths): self.paths = paths def process(self, blob): base_msg = convert_string(blob) process_msg = MIMEMultipart() self._process_msg(base_msg, process_msg) return process_msg def _process_msg(self, base_msg, append_msg): for part in base_msg.walk(): # multipart/* are just containers if part.get_content_maintype() == 'multipart': continue ctype = None ctype_orig = part.get_content_type() payload = part.get_payload(decode=True) if not ctype_orig: ctype_orig = UNDEF_TYPE if ctype_orig in TYPE_NEEDED: ctype = type_from_starts_with(payload) if ctype is None: ctype = ctype_orig if ctype in INCLUDE_TYPES: self._do_include(payload, append_msg) continue if ctype in ARCHIVE_TYPES: self._explode_archive(payload, append_msg) continue if 'Content-Type' in base_msg: base_msg.replace_header('Content-Type', ctype) else: base_msg['Content-Type'] = ctype self._attach_part(append_msg, part) def _get_include_once_filename(self, entry): entry_fn = util.hash_blob(entry, 'md5', 64) return os.path.join(self.paths.get_ipath_cur('data'), 'urlcache', entry_fn) def _do_include(self, content, append_msg): # is just a list of urls, one per line # also support '#include ' for line in content.splitlines(): includeonce = False if line in ("#include", "#include-once"): continue if line.startswith("#include-once"): line = line[len("#include-once"):].lstrip() includeonce = True elif line.startswith("#include"): line = line[len("#include"):].lstrip() if line.startswith("#"): continue include_url = line.strip() if not include_url: continue includeonce_filename = self._get_include_once_filename(include_url) if includeonce and os.path.isfile(includeonce_filename): content = util.load_file(includeonce_filename) else: (content, st) = url_helper.readurl(include_url) if includeonce and url_helper.ok_http_code(st): util.write_file(includeonce_filename, content, mode=0600) if not url_helper.ok_http_code(st): content = '' new_msg = convert_string(content) self._process_msg(new_msg, append_msg) def _explode_archive(self, archive, append_msg): entries = util.load_yaml(archive, default=[], allowed=[list, set]) for ent in entries: # ent can be one of: # dict { 'filename' : 'value', 'content' : # 'value', 'type' : 'value' } # filename and type not be present # or # scalar(payload) if isinstance(ent, (str, basestring)): ent = {'content': ent} if not isinstance(ent, (dict)): # TODO raise? continue content = ent.get('content', '') mtype = ent.get('type') if not mtype: mtype = type_from_starts_with(content, ARCHIVE_UNDEF_TYPE) maintype, subtype = mtype.split('/', 1) if maintype == "text": msg = MIMEText(content, _subtype=subtype) else: msg = MIMEBase(maintype, subtype) msg.set_payload(content) if 'filename' in ent: msg.add_header('Content-Disposition', 'attachment', filename=ent['filename']) for header in list(ent.keys()): if header in ('content', 'filename', 'type'): continue msg.add_header(header, ent['header']) self._attach_part(append_msg, msg) def _multi_part_count(self, outer_msg, new_count=None): """ Return the number of attachments to this MIMEMultipart by looking at its 'Number-Attachments' header. """ if ATTACHMENT_FIELD not in outer_msg: outer_msg[ATTACHMENT_FIELD] = '0' if new_count is not None: outer_msg.replace_header(ATTACHMENT_FIELD, str(new_count)) fetched_count = 0 try: fetched_count = int(outer_msg.get(ATTACHMENT_FIELD)) except (ValueError, TypeError): outer_msg.replace_header(ATTACHMENT_FIELD, str(fetched_count)) return fetched_count def _attach_part(self, outer_msg, part): """ Attach an part to an outer message. outermsg must be a MIMEMultipart. Modifies a header in the message to keep track of number of attachments. """ cur = self._multi_part_count(outer_msg) if not part.get_filename(): fn = PART_FN_TPL % (cur + 1) part.add_header('Content-Disposition', 'attachment', filename=fn) outer_msg.attach(part) self._multi_part_count(outer_msg, cur + 1) class PartHandler(object): __metaclass__ = abc.ABCMeta def __init__(self, frequency, version=2): self.handler_version = version self.frequency = frequency def __repr__(self): return "%s: [%s]" % (util.obj_name(self), self.list_types()) @abc.abstractmethod def list_types(self): raise NotImplementedError() def handle_part(self, data, ctype, filename, payload, frequency): return self._handle_part(data, ctype, filename, payload, frequency) @abc.abstractmethod def _handle_part(self, data, ctype, filename, payload, frequency): raise NotImplementedError() def fixup_handler(mod, def_freq=PER_INSTANCE): if not hasattr(mod, "handler_version"): setattr(mod, "handler_version", 1) if not hasattr(mod, 'list_types'): def empty_types(): return [] setattr(mod, 'list_types', empty_types) if not hasattr(mod, 'frequency'): setattr(mod, 'frequency', def_freq) else: freq = mod.frequency if freq and freq not in FREQUENCIES: LOG.warn("Module %s has an unknown frequency %s", mod, freq) if not hasattr(mod, 'handle_part'): def empty_handler(_data, _ctype, _filename, _payload): pass setattr(mod, 'handle_part', empty_handler) return mod def run_part(mod, data, ctype, filename, payload, frequency): mod_freq = mod.frequency if not (mod_freq == PER_ALWAYS or (frequency == PER_INSTANCE and mod_freq == PER_INSTANCE)): return mod_ver = mod.handler_version try: if mod_ver == 1: mod.handle_part(data, ctype, filename, payload) else: mod.handle_part(data, ctype, filename, payload, frequency) except: util.logexc(LOG, ("Failed calling mod %s (%s, %s, %s)" " with frequency %s"), mod, ctype, filename, mod_ver, frequency) def call_begin(mod, data, frequency): run_part(mod, data, CONTENT_START, None, None, frequency) def call_end(mod, data, frequency): run_part(mod, data, CONTENT_END, None, None, frequency) def walker_handle_handler(pdata, _ctype, _filename, payload): curcount = pdata['handlercount'] modname = PART_HANDLER_FN_TMPL % (curcount) frequency = pdata['frequency'] modfname = os.path.join(pdata['handlerdir'], "%s" % (modname)) if not modfname.endswith(".py"): modfname = "%s.py" % (modfname) # TODO: Check if path exists?? util.write_file(modfname, payload, 0600) handlers = pdata['handlers'] try: mod = fixup_handler(importer.import_module(modname)) handlers.register(mod) call_begin(mod, pdata['data'], frequency) pdata['handlercount'] = curcount + 1 except: util.logexc(LOG, "Failed at registered python file: %s", modfname) def extract_first_or_bytes(blob, size): # Extract the first line upto X bytes or X bytes from more than the # first line if the first line does not contain enough bytes first_line = blob.split("\n", 1)[0] if len(first_line) >= size: start = first_line[:size] else: start = blob[0:size] return start def walker_callback(pdata, ctype, filename, payload): if ctype in PART_CONTENT_TYPES: walker_handle_handler(pdata, ctype, filename, payload) return handlers = pdata['handlers'] if ctype not in handlers: # Extract the first line or 24 bytes for displaying in the log start = extract_first_or_bytes(payload, 24) details = "'%s...'" % (start.encode("string-escape")) if ctype == NOT_MULTIPART_TYPE: LOG.warning("Unhandled non-multipart (%s) userdata: %s", ctype, details) else: LOG.warning("Unhandled unknown content-type (%s) userdata: %s", ctype, details) else: run_part(handlers[ctype], pdata['data'], ctype, filename, payload, pdata['frequency']) # Callback is a function that will be called with # (data, content_type, filename, payload) def walk(msg, callback, data): partnum = 0 for part in msg.walk(): # multipart/* are just containers if part.get_content_maintype() == 'multipart': continue ctype = part.get_content_type() if ctype is None: ctype = OCTET_TYPE filename = part.get_filename() if not filename: filename = PART_FN_TPL % (partnum) callback(data, ctype, filename, part.get_payload(decode=True)) partnum = partnum + 1 # Coverts a raw string into a mime message def convert_string(raw_data, headers=None): if not raw_data: raw_data = '' if not headers: headers = {} data = util.decomp_str(raw_data) if "mime-version:" in data[0:4096].lower(): msg = email.message_from_string(data) for (key, val) in headers.iteritems(): if key in msg: msg.replace_header(key, val) else: msg[key] = val else: mtype = headers.get("Content-Type", NOT_MULTIPART_TYPE) maintype, subtype = mtype.split("/", 1) msg = MIMEBase(maintype, subtype, *headers) msg.set_payload(data) return msg def type_from_starts_with(payload, default=None): for text in INCLUSION_SRCH: if payload.startswith(text): return INCLUSION_TYPES_MAP[text] return default