From 6d40d5b2e3da9577d4a2686444d47125e62817fe Mon Sep 17 00:00:00 2001 From: harlowja Date: Tue, 19 Feb 2013 22:51:49 -0800 Subject: Continue working on integrating requests. --- cloudinit/user_data.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'cloudinit/user_data.py') diff --git a/cloudinit/user_data.py b/cloudinit/user_data.py index 803ffc3a..4a640f1e 100644 --- a/cloudinit/user_data.py +++ b/cloudinit/user_data.py @@ -29,7 +29,6 @@ from email.mime.text import MIMEText from cloudinit import handlers from cloudinit import log as logging -from cloudinit import url_helper from cloudinit import util LOG = logging.getLogger(__name__) @@ -173,10 +172,10 @@ class UserDataProcessor(object): if include_once_on and os.path.isfile(include_once_fn): content = util.load_file(include_once_fn) else: - resp = url_helper.readurl(include_url) - if include_once_on and resp.ok(): + resp = util.read_file_or_url(include_url) + if include_once_on and resp.ok: util.write_file(include_once_fn, str(resp), mode=0600) - if resp.ok(): + if resp.ok: content = str(resp) else: LOG.warn(("Fetching from %s resulted in" -- cgit v1.2.3 From eacfc7ffbec3e6a0348ed484da895e2d2fc5ba10 Mon Sep 17 00:00:00 2001 From: harlowja Date: Sat, 23 Feb 2013 21:23:24 -0800 Subject: Get tests working and further adjustments. --- cloudinit/sources/DataSourceMAAS.py | 9 +++++--- cloudinit/url_helper.py | 21 +++++++++-------- cloudinit/user_data.py | 8 ++++--- cloudinit/util.py | 34 ++++++++++++++++++++-------- tests/unittests/test__init__.py | 10 ++++---- tests/unittests/test_datasource/test_maas.py | 11 +++++---- 6 files changed, 60 insertions(+), 33 deletions(-) (limited to 'cloudinit/user_data.py') diff --git a/cloudinit/sources/DataSourceMAAS.py b/cloudinit/sources/DataSourceMAAS.py index 6e1133b2..0c526305 100644 --- a/cloudinit/sources/DataSourceMAAS.py +++ b/cloudinit/sources/DataSourceMAAS.py @@ -223,9 +223,12 @@ def read_maas_seed_url(seed_url, header_cb=None, timeout=None, else: headers = {} try: - resp = util.read_file_or_url(url, headers=headers, timeout=timeout, - ssl_details=util.fetch_ssl_details(paths)) - if resp.ok: + ssl_details = util.fetch_ssl_details(paths) + resp = util.read_file_or_url(url, + headers=headers, + timeout=timeout, + ssl_details=ssl_details) + if resp.ok(): md[name] = str(resp) else: LOG.warn(("Fetching from %s resulted in" diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index 0839e63b..300e70c2 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -70,9 +70,14 @@ class UrlResponse(object): def url(self): return self._response.url - @property - def ok(self): - return self._response.ok + def ok(self, redirects_ok=False): + upper = 300 + if redirects_ok: + upper = 400 + if self.code >= 200 and self.code < upper: + return True + else: + return False @property def headers(self): @@ -158,11 +163,8 @@ def readurl(url, data=None, timeout=None, retries=0, sec_between=1, r = requests.request(**req_args) if check_status: r.raise_for_status() - contents = r.content - status = r.status_code - headers = r.headers LOG.debug("Read from %s (%s, %sb) after %s attempts", url, - status, len(contents), (i + 1)) + r.status_code, len(r.content), (i + 1)) # Doesn't seem like we can make it use a different # subclass for responses, so add our own backward-compat # attrs @@ -256,8 +258,9 @@ def wait_for_url(urls, max_wait=None, timeout=None, time_taken = int(time.time() - start_time) status_msg = "Calling '%s' failed [%s/%ss]: %s" % (url, - time_taken, - max_wait, reason) + time_taken, + max_wait, + reason) status_cb(status_msg) if exception_cb: exception_cb(msg=status_msg, exception=e) diff --git a/cloudinit/user_data.py b/cloudinit/user_data.py index eaf448a7..df069ff8 100644 --- a/cloudinit/user_data.py +++ b/cloudinit/user_data.py @@ -59,6 +59,7 @@ EXAMINE_FOR_LAUNCH_INDEX = ["text/cloud-config"] class UserDataProcessor(object): def __init__(self, paths): self.paths = paths + self.ssl_details = util.fetch_ssl_details(paths) def process(self, blob): accumulating_msg = MIMEMultipart() @@ -172,10 +173,11 @@ class UserDataProcessor(object): if include_once_on and os.path.isfile(include_once_fn): content = util.load_file(include_once_fn) else: - resp = util.read_file_or_url(include_url) - if include_once_on and resp.ok: + resp = util.read_file_or_url(include_url, + ssl_details=self.ssl_details) + if include_once_on and resp.ok(): util.write_file(include_once_fn, str(resp), mode=0600) - if resp.ok: + if resp.ok(): content = str(resp) else: LOG.warn(("Fetching from %s resulted in" diff --git a/cloudinit/util.py b/cloudinit/util.py index 42b3ab01..dc3c5639 100644 --- a/cloudinit/util.py +++ b/cloudinit/util.py @@ -70,18 +70,31 @@ FN_ALLOWED = ('_-.()' + string.digits + string.ascii_letters) CONTAINER_TESTS = ['running-in-container', 'lxc-is-container'] -class FileResponse(object): - def __init__(self, path, contents): - self.code = 200 +# Made to have same accessors as UrlResponse so that the +# read_file_or_url can return this or that object and the +# 'user' of those objects will not need to know the difference. +class StringResponse(object): + def __init__(self, contents, code=200): + self.code = code self.headers = {} self.contents = contents - self.ok = True - self.url = path + self.url = None + + def ok(self, *args, **kwargs): + if self.code != 200: + return False + return True def __str__(self): return self.contents +class FileResponse(StringResponse): + def __init__(self, path, contents, code=200): + StringResponse.__init__(self, contents, code=code) + self.url = path + + class ProcessExecutionError(IOError): MESSAGE_TMPL = ('%(description)s\n' @@ -630,7 +643,7 @@ def read_optional_seed(fill, base="", ext="", timeout=5): fill['user-data'] = ud fill['meta-data'] = md return True - except OSError as e: + except IOError as e: if e.errno == errno.ENOENT: return False raise @@ -670,9 +683,12 @@ def fetch_ssl_details(paths=None): def read_file_or_url(url, timeout=5, retries=10, headers=None, data=None, sec_between=1, ssl_details=None): + url = url.lstrip() if url.startswith("/"): url = "file://%s" % url if url.lower().startswith("file://"): + if data: + LOG.warn("Unable to post data to file resource %s", url) file_path = url[len("file://"):] return FileResponse(file_path, contents=load_file(file_path)) else: @@ -724,13 +740,13 @@ def read_seeded(base="", ext="", timeout=5, retries=10, file_retries=0): md_resp = read_file_or_url(md_url, timeout, retries, file_retries) md = None - if md_resp.ok: + if md_resp.ok(): md_str = str(md_resp) md = load_yaml(md_str, default={}) ud_resp = read_file_or_url(ud_url, timeout, retries, file_retries) ud = None - if ud_resp.ok: + if ud_resp.ok(): ud_str = str(ud_resp) ud = ud_str @@ -900,7 +916,7 @@ def get_cmdline_url(names=('cloud-config-url', 'url'), return (None, None, None) resp = read_file_or_url(url) - if resp.contents.startswith(starts) and resp.ok: + if resp.contents.startswith(starts) and resp.ok(): return (key, url, str(resp)) return (key, url, None) diff --git a/tests/unittests/test__init__.py b/tests/unittests/test__init__.py index ac082076..d707afa9 100644 --- a/tests/unittests/test__init__.py +++ b/tests/unittests/test__init__.py @@ -191,8 +191,8 @@ class TestCmdlineUrl(MockerTestCase): mock_readurl = self.mocker.replace(url_helper.readurl, passthrough=False) - mock_readurl(url) - self.mocker.result(url_helper.UrlResponse(200, payload)) + mock_readurl(url, ARGS, KWARGS) + self.mocker.result(util.StringResponse(payload)) self.mocker.replay() self.assertEqual((key, url, None), @@ -207,8 +207,8 @@ class TestCmdlineUrl(MockerTestCase): mock_readurl = self.mocker.replace(url_helper.readurl, passthrough=False) - mock_readurl(url) - self.mocker.result(url_helper.UrlResponse(200, payload)) + mock_readurl(url, ARGS, KWARGS) + self.mocker.result(util.StringResponse(payload)) self.mocker.replay() self.assertEqual((key, url, payload), @@ -221,7 +221,7 @@ class TestCmdlineUrl(MockerTestCase): cmdline = "ro %s=%s bar=1" % (key, url) self.mocker.replace(url_helper.readurl, passthrough=False) - self.mocker.result(url_helper.UrlResponse(400)) + self.mocker.result(util.StringResponse("")) self.mocker.replay() self.assertEqual((None, None, None), diff --git a/tests/unittests/test_datasource/test_maas.py b/tests/unittests/test_datasource/test_maas.py index b56fea82..47f8caa4 100644 --- a/tests/unittests/test_datasource/test_maas.py +++ b/tests/unittests/test_datasource/test_maas.py @@ -3,12 +3,13 @@ import os from cloudinit.sources import DataSourceMAAS from cloudinit import url_helper +from cloudinit import util from tests.unittests.helpers import populate_dir -from mocker import MockerTestCase +import mocker -class TestMAASDataSource(MockerTestCase): +class TestMAASDataSource(mocker.MockerTestCase): def setUp(self): super(TestMAASDataSource, self).setUp() @@ -115,9 +116,11 @@ class TestMAASDataSource(MockerTestCase): for key in valid_order: url = "%s/%s/%s" % (my_seed, my_ver, key) - mock_request(url, headers=my_headers, timeout=None) + mock_request(url, headers=my_headers, timeout=mocker.ANY, + data=mocker.ANY, sec_between=mocker.ANY, + ssl_details=mocker.ANY, retries=mocker.ANY) resp = valid.get(key) - self.mocker.result(url_helper.UrlResponse(200, resp)) + self.mocker.result(util.StringResponse(resp)) self.mocker.replay() (userdata, metadata) = DataSourceMAAS.read_maas_seed_url(my_seed, -- cgit v1.2.3 From c818ddba06ff7d486a085edae531896156c14e9d Mon Sep 17 00:00:00 2001 From: Joshua Harlow Date: Fri, 19 Jul 2013 15:49:35 -0700 Subject: Add the ability to decompress MIME gzip. Instead of being restricted to only gzip compressing the overall mime segment or individual included segments, allow for each mime segment to be gzip compressed. LP: #1203203 --- cloudinit/user_data.py | 41 +++++++++++++++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 8 deletions(-) (limited to 'cloudinit/user_data.py') diff --git a/cloudinit/user_data.py b/cloudinit/user_data.py index df069ff8..23c31fde 100644 --- a/cloudinit/user_data.py +++ b/cloudinit/user_data.py @@ -48,6 +48,18 @@ ARCHIVE_TYPES = ["text/cloud-config-archive"] UNDEF_TYPE = "text/plain" ARCHIVE_UNDEF_TYPE = "text/cloud-config" +# This seems to hit most of the gzip possible content types. +DECOMP_TYPES = [ + 'application/gzip', + 'application/gzip-compressed', + 'application/gzipped', + 'application/x-compress', + 'application/x-compressed', + 'application/x-gunzip', + 'application/x-gzip', + 'application/x-gzip-compressed', +] + # Msg header used to track attachments ATTACHMENT_FIELD = 'Number-Attachments' @@ -67,6 +79,13 @@ class UserDataProcessor(object): return accumulating_msg def _process_msg(self, base_msg, append_msg): + + def replace_header(part, key, value): + if key in part: + part.replace_header(key, value) + else: + part[key] = value + for part in base_msg.walk(): if is_skippable(part): continue @@ -75,6 +94,18 @@ class UserDataProcessor(object): ctype_orig = part.get_content_type() payload = part.get_payload(decode=True) + # When the message states it is of a gzipped content type ensure + # that we attempt to decode said payload so that the decompressed + # data can be examined (instead of the compressed data). + if ctype_orig in DECOMP_TYPES: + try: + payload = util.decomp_gzip(payload, quiet=False) + ctype_orig = UNDEF_TYPE + # TODO(harlowja): should we also set the payload to the + # decompressed value?? + except util.DecompressionError: + pass + if not ctype_orig: ctype_orig = UNDEF_TYPE @@ -85,10 +116,7 @@ class UserDataProcessor(object): ctype = ctype_orig if ctype != ctype_orig: - if CONTENT_TYPE in part: - part.replace_header(CONTENT_TYPE, ctype) - else: - part[CONTENT_TYPE] = ctype + replace_header(part, CONTENT_TYPE, ctype) if ctype in INCLUDE_TYPES: self._do_include(payload, append_msg) @@ -100,10 +128,7 @@ class UserDataProcessor(object): # Should this be happening, shouldn't # the part header be modified and not the base? - if CONTENT_TYPE in base_msg: - base_msg.replace_header(CONTENT_TYPE, ctype) - else: - base_msg[CONTENT_TYPE] = ctype + replace_header(base_msg, CONTENT_TYPE, ctype) self._attach_part(append_msg, part) -- cgit v1.2.3 From 7880588f804ea035f03eba9335af71f3322dab97 Mon Sep 17 00:00:00 2001 From: Joshua Harlow Date: Sat, 20 Jul 2013 14:34:00 -0700 Subject: Ensure we reset the part after decompression. --- cloudinit/user_data.py | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) (limited to 'cloudinit/user_data.py') diff --git a/cloudinit/user_data.py b/cloudinit/user_data.py index 23c31fde..97853e51 100644 --- a/cloudinit/user_data.py +++ b/cloudinit/user_data.py @@ -23,8 +23,10 @@ import os import email + from email.mime.base import MIMEBase from email.mime.multipart import MIMEMultipart +from email.mime.nonmultipart import MIMENonMultipart from email.mime.text import MIMEText from cloudinit import handlers @@ -80,6 +82,10 @@ class UserDataProcessor(object): def _process_msg(self, base_msg, append_msg): + def find_ctype(payload): + ctype = handlers.type_from_starts_with(payload) + return ctype + def replace_header(part, key, value): if key in part: part.replace_header(key, value) @@ -93,6 +99,7 @@ class UserDataProcessor(object): ctype = None ctype_orig = part.get_content_type() payload = part.get_payload(decode=True) + was_compressed = False # When the message states it is of a gzipped content type ensure # that we attempt to decode said payload so that the decompressed @@ -100,21 +107,32 @@ class UserDataProcessor(object): if ctype_orig in DECOMP_TYPES: try: payload = util.decomp_gzip(payload, quiet=False) - ctype_orig = UNDEF_TYPE - # TODO(harlowja): should we also set the payload to the - # decompressed value?? - except util.DecompressionError: - pass + # At this point we don't know what the content-type is + # since we just decompressed it. + ctype_orig = None + was_compressed = True + except util.DecompressionError as e: + LOG.warn("Failed decompressing payload from %s of length" + " %s due to: %s", ctype_orig, len(payload), e) + continue + # Attempt to figure out the payloads content-type if not ctype_orig: ctype_orig = UNDEF_TYPE - if ctype_orig in TYPE_NEEDED: - ctype = handlers.type_from_starts_with(payload) - + ctype = find_ctype(payload) if ctype is None: ctype = ctype_orig + # In the case where the data was compressed, we want to make sure + # that we create a new message that contains the found content + # type with the uncompressed content since later traversals of the + # messages will expect a part not compressed. + if was_compressed: + maintype, subtype = ctype.split("/", 1) + part = MIMENonMultipart(maintype, subtype) + part.set_payload(payload) + if ctype != ctype_orig: replace_header(part, CONTENT_TYPE, ctype) @@ -126,7 +144,7 @@ class UserDataProcessor(object): self._explode_archive(payload, append_msg) continue - # Should this be happening, shouldn't + # TODO(harlowja): Should this be happening, shouldn't # the part header be modified and not the base? replace_header(base_msg, CONTENT_TYPE, ctype) -- cgit v1.2.3 From 64c69053c11385cc43b6c628dbe8a1bf28ccc49c Mon Sep 17 00:00:00 2001 From: Joshua Harlow Date: Sat, 20 Jul 2013 14:57:42 -0700 Subject: Keep filename from original part. --- cloudinit/user_data.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'cloudinit/user_data.py') diff --git a/cloudinit/user_data.py b/cloudinit/user_data.py index 97853e51..e17bcaee 100644 --- a/cloudinit/user_data.py +++ b/cloudinit/user_data.py @@ -130,8 +130,12 @@ class UserDataProcessor(object): # messages will expect a part not compressed. if was_compressed: maintype, subtype = ctype.split("/", 1) - part = MIMENonMultipart(maintype, subtype) - part.set_payload(payload) + n_part = MIMENonMultipart(maintype, subtype) + n_part.set_payload(payload) + if part.get_filename(): + n_part.add_header('Content-Disposition', 'attachment', + filename=part.get_filename()) + part = n_part if ctype != ctype_orig: replace_header(part, CONTENT_TYPE, ctype) -- cgit v1.2.3 From 432778cf2890c19940f29f47f9efc2cb8e784f43 Mon Sep 17 00:00:00 2001 From: Joshua Harlow Date: Sat, 20 Jul 2013 16:34:39 -0700 Subject: Unify filename, header replacement. --- cloudinit/user_data.py | 56 ++++++++++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 24 deletions(-) (limited to 'cloudinit/user_data.py') diff --git a/cloudinit/user_data.py b/cloudinit/user_data.py index e17bcaee..454f3c06 100644 --- a/cloudinit/user_data.py +++ b/cloudinit/user_data.py @@ -70,6 +70,19 @@ ATTACHMENT_FIELD = 'Number-Attachments' EXAMINE_FOR_LAUNCH_INDEX = ["text/cloud-config"] +def _replace_header(msg, key, value): + del msg[key] + msg[key] = value + + +def _set_filename(msg, filename): + if not filename: + return + del msg['Content-Disposition'] + msg.add_header('Content-Disposition', + 'attachment', filename=str(filename)) + + class UserDataProcessor(object): def __init__(self, paths): self.paths = paths @@ -83,14 +96,7 @@ class UserDataProcessor(object): def _process_msg(self, base_msg, append_msg): def find_ctype(payload): - ctype = handlers.type_from_starts_with(payload) - return ctype - - def replace_header(part, key, value): - if key in part: - part.replace_header(key, value) - else: - part[key] = value + return handlers.type_from_starts_with(payload) for part in base_msg.walk(): if is_skippable(part): @@ -132,13 +138,17 @@ class UserDataProcessor(object): maintype, subtype = ctype.split("/", 1) n_part = MIMENonMultipart(maintype, subtype) n_part.set_payload(payload) - if part.get_filename(): - n_part.add_header('Content-Disposition', 'attachment', - filename=part.get_filename()) + # Copy various headers from the old part to the new one, + # but don't include all the headers since some are not useful + # after decoding and decompression. + _set_filename(n_part, part.get_filename()) + for h in ('Launch-Index',): + if h in part: + _replace_header(n_part, h, str(part[h])) part = n_part if ctype != ctype_orig: - replace_header(part, CONTENT_TYPE, ctype) + _replace_header(part, CONTENT_TYPE, ctype) if ctype in INCLUDE_TYPES: self._do_include(payload, append_msg) @@ -150,7 +160,7 @@ class UserDataProcessor(object): # TODO(harlowja): Should this be happening, shouldn't # the part header be modified and not the base? - replace_header(base_msg, CONTENT_TYPE, ctype) + _replace_header(base_msg, CONTENT_TYPE, ctype) self._attach_part(append_msg, part) @@ -185,8 +195,7 @@ class UserDataProcessor(object): def _process_before_attach(self, msg, attached_id): if not msg.get_filename(): - msg.add_header('Content-Disposition', - 'attachment', filename=PART_FN_TPL % (attached_id)) + _set_filename(msg, PART_FN_TPL % (attached_id)) self._attach_launch_index(msg) def _do_include(self, content, append_msg): @@ -264,13 +273,15 @@ class UserDataProcessor(object): msg.set_payload(content) if 'filename' in ent: - msg.add_header('Content-Disposition', - 'attachment', filename=ent['filename']) + _set_filename(msg, ent['filename']) if 'launch-index' in ent: msg.add_header('Launch-Index', str(ent['launch-index'])) for header in list(ent.keys()): - if header in ('content', 'filename', 'type', 'launch-index'): + if header.lower() in ('content', 'filename', 'type', + 'launch-index', 'content-disposition', + ATTACHMENT_FIELD.lower(), + CONTENT_TYPE.lower()): continue msg.add_header(header, ent[header]) @@ -285,13 +296,13 @@ class UserDataProcessor(object): outer_msg[ATTACHMENT_FIELD] = '0' if new_count is not None: - outer_msg.replace_header(ATTACHMENT_FIELD, str(new_count)) + _replace_header(outer_msg, ATTACHMENT_FIELD, str(new_count)) fetched_count = 0 try: fetched_count = int(outer_msg.get(ATTACHMENT_FIELD)) except (ValueError, TypeError): - outer_msg.replace_header(ATTACHMENT_FIELD, str(fetched_count)) + _replace_header(outer_msg, ATTACHMENT_FIELD, str(fetched_count)) return fetched_count def _attach_part(self, outer_msg, part): @@ -323,10 +334,7 @@ def convert_string(raw_data, headers=None): if "mime-version:" in data[0:4096].lower(): msg = email.message_from_string(data) for (key, val) in headers.iteritems(): - if key in msg: - msg.replace_header(key, val) - else: - msg[key] = val + _replace_header(msg, key, val) else: mtype = headers.get(CONTENT_TYPE, NOT_MULTIPART_TYPE) maintype, subtype = mtype.split("/", 1) -- cgit v1.2.3 From 251317563bd36a339e6fa7a08a0fc05b5ee975a4 Mon Sep 17 00:00:00 2001 From: Joshua Harlow Date: Sat, 20 Jul 2013 16:40:11 -0700 Subject: Just check the filename existing. --- cloudinit/user_data.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'cloudinit/user_data.py') diff --git a/cloudinit/user_data.py b/cloudinit/user_data.py index 454f3c06..d49ea094 100644 --- a/cloudinit/user_data.py +++ b/cloudinit/user_data.py @@ -76,8 +76,6 @@ def _replace_header(msg, key, value): def _set_filename(msg, filename): - if not filename: - return del msg['Content-Disposition'] msg.add_header('Content-Disposition', 'attachment', filename=str(filename)) @@ -141,7 +139,8 @@ class UserDataProcessor(object): # Copy various headers from the old part to the new one, # but don't include all the headers since some are not useful # after decoding and decompression. - _set_filename(n_part, part.get_filename()) + if part.get_filename(): + _set_filename(n_part, part.get_filename()) for h in ('Launch-Index',): if h in part: _replace_header(n_part, h, str(part[h])) -- cgit v1.2.3