From f895cb12141281702b34da18f2384deb64c881e7 Mon Sep 17 00:00:00 2001 From: Barry Warsaw Date: Wed, 21 Jan 2015 17:56:53 -0500 Subject: Largely merge lp:~harlowja/cloud-init/py2-3 albeit manually because it seemed to be behind trunk. `tox -e py27` passes full test suite. Now to work on replacing mocker. --- cloudinit/handlers/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'cloudinit/handlers/__init__.py') diff --git a/cloudinit/handlers/__init__.py b/cloudinit/handlers/__init__.py index 059d7495..d67a70ea 100644 --- a/cloudinit/handlers/__init__.py +++ b/cloudinit/handlers/__init__.py @@ -147,7 +147,7 @@ def walker_handle_handler(pdata, _ctype, _filename, payload): if not modfname.endswith(".py"): modfname = "%s.py" % (modfname) # TODO(harlowja): Check if path exists?? - util.write_file(modfname, payload, 0600) + util.write_file(modfname, payload, 0o600) handlers = pdata['handlers'] try: mod = fixup_handler(importer.import_module(modname)) -- cgit v1.2.3 From 0e7e5041a0ef80099c48341952e881009eb65fdf Mon Sep 17 00:00:00 2001 From: Barry Warsaw Date: Mon, 26 Jan 2015 12:27:51 -0500 Subject: Fix a few string/bytes problems with Python 3. --- cloudinit/handlers/__init__.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'cloudinit/handlers/__init__.py') diff --git a/cloudinit/handlers/__init__.py b/cloudinit/handlers/__init__.py index d67a70ea..cdccf122 100644 --- a/cloudinit/handlers/__init__.py +++ b/cloudinit/handlers/__init__.py @@ -22,6 +22,7 @@ import abc import os +import six from cloudinit.settings import (PER_ALWAYS, PER_INSTANCE, FREQUENCIES) @@ -174,11 +175,11 @@ def _extract_first_or_bytes(blob, size): def _escape_string(text): try: - return text.encode("string-escape") - except TypeError: + return text.encode("string_escape") + except (LookupError, TypeError): try: - # Unicode doesn't support string-escape... - return text.encode('unicode-escape') + # Unicode (and Python 3's str) doesn't support string_escape... + return text.encode('unicode_escape') except TypeError: # Give up... pass @@ -232,7 +233,17 @@ def walk(msg, callback, data): headers = dict(part) LOG.debug(headers) headers['Content-Type'] = ctype - callback(data, filename, part.get_payload(decode=True), headers) + payload = part.get_payload(decode=True) + # In Python 3, decoding the payload will ironically hand us a bytes + # object. 'decode' means to decode according to + # Content-Transfer-Encoding, not according to any charset in the + # Content-Type. So, if we end up with bytes, first try to decode to + # str via CT charset, and failing that, try utf-8 using surrogate + # escapes. + if six.PY3 and isinstance(payload, bytes): + charset = part.get_charset() or 'utf-8' + payload = payload.decode(charset, errors='surrogateescape') + callback(data, filename, payload, headers) partnum = partnum + 1 -- cgit v1.2.3 From 96d130e7732f1242d71c65a32412ae56cb229abf Mon Sep 17 00:00:00 2001 From: Barry Warsaw Date: Tue, 27 Jan 2015 15:11:53 -0500 Subject: Respond to review: - Refactor "fully" decoding the payload of a text/* part. In Python 3, decode=True only means to decode according to Content-Transfer-Encoding, not according to any charset in the Content-Type header. So do that. --- cloudinit/handlers/__init__.py | 11 +---------- cloudinit/user_data.py | 12 +----------- cloudinit/util.py | 15 +++++++++++++++ 3 files changed, 17 insertions(+), 21 deletions(-) (limited to 'cloudinit/handlers/__init__.py') diff --git a/cloudinit/handlers/__init__.py b/cloudinit/handlers/__init__.py index cdccf122..6b7abbcd 100644 --- a/cloudinit/handlers/__init__.py +++ b/cloudinit/handlers/__init__.py @@ -233,16 +233,7 @@ def walk(msg, callback, data): headers = dict(part) LOG.debug(headers) headers['Content-Type'] = ctype - payload = part.get_payload(decode=True) - # In Python 3, decoding the payload will ironically hand us a bytes - # object. 'decode' means to decode according to - # Content-Transfer-Encoding, not according to any charset in the - # Content-Type. So, if we end up with bytes, first try to decode to - # str via CT charset, and failing that, try utf-8 using surrogate - # escapes. - if six.PY3 and isinstance(payload, bytes): - charset = part.get_charset() or 'utf-8' - payload = payload.decode(charset, errors='surrogateescape') + payload = util.fully_decoded_payload(part) callback(data, filename, payload, headers) partnum = partnum + 1 diff --git a/cloudinit/user_data.py b/cloudinit/user_data.py index bf5642a5..5fdc46f2 100644 --- a/cloudinit/user_data.py +++ b/cloudinit/user_data.py @@ -108,17 +108,7 @@ class UserDataProcessor(object): ctype = None ctype_orig = part.get_content_type() - ctype_main = part.get_content_maintype() - payload = part.get_payload(decode=True) - # In Python 3, decoding the payload will ironically hand us a - # bytes object. 'decode' means to decode according to - # Content-Transfer-Encoding, not according to any charset in the - # Content-Type. So, if we end up with bytes, first try to decode - # to str via CT charset, and failing that, try utf-8 using - # surrogate escapes. - if six.PY3 and ctype_main == 'text' and isinstance(payload, bytes): - charset = part.get_charset() or 'utf-8' - payload = payload.decode(charset, errors='surrogateescape') + payload = util.fully_decoded_payload(part) was_compressed = False # When the message states it is of a gzipped content type ensure diff --git a/cloudinit/util.py b/cloudinit/util.py index 8916cc11..3a921afe 100644 --- a/cloudinit/util.py +++ b/cloudinit/util.py @@ -110,6 +110,21 @@ def b64e(source): return b64encode(source).decode('utf-8') +def fully_decoded_payload(part): + # In Python 3, decoding the payload will ironically hand us a bytes object. + # 'decode' means to decode according to Content-Transfer-Encoding, not + # according to any charset in the Content-Type. So, if we end up with + # bytes, first try to decode to str via CT charset, and failing that, try + # utf-8 using surrogate escapes. + cte_payload = part.get_payload(decode=True) + if ( six.PY3 and + part.get_content_maintype() == 'text' and + isinstance(cte_payload, bytes)): + charset = part.get_charset() or 'utf-8' + return cte_payload.decode(charset, errors='surrogateescape') + return cte_payload + + # Path for DMI Data DMI_SYS_PATH = "/sys/class/dmi/id" -- cgit v1.2.3 From 31a8aab92656279b141a9c29e484c4895bde15d3 Mon Sep 17 00:00:00 2001 From: Oleg Strikov Date: Wed, 11 Mar 2015 20:22:54 +0300 Subject: userdata-handlers: python3-related fixes on do-not-process-this-part path Cloud-init crashed when received multipart userdata object with 'application/octet-stream' part or some other 'application/*' part except archived ones (x-gzip and friends). These parts are not processed by cloud-init and result only in a message in the log. We used some non-python3-friendly techniques while generating this log message which was a reason for the crash. --- cloudinit/handlers/__init__.py | 24 ++++++++++++++++++------ tests/unittests/test_data.py | 18 ++++++++++++++++++ 2 files changed, 36 insertions(+), 6 deletions(-) (limited to 'cloudinit/handlers/__init__.py') diff --git a/cloudinit/handlers/__init__.py b/cloudinit/handlers/__init__.py index 6b7abbcd..d62fcd19 100644 --- a/cloudinit/handlers/__init__.py +++ b/cloudinit/handlers/__init__.py @@ -163,12 +163,19 @@ def walker_handle_handler(pdata, _ctype, _filename, payload): def _extract_first_or_bytes(blob, size): - # Extract the first line upto X bytes or X bytes from more than the - # first line if the first line does not contain enough bytes - first_line = blob.split("\n", 1)[0] - if len(first_line) >= size: - start = first_line[:size] - else: + # Extract the first line or upto X symbols for text objects + # Extract first X bytes for binary objects + try: + if isinstance(blob, six.string_types): + start = blob.split("\n", 1)[0] + else: + # We want to avoid decoding the whole blob (it might be huge) + # By taking 4*size bytes we have a guarantee to decode size utf8 chars + start = blob[:4*size].decode(errors='ignore').split("\n", 1)[0] + if len(start) >= size: + start = start[:size] + except UnicodeDecodeError: + # Bytes array doesn't contain a text object -- return chunk of raw bytes start = blob[0:size] return start @@ -183,6 +190,11 @@ def _escape_string(text): except TypeError: # Give up... pass + except AttributeError: + # We're in Python3 and received blob as text + # No escaping is needed because bytes are printed + # as 'b\xAA\xBB' automatically in Python3 + pass return text diff --git a/tests/unittests/test_data.py b/tests/unittests/test_data.py index 8fc280e4..4f24e2dd 100644 --- a/tests/unittests/test_data.py +++ b/tests/unittests/test_data.py @@ -13,6 +13,7 @@ except ImportError: from six import BytesIO, StringIO +from email import encoders from email.mime.application import MIMEApplication from email.mime.base import MIMEBase from email.mime.multipart import MIMEMultipart @@ -492,6 +493,23 @@ c: 4 mock.call(ci.paths.get_ipath("cloud_config"), "", 0o600), ]) + def test_mime_application_octet_stream(self): + """Mime message of type application/octet-stream is ignored but shows warning.""" + ci = stages.Init() + message = MIMEBase("application", "octet-stream") + message.set_payload(b'\xbf\xe6\xb2\xc3\xd3\xba\x13\xa4\xd8\xa1\xcc\xbf') + encoders.encode_base64(message) + ci.datasource = FakeDataSource(message.as_string().encode()) + + with mock.patch('cloudinit.util.write_file') as mockobj: + log_file = self.capture_log(logging.WARNING) + ci.fetch() + ci.consume_data() + self.assertIn( + "Unhandled unknown content-type (application/octet-stream)", + log_file.getvalue()) + mockobj.assert_called_once_with( + ci.paths.get_ipath("cloud_config"), "", 0o600) class TestUDProcess(helpers.ResourceUsingTestCase): -- cgit v1.2.3 From dcd4b2b371059bd6249b4e43af371ee1162273e8 Mon Sep 17 00:00:00 2001 From: Scott Moser Date: Thu, 16 Apr 2015 16:41:06 -0400 Subject: pep8 fixes --- cloudinit/config/cc_snappy.py | 4 ++-- cloudinit/handlers/__init__.py | 6 +++--- tests/unittests/test_data.py | 5 +++-- 3 files changed, 8 insertions(+), 7 deletions(-) (limited to 'cloudinit/handlers/__init__.py') diff --git a/cloudinit/config/cc_snappy.py b/cloudinit/config/cc_snappy.py index 6a7ae09b..bfe76558 100644 --- a/cloudinit/config/cc_snappy.py +++ b/cloudinit/config/cc_snappy.py @@ -72,7 +72,7 @@ def parse_filename(fname): name = fname_noext.partition("_")[0] shortname = name.partition(".")[0] return(name, shortname, fname_noext) - + def get_fs_package_ops(fspath): if not fspath: @@ -98,7 +98,7 @@ def makeop(op, name, config=None, path=None, cfgfile=None): def get_package_config(configs, name): # load the package's config from the configs dict. - # prefer full-name entry (config-example.canonical) + # prefer full-name entry (config-example.canonical) # over short name entry (config-example) if name in configs: return configs[name] diff --git a/cloudinit/handlers/__init__.py b/cloudinit/handlers/__init__.py index d62fcd19..52defe66 100644 --- a/cloudinit/handlers/__init__.py +++ b/cloudinit/handlers/__init__.py @@ -170,12 +170,12 @@ def _extract_first_or_bytes(blob, size): start = blob.split("\n", 1)[0] else: # We want to avoid decoding the whole blob (it might be huge) - # By taking 4*size bytes we have a guarantee to decode size utf8 chars - start = blob[:4*size].decode(errors='ignore').split("\n", 1)[0] + # By taking 4*size bytes we guarantee to decode size utf8 chars + start = blob[:4 * size].decode(errors='ignore').split("\n", 1)[0] if len(start) >= size: start = start[:size] except UnicodeDecodeError: - # Bytes array doesn't contain a text object -- return chunk of raw bytes + # Bytes array doesn't contain text so return chunk of raw bytes start = blob[0:size] return start diff --git a/tests/unittests/test_data.py b/tests/unittests/test_data.py index 4f24e2dd..b950c9a5 100644 --- a/tests/unittests/test_data.py +++ b/tests/unittests/test_data.py @@ -494,10 +494,10 @@ c: 4 ]) def test_mime_application_octet_stream(self): - """Mime message of type application/octet-stream is ignored but shows warning.""" + """Mime type application/octet-stream is ignored but shows warning.""" ci = stages.Init() message = MIMEBase("application", "octet-stream") - message.set_payload(b'\xbf\xe6\xb2\xc3\xd3\xba\x13\xa4\xd8\xa1\xcc\xbf') + message.set_payload(b'\xbf\xe6\xb2\xc3\xd3\xba\x13\xa4\xd8\xa1\xcc') encoders.encode_base64(message) ci.datasource = FakeDataSource(message.as_string().encode()) @@ -511,6 +511,7 @@ c: 4 mockobj.assert_called_once_with( ci.paths.get_ipath("cloud_config"), "", 0o600) + class TestUDProcess(helpers.ResourceUsingTestCase): def test_bytes_in_userdata(self): -- cgit v1.2.3 From 341a805fca9a06ce12e9f4bbbe15b3dded9eb6a4 Mon Sep 17 00:00:00 2001 From: Scott Moser Date: Thu, 16 Apr 2015 17:00:19 -0400 Subject: fix cloud-config-archive handling handling of cloud-config-archive input would fail in fully_decoded_payload. part.get_charset() would return a Charset object, but get_charset.input_codec is a string suitable for passing to decode. This handles that correctly, and is more careful about binary data inside input. The test added verifies that cloud-config inside a cloud-config-archive is handled correctly and also that binary data there is ignored without exceptions raised. LP: #1445143 --- cloudinit/handlers/__init__.py | 5 ++++- cloudinit/user_data.py | 9 +++++++-- cloudinit/util.py | 8 ++++++-- tests/unittests/test_data.py | 27 +++++++++++++++++++++++++++ 4 files changed, 44 insertions(+), 5 deletions(-) (limited to 'cloudinit/handlers/__init__.py') diff --git a/cloudinit/handlers/__init__.py b/cloudinit/handlers/__init__.py index 52defe66..53d5604a 100644 --- a/cloudinit/handlers/__init__.py +++ b/cloudinit/handlers/__init__.py @@ -263,7 +263,10 @@ def fixup_handler(mod, def_freq=PER_INSTANCE): def type_from_starts_with(payload, default=None): - payload_lc = payload.lower() + try: + payload_lc = util.decode_binary(payload).lower() + except UnicodeDecodeError: + return default payload_lc = payload_lc.lstrip() for text in INCLUSION_SRCH: if payload_lc.startswith(text): diff --git a/cloudinit/user_data.py b/cloudinit/user_data.py index eb3c7336..f7c5787c 100644 --- a/cloudinit/user_data.py +++ b/cloudinit/user_data.py @@ -49,6 +49,7 @@ INCLUDE_TYPES = ['text/x-include-url', 'text/x-include-once-url'] ARCHIVE_TYPES = ["text/cloud-config-archive"] UNDEF_TYPE = "text/plain" ARCHIVE_UNDEF_TYPE = "text/cloud-config" +ARCHIVE_UNDEF_BINARY_TYPE = "application/octet-stream" # This seems to hit most of the gzip possible content types. DECOMP_TYPES = [ @@ -265,11 +266,15 @@ class UserDataProcessor(object): content = ent.get('content', '') mtype = ent.get('type') if not mtype: - mtype = handlers.type_from_starts_with(content, - ARCHIVE_UNDEF_TYPE) + default = ARCHIVE_UNDEF_TYPE + if isinstance(content, six.binary_type): + default = ARCHIVE_UNDEF_BINARY_TYPE + mtype = handlers.type_from_starts_with(content, default) maintype, subtype = mtype.split('/', 1) if maintype == "text": + if isinstance(content, six.binary_type): + content = content.decode() msg = MIMEText(content, _subtype=subtype) else: msg = MIMEBase(maintype, subtype) diff --git a/cloudinit/util.py b/cloudinit/util.py index 971c1c2d..cae57770 100644 --- a/cloudinit/util.py +++ b/cloudinit/util.py @@ -121,8 +121,12 @@ def fully_decoded_payload(part): if (six.PY3 and part.get_content_maintype() == 'text' and isinstance(cte_payload, bytes)): - charset = part.get_charset() or 'utf-8' - return cte_payload.decode(charset, errors='surrogateescape') + charset = part.get_charset() + if charset and charset.input_codec: + encoding = charset.input_codec + else: + encoding = 'utf-8' + return cte_payload.decode(encoding, errors='surrogateescape') return cte_payload diff --git a/tests/unittests/test_data.py b/tests/unittests/test_data.py index b950c9a5..1b15dafa 100644 --- a/tests/unittests/test_data.py +++ b/tests/unittests/test_data.py @@ -512,6 +512,33 @@ c: 4 ci.paths.get_ipath("cloud_config"), "", 0o600) + def test_cloud_config_archive(self): + non_decodable = b'\x11\xc9\xb4gTH\xee\x12' + data = [{'content': '#cloud-config\npassword: gocubs\n'}, + {'content': '#cloud-config\nlocale: chicago\n'}, + {'content': non_decodable}] + message = b'#cloud-config-archive\n' + util.yaml_dumps(data).encode() + + ci = stages.Init() + ci.datasource = FakeDataSource(message) + + fs = {} + + def fsstore(filename, content, mode=0o0644, omode="wb"): + fs[filename] = content + + # consuming the user-data provided should write 'cloud_config' file + # which will have our yaml in it. + with mock.patch('cloudinit.util.write_file') as mockobj: + mockobj.side_effect = fsstore + ci.fetch() + ci.consume_data() + + cfg = util.load_yaml(fs[ci.paths.get_ipath("cloud_config")]) + self.assertEqual(cfg.get('password'), 'gocubs') + self.assertEqual(cfg.get('locale'), 'chicago') + + class TestUDProcess(helpers.ResourceUsingTestCase): def test_bytes_in_userdata(self): -- cgit v1.2.3