From f895cb12141281702b34da18f2384deb64c881e7 Mon Sep 17 00:00:00 2001
From: Barry Warsaw <barry@python.org>
Date: Wed, 21 Jan 2015 17:56:53 -0500
Subject: Largely merge lp:~harlowja/cloud-init/py2-3 albeit manually because
 it seemed to be behind trunk.

`tox -e py27` passes full test suite.  Now to work on replacing mocker.
---
 cloudinit/handlers/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'cloudinit/handlers/__init__.py')

diff --git a/cloudinit/handlers/__init__.py b/cloudinit/handlers/__init__.py
index 059d7495..d67a70ea 100644
--- a/cloudinit/handlers/__init__.py
+++ b/cloudinit/handlers/__init__.py
@@ -147,7 +147,7 @@ def walker_handle_handler(pdata, _ctype, _filename, payload):
     if not modfname.endswith(".py"):
         modfname = "%s.py" % (modfname)
     # TODO(harlowja): Check if path exists??
-    util.write_file(modfname, payload, 0600)
+    util.write_file(modfname, payload, 0o600)
     handlers = pdata['handlers']
     try:
         mod = fixup_handler(importer.import_module(modname))
-- 
cgit v1.2.3


From 0e7e5041a0ef80099c48341952e881009eb65fdf Mon Sep 17 00:00:00 2001
From: Barry Warsaw <barry@python.org>
Date: Mon, 26 Jan 2015 12:27:51 -0500
Subject: Fix a few string/bytes problems with Python 3.

---
 cloudinit/handlers/__init__.py | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

(limited to 'cloudinit/handlers/__init__.py')

diff --git a/cloudinit/handlers/__init__.py b/cloudinit/handlers/__init__.py
index d67a70ea..cdccf122 100644
--- a/cloudinit/handlers/__init__.py
+++ b/cloudinit/handlers/__init__.py
@@ -22,6 +22,7 @@
 
 import abc
 import os
+import six
 
 from cloudinit.settings import (PER_ALWAYS, PER_INSTANCE, FREQUENCIES)
 
@@ -174,11 +175,11 @@ def _extract_first_or_bytes(blob, size):
 
 def _escape_string(text):
     try:
-        return text.encode("string-escape")
-    except TypeError:
+        return text.encode("string_escape")
+    except (LookupError, TypeError):
         try:
-            # Unicode doesn't support string-escape...
-            return text.encode('unicode-escape')
+            # Unicode (and Python 3's str) doesn't support string_escape...
+            return text.encode('unicode_escape')
         except TypeError:
             # Give up...
             pass
@@ -232,7 +233,17 @@ def walk(msg, callback, data):
         headers = dict(part)
         LOG.debug(headers)
         headers['Content-Type'] = ctype
-        callback(data, filename, part.get_payload(decode=True), headers)
+        payload = part.get_payload(decode=True)
+        # In Python 3, decoding the payload will ironically hand us a bytes
+        # object.  'decode' means to decode according to
+        # Content-Transfer-Encoding, not according to any charset in the
+        # Content-Type.  So, if we end up with bytes, first try to decode to
+        # str via CT charset, and failing that, try utf-8 using surrogate
+        # escapes.
+        if six.PY3 and isinstance(payload, bytes):
+            charset = part.get_charset() or 'utf-8'
+            payload = payload.decode(charset, errors='surrogateescape')
+        callback(data, filename, payload, headers)
         partnum = partnum + 1
 
 
-- 
cgit v1.2.3


From 96d130e7732f1242d71c65a32412ae56cb229abf Mon Sep 17 00:00:00 2001
From: Barry Warsaw <barry@python.org>
Date: Tue, 27 Jan 2015 15:11:53 -0500
Subject: Respond to review:

- Refactor "fully" decoding the payload of a text/* part.  In Python 3,
  decode=True only means to decode according to Content-Transfer-Encoding, not
  according to any charset in the Content-Type header.  So do that.
---
 cloudinit/handlers/__init__.py | 11 +----------
 cloudinit/user_data.py         | 12 +-----------
 cloudinit/util.py              | 15 +++++++++++++++
 3 files changed, 17 insertions(+), 21 deletions(-)

(limited to 'cloudinit/handlers/__init__.py')

diff --git a/cloudinit/handlers/__init__.py b/cloudinit/handlers/__init__.py
index cdccf122..6b7abbcd 100644
--- a/cloudinit/handlers/__init__.py
+++ b/cloudinit/handlers/__init__.py
@@ -233,16 +233,7 @@ def walk(msg, callback, data):
         headers = dict(part)
         LOG.debug(headers)
         headers['Content-Type'] = ctype
-        payload = part.get_payload(decode=True)
-        # In Python 3, decoding the payload will ironically hand us a bytes
-        # object.  'decode' means to decode according to
-        # Content-Transfer-Encoding, not according to any charset in the
-        # Content-Type.  So, if we end up with bytes, first try to decode to
-        # str via CT charset, and failing that, try utf-8 using surrogate
-        # escapes.
-        if six.PY3 and isinstance(payload, bytes):
-            charset = part.get_charset() or 'utf-8'
-            payload = payload.decode(charset, errors='surrogateescape')
+        payload = util.fully_decoded_payload(part)
         callback(data, filename, payload, headers)
         partnum = partnum + 1
 
diff --git a/cloudinit/user_data.py b/cloudinit/user_data.py
index bf5642a5..5fdc46f2 100644
--- a/cloudinit/user_data.py
+++ b/cloudinit/user_data.py
@@ -108,17 +108,7 @@ class UserDataProcessor(object):
 
             ctype = None
             ctype_orig = part.get_content_type()
-            ctype_main = part.get_content_maintype()
-            payload = part.get_payload(decode=True)
-            # In Python 3, decoding the payload will ironically hand us a
-            # bytes object.  'decode' means to decode according to
-            # Content-Transfer-Encoding, not according to any charset in the
-            # Content-Type.  So, if we end up with bytes, first try to decode
-            # to str via CT charset, and failing that, try utf-8 using
-            # surrogate escapes.
-            if six.PY3 and ctype_main == 'text' and isinstance(payload, bytes):
-                charset = part.get_charset() or 'utf-8'
-                payload = payload.decode(charset, errors='surrogateescape')
+            payload = util.fully_decoded_payload(part)
             was_compressed = False
 
             # When the message states it is of a gzipped content type ensure
diff --git a/cloudinit/util.py b/cloudinit/util.py
index 8916cc11..3a921afe 100644
--- a/cloudinit/util.py
+++ b/cloudinit/util.py
@@ -110,6 +110,21 @@ def b64e(source):
     return b64encode(source).decode('utf-8')
 
 
+def fully_decoded_payload(part):
+    # In Python 3, decoding the payload will ironically hand us a bytes object.
+    # 'decode' means to decode according to Content-Transfer-Encoding, not
+    # according to any charset in the Content-Type.  So, if we end up with
+    # bytes, first try to decode to str via CT charset, and failing that, try
+    # utf-8 using surrogate escapes.
+    cte_payload = part.get_payload(decode=True)
+    if (    six.PY3 and
+            part.get_content_maintype() == 'text' and
+            isinstance(cte_payload, bytes)):
+        charset = part.get_charset() or 'utf-8'
+        return cte_payload.decode(charset, errors='surrogateescape')
+    return cte_payload
+
+
 # Path for DMI Data
 DMI_SYS_PATH = "/sys/class/dmi/id"
 
-- 
cgit v1.2.3


From 31a8aab92656279b141a9c29e484c4895bde15d3 Mon Sep 17 00:00:00 2001
From: Oleg Strikov <oleg.strikov@canonical.com>
Date: Wed, 11 Mar 2015 20:22:54 +0300
Subject: userdata-handlers: python3-related fixes on do-not-process-this-part
 path

Cloud-init crashed when it received a multipart userdata object with
an 'application/octet-stream' part or some other 'application/*' part
except archived ones (x-gzip and friends). These parts are not
processed by cloud-init and result only in a message in the log.
We used some non-python3-friendly techniques while generating
this log message, which was the reason for the crash.
---
 cloudinit/handlers/__init__.py | 24 ++++++++++++++++++------
 tests/unittests/test_data.py   | 18 ++++++++++++++++++
 2 files changed, 36 insertions(+), 6 deletions(-)

(limited to 'cloudinit/handlers/__init__.py')

diff --git a/cloudinit/handlers/__init__.py b/cloudinit/handlers/__init__.py
index 6b7abbcd..d62fcd19 100644
--- a/cloudinit/handlers/__init__.py
+++ b/cloudinit/handlers/__init__.py
@@ -163,12 +163,19 @@ def walker_handle_handler(pdata, _ctype, _filename, payload):
 
 
 def _extract_first_or_bytes(blob, size):
-    # Extract the first line upto X bytes or X bytes from more than the
-    # first line if the first line does not contain enough bytes
-    first_line = blob.split("\n", 1)[0]
-    if len(first_line) >= size:
-        start = first_line[:size]
-    else:
+    # Extract the first line or upto X symbols for text objects
+    # Extract first X bytes for binary objects
+    try:
+        if isinstance(blob, six.string_types):
+            start = blob.split("\n", 1)[0]
+        else:
+            # We want to avoid decoding the whole blob (it might be huge)
+            # By taking 4*size bytes we have a guarantee to decode size utf8 chars
+            start = blob[:4*size].decode(errors='ignore').split("\n", 1)[0]
+        if len(start) >= size:
+            start = start[:size]
+    except UnicodeDecodeError:
+        # Bytes array doesn't contain a text object -- return chunk of raw bytes
         start = blob[0:size]
     return start
 
@@ -183,6 +190,11 @@ def _escape_string(text):
         except TypeError:
             # Give up...
             pass
+    except AttributeError:
+        # We're in Python3 and received blob as text
+        # No escaping is needed because bytes are printed
+        # as 'b\xAA\xBB' automatically in Python3
+        pass
     return text
 
 
diff --git a/tests/unittests/test_data.py b/tests/unittests/test_data.py
index 8fc280e4..4f24e2dd 100644
--- a/tests/unittests/test_data.py
+++ b/tests/unittests/test_data.py
@@ -13,6 +13,7 @@ except ImportError:
 
 from six import BytesIO, StringIO
 
+from email import encoders
 from email.mime.application import MIMEApplication
 from email.mime.base import MIMEBase
 from email.mime.multipart import MIMEMultipart
@@ -492,6 +493,23 @@ c: 4
             mock.call(ci.paths.get_ipath("cloud_config"), "", 0o600),
             ])
 
+    def test_mime_application_octet_stream(self):
+        """Mime message of type application/octet-stream is ignored but shows warning."""
+        ci = stages.Init()
+        message = MIMEBase("application", "octet-stream")
+        message.set_payload(b'\xbf\xe6\xb2\xc3\xd3\xba\x13\xa4\xd8\xa1\xcc\xbf')
+        encoders.encode_base64(message)
+        ci.datasource = FakeDataSource(message.as_string().encode())
+
+        with mock.patch('cloudinit.util.write_file') as mockobj:
+            log_file = self.capture_log(logging.WARNING)
+            ci.fetch()
+            ci.consume_data()
+            self.assertIn(
+                "Unhandled unknown content-type (application/octet-stream)",
+                log_file.getvalue())
+        mockobj.assert_called_once_with(
+            ci.paths.get_ipath("cloud_config"), "", 0o600)
 
 class TestUDProcess(helpers.ResourceUsingTestCase):
 
-- 
cgit v1.2.3


From dcd4b2b371059bd6249b4e43af371ee1162273e8 Mon Sep 17 00:00:00 2001
From: Scott Moser <smoser@ubuntu.com>
Date: Thu, 16 Apr 2015 16:41:06 -0400
Subject: pep8 fixes

---
 cloudinit/config/cc_snappy.py  | 4 ++--
 cloudinit/handlers/__init__.py | 6 +++---
 tests/unittests/test_data.py   | 5 +++--
 3 files changed, 8 insertions(+), 7 deletions(-)

(limited to 'cloudinit/handlers/__init__.py')

diff --git a/cloudinit/config/cc_snappy.py b/cloudinit/config/cc_snappy.py
index 6a7ae09b..bfe76558 100644
--- a/cloudinit/config/cc_snappy.py
+++ b/cloudinit/config/cc_snappy.py
@@ -72,7 +72,7 @@ def parse_filename(fname):
     name = fname_noext.partition("_")[0]
     shortname = name.partition(".")[0]
     return(name, shortname, fname_noext)
-    
+
 
 def get_fs_package_ops(fspath):
     if not fspath:
@@ -98,7 +98,7 @@ def makeop(op, name, config=None, path=None, cfgfile=None):
 
 def get_package_config(configs, name):
     # load the package's config from the configs dict.
-    # prefer full-name entry (config-example.canonical) 
+    # prefer full-name entry (config-example.canonical)
     # over short name entry (config-example)
     if name in configs:
         return configs[name]
diff --git a/cloudinit/handlers/__init__.py b/cloudinit/handlers/__init__.py
index d62fcd19..52defe66 100644
--- a/cloudinit/handlers/__init__.py
+++ b/cloudinit/handlers/__init__.py
@@ -170,12 +170,12 @@ def _extract_first_or_bytes(blob, size):
             start = blob.split("\n", 1)[0]
         else:
             # We want to avoid decoding the whole blob (it might be huge)
-            # By taking 4*size bytes we have a guarantee to decode size utf8 chars
-            start = blob[:4*size].decode(errors='ignore').split("\n", 1)[0]
+            # By taking 4*size bytes we guarantee to decode size utf8 chars
+            start = blob[:4 * size].decode(errors='ignore').split("\n", 1)[0]
         if len(start) >= size:
             start = start[:size]
     except UnicodeDecodeError:
-        # Bytes array doesn't contain a text object -- return chunk of raw bytes
+        # Bytes array doesn't contain text so return chunk of raw bytes
         start = blob[0:size]
     return start
 
diff --git a/tests/unittests/test_data.py b/tests/unittests/test_data.py
index 4f24e2dd..b950c9a5 100644
--- a/tests/unittests/test_data.py
+++ b/tests/unittests/test_data.py
@@ -494,10 +494,10 @@ c: 4
             ])
 
     def test_mime_application_octet_stream(self):
-        """Mime message of type application/octet-stream is ignored but shows warning."""
+        """Mime type application/octet-stream is ignored but shows warning."""
         ci = stages.Init()
         message = MIMEBase("application", "octet-stream")
-        message.set_payload(b'\xbf\xe6\xb2\xc3\xd3\xba\x13\xa4\xd8\xa1\xcc\xbf')
+        message.set_payload(b'\xbf\xe6\xb2\xc3\xd3\xba\x13\xa4\xd8\xa1\xcc')
         encoders.encode_base64(message)
         ci.datasource = FakeDataSource(message.as_string().encode())
 
@@ -511,6 +511,7 @@ c: 4
         mockobj.assert_called_once_with(
             ci.paths.get_ipath("cloud_config"), "", 0o600)
 
+
 class TestUDProcess(helpers.ResourceUsingTestCase):
 
     def test_bytes_in_userdata(self):
-- 
cgit v1.2.3


From 341a805fca9a06ce12e9f4bbbe15b3dded9eb6a4 Mon Sep 17 00:00:00 2001
From: Scott Moser <smoser@ubuntu.com>
Date: Thu, 16 Apr 2015 17:00:19 -0400
Subject: fix cloud-config-archive handling

Handling of cloud-config-archive input would fail in fully_decoded_payload.
part.get_charset() would return a Charset object, but
part.get_charset().input_codec is a string suitable for passing to decode.

This handles that correctly, and is more careful about binary data inside
input.

The test added verifies that cloud-config inside a cloud-config-archive
is handled correctly and also that binary data there is ignored without
exceptions raised.

LP: #1445143
---
 cloudinit/handlers/__init__.py |  5 ++++-
 cloudinit/user_data.py         |  9 +++++++--
 cloudinit/util.py              |  8 ++++++--
 tests/unittests/test_data.py   | 27 +++++++++++++++++++++++++++
 4 files changed, 44 insertions(+), 5 deletions(-)

(limited to 'cloudinit/handlers/__init__.py')

diff --git a/cloudinit/handlers/__init__.py b/cloudinit/handlers/__init__.py
index 52defe66..53d5604a 100644
--- a/cloudinit/handlers/__init__.py
+++ b/cloudinit/handlers/__init__.py
@@ -263,7 +263,10 @@ def fixup_handler(mod, def_freq=PER_INSTANCE):
 
 
 def type_from_starts_with(payload, default=None):
-    payload_lc = payload.lower()
+    try:
+        payload_lc = util.decode_binary(payload).lower()
+    except UnicodeDecodeError:
+        return default
     payload_lc = payload_lc.lstrip()
     for text in INCLUSION_SRCH:
         if payload_lc.startswith(text):
diff --git a/cloudinit/user_data.py b/cloudinit/user_data.py
index eb3c7336..f7c5787c 100644
--- a/cloudinit/user_data.py
+++ b/cloudinit/user_data.py
@@ -49,6 +49,7 @@ INCLUDE_TYPES = ['text/x-include-url', 'text/x-include-once-url']
 ARCHIVE_TYPES = ["text/cloud-config-archive"]
 UNDEF_TYPE = "text/plain"
 ARCHIVE_UNDEF_TYPE = "text/cloud-config"
+ARCHIVE_UNDEF_BINARY_TYPE = "application/octet-stream"
 
 # This seems to hit most of the gzip possible content types.
 DECOMP_TYPES = [
@@ -265,11 +266,15 @@ class UserDataProcessor(object):
             content = ent.get('content', '')
             mtype = ent.get('type')
             if not mtype:
-                mtype = handlers.type_from_starts_with(content,
-                                                       ARCHIVE_UNDEF_TYPE)
+                default = ARCHIVE_UNDEF_TYPE
+                if isinstance(content, six.binary_type):
+                    default = ARCHIVE_UNDEF_BINARY_TYPE
+                mtype = handlers.type_from_starts_with(content, default)
 
             maintype, subtype = mtype.split('/', 1)
             if maintype == "text":
+                if isinstance(content, six.binary_type):
+                    content = content.decode()
                 msg = MIMEText(content, _subtype=subtype)
             else:
                 msg = MIMEBase(maintype, subtype)
diff --git a/cloudinit/util.py b/cloudinit/util.py
index 971c1c2d..cae57770 100644
--- a/cloudinit/util.py
+++ b/cloudinit/util.py
@@ -121,8 +121,12 @@ def fully_decoded_payload(part):
     if (six.PY3 and
             part.get_content_maintype() == 'text' and
             isinstance(cte_payload, bytes)):
-        charset = part.get_charset() or 'utf-8'
-        return cte_payload.decode(charset, errors='surrogateescape')
+        charset = part.get_charset()
+        if charset and charset.input_codec:
+            encoding = charset.input_codec
+        else:
+            encoding = 'utf-8'
+        return cte_payload.decode(encoding, errors='surrogateescape')
     return cte_payload
 
 
diff --git a/tests/unittests/test_data.py b/tests/unittests/test_data.py
index b950c9a5..1b15dafa 100644
--- a/tests/unittests/test_data.py
+++ b/tests/unittests/test_data.py
@@ -512,6 +512,33 @@ c: 4
             ci.paths.get_ipath("cloud_config"), "", 0o600)
 
 
+    def test_cloud_config_archive(self):
+        non_decodable = b'\x11\xc9\xb4gTH\xee\x12'
+        data = [{'content': '#cloud-config\npassword: gocubs\n'},
+                {'content': '#cloud-config\nlocale: chicago\n'},
+                {'content': non_decodable}]
+        message = b'#cloud-config-archive\n' + util.yaml_dumps(data).encode()
+
+        ci = stages.Init()
+        ci.datasource = FakeDataSource(message)
+
+        fs = {}
+
+        def fsstore(filename, content, mode=0o0644, omode="wb"):
+            fs[filename] = content
+
+        # consuming the user-data provided should write 'cloud_config' file
+        # which will have our yaml in it.
+        with mock.patch('cloudinit.util.write_file') as mockobj:
+            mockobj.side_effect = fsstore
+            ci.fetch()
+            ci.consume_data()
+
+        cfg = util.load_yaml(fs[ci.paths.get_ipath("cloud_config")])
+        self.assertEqual(cfg.get('password'), 'gocubs')
+        self.assertEqual(cfg.get('locale'), 'chicago')
+
+
 class TestUDProcess(helpers.ResourceUsingTestCase):
 
     def test_bytes_in_userdata(self):
-- 
cgit v1.2.3