summary | refs | log | tree | commit | diff
path: root/cloudinit/handlers
diff options
context:
space:
mode:
Diffstat (limited to 'cloudinit/handlers')
-rw-r--r-- cloudinit/handlers/__init__.py 21
1 files changed, 16 insertions, 5 deletions
diff --git a/cloudinit/handlers/__init__.py b/cloudinit/handlers/__init__.py
index d67a70ea..cdccf122 100644
--- a/cloudinit/handlers/__init__.py
+++ b/cloudinit/handlers/__init__.py
@@ -22,6 +22,7 @@
import abc
import os
+import six
from cloudinit.settings import (PER_ALWAYS, PER_INSTANCE, FREQUENCIES)
@@ -174,11 +175,11 @@ def _extract_first_or_bytes(blob, size):
def _escape_string(text):
try:
- return text.encode("string-escape")
- except TypeError:
+ return text.encode("string_escape")
+ except (LookupError, TypeError):
try:
- # Unicode doesn't support string-escape...
- return text.encode('unicode-escape')
+ # Unicode (and Python 3's str) doesn't support string_escape...
+ return text.encode('unicode_escape')
except TypeError:
# Give up...
pass
@@ -232,7 +233,17 @@ def walk(msg, callback, data):
headers = dict(part)
LOG.debug(headers)
headers['Content-Type'] = ctype
- callback(data, filename, part.get_payload(decode=True), headers)
+ payload = part.get_payload(decode=True)
+ # In Python 3, decoding the payload will ironically hand us a bytes
+ # object. 'decode' means to decode according to
+ # Content-Transfer-Encoding, not according to any charset in the
+ # Content-Type. So, if we end up with bytes, first try to decode to
+ # str via CT charset, and failing that, try utf-8 using surrogate
+ # escapes.
+ if six.PY3 and isinstance(payload, bytes):
+ charset = part.get_charset() or 'utf-8'
+ payload = payload.decode(charset, errors='surrogateescape')
+ callback(data, filename, payload, headers)
partnum = partnum + 1