summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Barry Warsaw <barry@python.org>, 2015-01-27 15:11:53 -0500
committer: Barry Warsaw <barry@python.org>, 2015-01-27 15:11:53 -0500
commit: 96d130e7732f1242d71c65a32412ae56cb229abf (patch)
tree: 3fb39ac6dd43988f36507a1c0b82e8944dfe95ff
parent: 6e742d20e9ed56498925c7c850cd5da65d063b4b (diff)
download: vyos-cloud-init-96d130e7732f1242d71c65a32412ae56cb229abf.tar.gz
download: vyos-cloud-init-96d130e7732f1242d71c65a32412ae56cb229abf.zip
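Respond to review:
- Refactor "fully" decoding the payload of a text/* part into a shared helper, util.fully_decoded_payload(). In Python 3, get_payload(decode=True) only decodes according to Content-Transfer-Encoding and returns bytes; it does not decode according to any charset in the Content-Type header. The helper therefore also decodes the resulting bytes to str for text/* parts, and both call sites now use it.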
-rw-r--r--  cloudinit/handlers/__init__.py  11
-rw-r--r--  cloudinit/user_data.py  12
-rw-r--r--  cloudinit/util.py  15
3 files changed, 17 insertions, 21 deletions
diff --git a/cloudinit/handlers/__init__.py b/cloudinit/handlers/__init__.py
index cdccf122..6b7abbcd 100644
--- a/cloudinit/handlers/__init__.py
+++ b/cloudinit/handlers/__init__.py
@@ -233,16 +233,7 @@ def walk(msg, callback, data):
headers = dict(part)
LOG.debug(headers)
headers['Content-Type'] = ctype
- payload = part.get_payload(decode=True)
- # In Python 3, decoding the payload will ironically hand us a bytes
- # object. 'decode' means to decode according to
- # Content-Transfer-Encoding, not according to any charset in the
- # Content-Type. So, if we end up with bytes, first try to decode to
- # str via CT charset, and failing that, try utf-8 using surrogate
- # escapes.
- if six.PY3 and isinstance(payload, bytes):
- charset = part.get_charset() or 'utf-8'
- payload = payload.decode(charset, errors='surrogateescape')
+ payload = util.fully_decoded_payload(part)
callback(data, filename, payload, headers)
partnum = partnum + 1
diff --git a/cloudinit/user_data.py b/cloudinit/user_data.py
index bf5642a5..5fdc46f2 100644
--- a/cloudinit/user_data.py
+++ b/cloudinit/user_data.py
@@ -108,17 +108,7 @@ class UserDataProcessor(object):
ctype = None
ctype_orig = part.get_content_type()
- ctype_main = part.get_content_maintype()
- payload = part.get_payload(decode=True)
- # In Python 3, decoding the payload will ironically hand us a
- # bytes object. 'decode' means to decode according to
- # Content-Transfer-Encoding, not according to any charset in the
- # Content-Type. So, if we end up with bytes, first try to decode
- # to str via CT charset, and failing that, try utf-8 using
- # surrogate escapes.
- if six.PY3 and ctype_main == 'text' and isinstance(payload, bytes):
- charset = part.get_charset() or 'utf-8'
- payload = payload.decode(charset, errors='surrogateescape')
+ payload = util.fully_decoded_payload(part)
was_compressed = False
# When the message states it is of a gzipped content type ensure
diff --git a/cloudinit/util.py b/cloudinit/util.py
index 8916cc11..3a921afe 100644
--- a/cloudinit/util.py
+++ b/cloudinit/util.py
@@ -110,6 +110,21 @@ def b64e(source):
return b64encode(source).decode('utf-8')
+def fully_decoded_payload(part):
+ # In Python 3, decoding the payload will ironically hand us a bytes object.
+ # 'decode' means to decode according to Content-Transfer-Encoding, not
+ # according to any charset in the Content-Type. So, if we end up with
+ # bytes, first try to decode to str via CT charset, and failing that, try
+ # utf-8 using surrogate escapes.
+ cte_payload = part.get_payload(decode=True)
+ if ( six.PY3 and
+ part.get_content_maintype() == 'text' and
+ isinstance(cte_payload, bytes)):
+ charset = part.get_charset() or 'utf-8'
+ return cte_payload.decode(charset, errors='surrogateescape')
+ return cte_payload
+
+
# Path for DMI Data
DMI_SYS_PATH = "/sys/class/dmi/id"