diff options
author | Barry Warsaw <barry@python.org> | 2015-01-27 15:11:53 -0500 |
---|---|---|
committer | Barry Warsaw <barry@python.org> | 2015-01-27 15:11:53 -0500 |
commit | 96d130e7732f1242d71c65a32412ae56cb229abf (patch) | |
tree | 3fb39ac6dd43988f36507a1c0b82e8944dfe95ff | |
parent | 6e742d20e9ed56498925c7c850cd5da65d063b4b (diff) | |
download | vyos-cloud-init-96d130e7732f1242d71c65a32412ae56cb229abf.tar.gz vyos-cloud-init-96d130e7732f1242d71c65a32412ae56cb229abf.zip |
Respond to review:
- Refactor "fully" decoding the payload of a text/* part. In Python 3,
decode=True only means to decode according to Content-Transfer-Encoding, not
according to any charset in the Content-Type header. So do that.
-rw-r--r-- | cloudinit/handlers/__init__.py | 11 | ||||
-rw-r--r-- | cloudinit/user_data.py | 12 | ||||
-rw-r--r-- | cloudinit/util.py | 15 |
3 files changed, 17 insertions, 21 deletions
diff --git a/cloudinit/handlers/__init__.py b/cloudinit/handlers/__init__.py
index cdccf122..6b7abbcd 100644
--- a/cloudinit/handlers/__init__.py
+++ b/cloudinit/handlers/__init__.py
@@ -233,16 +233,7 @@ def walk(msg, callback, data):
         headers = dict(part)
         LOG.debug(headers)
         headers['Content-Type'] = ctype
-        payload = part.get_payload(decode=True)
-        # In Python 3, decoding the payload will ironically hand us a bytes
-        # object. 'decode' means to decode according to
-        # Content-Transfer-Encoding, not according to any charset in the
-        # Content-Type. So, if we end up with bytes, first try to decode to
-        # str via CT charset, and failing that, try utf-8 using surrogate
-        # escapes.
-        if six.PY3 and isinstance(payload, bytes):
-            charset = part.get_charset() or 'utf-8'
-            payload = payload.decode(charset, errors='surrogateescape')
+        payload = util.fully_decoded_payload(part)
         callback(data, filename, payload, headers)
         partnum = partnum + 1
 
diff --git a/cloudinit/user_data.py b/cloudinit/user_data.py
index bf5642a5..5fdc46f2 100644
--- a/cloudinit/user_data.py
+++ b/cloudinit/user_data.py
@@ -108,17 +108,7 @@ class UserDataProcessor(object):
 
             ctype = None
             ctype_orig = part.get_content_type()
-            ctype_main = part.get_content_maintype()
-            payload = part.get_payload(decode=True)
-            # In Python 3, decoding the payload will ironically hand us a
-            # bytes object. 'decode' means to decode according to
-            # Content-Transfer-Encoding, not according to any charset in the
-            # Content-Type. So, if we end up with bytes, first try to decode
-            # to str via CT charset, and failing that, try utf-8 using
-            # surrogate escapes.
-            if six.PY3 and ctype_main == 'text' and isinstance(payload, bytes):
-                charset = part.get_charset() or 'utf-8'
-                payload = payload.decode(charset, errors='surrogateescape')
+            payload = util.fully_decoded_payload(part)
             was_compressed = False
 
             # When the message states it is of a gzipped content type ensure
diff --git a/cloudinit/util.py b/cloudinit/util.py
index 8916cc11..3a921afe 100644
--- a/cloudinit/util.py
+++ b/cloudinit/util.py
@@ -110,6 +110,21 @@ def b64e(source):
     return b64encode(source).decode('utf-8')
 
 
+def fully_decoded_payload(part):
+    # In Python 3, decoding the payload will ironically hand us a bytes object.
+    # 'decode' means to decode according to Content-Transfer-Encoding, not
+    # according to any charset in the Content-Type. So, if we end up with
+    # bytes, first try to decode to str via CT charset, and failing that, try
+    # utf-8 using surrogate escapes.
+    cte_payload = part.get_payload(decode=True)
+    if ( six.PY3 and
+            part.get_content_maintype() == 'text' and
+            isinstance(cte_payload, bytes)):
+        charset = part.get_charset() or 'utf-8'
+        return cte_payload.decode(charset, errors='surrogateescape')
+    return cte_payload
+
+
 # Path for DMI Data
 DMI_SYS_PATH = "/sys/class/dmi/id"
 