diff options
author | Scott Moser <smoser@ubuntu.com> | 2018-02-22 16:18:02 -0500 |
---|---|---|
committer | Scott Moser <smoser@ubuntu.com> | 2018-02-23 12:17:57 -0500 |
commit | 46cb6716c27d4496ce3d2bea7684803f522f277d (patch) | |
tree | e9daac696ae390683dd9f3fe57d1a4ac300ab8e6 | |
parent | 65f0b263a1af170802215823d6e354f8bdfa5f39 (diff) | |
download | vyos-cloud-init-46cb6716c27d4496ce3d2bea7684803f522f277d.tar.gz vyos-cloud-init-46cb6716c27d4496ce3d2bea7684803f522f277d.zip |
subp: Fix subp usage with non-ascii characters when no system locale.
If Python starts up without a locale set, its default encoding ends up
being ascii. That is not easily changed with the likes of setlocale.
To avoid UnicodeDecodeErrors, cloud-init now encodes any python3 string
or python2 basestring argument to bytes itself, so that the values
passed to Popen are already bytes.
LP: #1751051
-rw-r--r-- | cloudinit/util.py | 7 | ||||
-rw-r--r-- | tests/unittests/test_util.py | 34 |
2 files changed, 40 insertions, 1 deletion
diff --git a/cloudinit/util.py b/cloudinit/util.py index 338fb971..5a919cfe 100644 --- a/cloudinit/util.py +++ b/cloudinit/util.py @@ -1865,8 +1865,13 @@ def subp(args, data=None, rcs=None, env=None, capture=True, shell=False, if not isinstance(data, bytes): data = data.encode() + # Popen converts entries in the arguments array from non-bytes to bytes. + # When locale is unset it may use ascii for that encoding which can + # cause UnicodeDecodeErrors. (LP: #1751051) + bytes_args = [x if isinstance(x, six.binary_type) else x.encode("utf-8") + for x in args] try: - sp = subprocess.Popen(args, stdout=stdout, + sp = subprocess.Popen(bytes_args, stdout=stdout, stderr=stderr, stdin=stdin, env=env, shell=shell) (out, err) = sp.communicate(data) diff --git a/tests/unittests/test_util.py b/tests/unittests/test_util.py index 4a92e741..89ae40f5 100644 --- a/tests/unittests/test_util.py +++ b/tests/unittests/test_util.py @@ -8,7 +8,9 @@ import shutil import stat import tempfile +import json import six +import sys import yaml from cloudinit import importer, util @@ -733,6 +735,38 @@ class TestSubp(helpers.CiTestCase): self.assertEqual("/target/my/path/", util.target_path("/target/", "///my/path/")) + def test_c_lang_can_take_utf8_args(self): + """Independent of system LC_CTYPE, args can contain utf-8 strings. + + When python starts up, its default encoding gets set based on + the value of LC_CTYPE. If no system locale is set, the default + encoding for both python2 and python3 in some paths will end up + being ascii. + + Attempts to use setlocale or patching (or changing) os.environ + in the current environment seem to not be effective. + + This test starts up a python with LC_CTYPE set to C so that + the default encoding will be set to ascii. In such an environment + Popen(['command', 'non-ascii-arg']) would cause a UnicodeDecodeError. 
+ """ + python_prog = '\n'.join([ + 'import json, sys', + 'from cloudinit.util import subp', + 'data = sys.stdin.read()', + 'cmd = json.loads(data)', + 'subp(cmd, capture=False)', + '']) + cmd = [BASH, '-c', 'echo -n "$@"', '--', + self.utf8_valid.decode("utf-8")] + python_subp = [sys.executable, '-c', python_prog] + + out, _err = util.subp( + python_subp, update_env={'LC_CTYPE': 'C'}, + data=json.dumps(cmd).encode("utf-8"), + decode=False) + self.assertEqual(self.utf8_valid, out) + class TestEncode(helpers.TestCase): """Test the encoding functions""" |