1 files changed, 140 insertions, 64 deletions
diff --git a/cloudinit/util.py b/cloudinit/util.py
index 26456aa6..b845adfd 100644
--- a/cloudinit/util.py
+++ b/cloudinit/util.py
@@ -20,8 +20,6 @@
 #    You should have received a copy of the GNU General Public License
 #    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-from StringIO import StringIO
-
 import contextlib
 import copy as obj_copy
 import ctypes
@@ -45,8 +43,11 @@ import subprocess
 import sys
 import tempfile
 import time
-import urlparse
 
+from base64 import b64decode, b64encode
+from six.moves.urllib import parse as urlparse
+
+import six
 import yaml
 
 from cloudinit import importer
@@ -69,8 +70,60 @@ FN_REPLACEMENTS = {
 }
 FN_ALLOWED = ('_-.()' + string.digits + string.ascii_letters)
 
+TRUE_STRINGS = ('true', '1', 'on', 'yes')
+FALSE_STRINGS = ('off', '0', 'no', 'false')
+
+
 # Helper utils to see if running in a container
-CONTAINER_TESTS = ['running-in-container', 'lxc-is-container']
+CONTAINER_TESTS = ('running-in-container', 'lxc-is-container')
+
+
+def decode_binary(blob, encoding='utf-8'):
+    # Text is returned unchanged; anything else is decoded with encoding.
+    if isinstance(blob, six.text_type):
+        return blob
+    return blob.decode(encoding)
+
+
+def encode_text(text, encoding='utf-8'):
+    # Bytes are returned unchanged; anything else is encoded with encoding.
+    if isinstance(text, six.binary_type):
+        return text
+    return text.encode(encoding)
+
+
+def b64d(source):
+    # Base64 decode some data, accepting bytes or unicode/str.  Returns
+    # str/unicode when the decoded bytes are valid utf-8, otherwise bytes.
+    decoded = b64decode(source)
+    try:
+        return decoded.decode('utf-8')
+    except UnicodeDecodeError:
+        return decoded
+
+
+def b64e(source):
+    # Base64 encode some data, accepting bytes or unicode/str (non-bytes input
+    # is first encoded as utf-8), and always returning the result as text.
+    if not isinstance(source, bytes):
+        source = source.encode('utf-8')
+    return b64encode(source).decode('utf-8')
+
+
+def fully_decoded_payload(part):
+    # get_payload(decode=True) only undoes the Content-Transfer-Encoding, so
+    # on Python 3 it hands back bytes.  For text/* parts those bytes are then
+    # decoded with part.get_charset(), or utf-8 if unset, using surrogateescape.
+    # NOTE(review): get_charset() appears to return a Charset object rather
+    # than a codec name -- confirm bytes.decode() accepts it when one is set.
+    cte_payload = part.get_payload(decode=True)
+    if (six.PY3 and
+            part.get_content_maintype() == 'text' and
+            isinstance(cte_payload, bytes)):
+        charset = part.get_charset() or 'utf-8'
+        return cte_payload.decode(charset, errors='surrogateescape')
+    return cte_payload
+
 
 # Path for DMI Data
 DMI_SYS_PATH = "/sys/class/dmi/id"
@@ -98,7 +151,7 @@ class ProcessExecutionError(IOError):
         else:
             self.description = description
 
-        if not isinstance(exit_code, (long, int)):
+        if not isinstance(exit_code, six.integer_types):
             self.exit_code = '-'
         else:
             self.exit_code = exit_code
@@ -127,6 +180,9 @@ class ProcessExecutionError(IOError):
             'reason': self.reason,
         }
         IOError.__init__(self, message)
+        # For backward compatibility with Python 2.
+        if not hasattr(self, 'message'):
+            self.message = message
 
 
 class SeLinuxGuard(object):
@@ -154,7 +210,8 @@ class SeLinuxGuard(object):
 
         path = os.path.realpath(self.path)
         # path should be a string, not unicode
-        path = str(path)
+        if six.PY2:
+            path = str(path)
         try:
             stats = os.lstat(path)
             self.selinux.matchpathcon(path, stats[stat.ST_MODE])
@@ -212,10 +269,10 @@ def fork_cb(child_cb, *args, **kwargs):
 def is_true(val, addons=None):
     if isinstance(val, (bool)):
         return val is True
-    check_set = ['true', '1', 'on', 'yes']
+    check_set = TRUE_STRINGS
     if addons:
-        check_set = check_set + addons
-    if str(val).lower().strip() in check_set:
+        check_set = list(check_set) + addons
+    if six.text_type(val).lower().strip() in check_set:
         return True
     return False
 
@@ -223,10 +280,10 @@ def is_true(val, addons=None):
 def is_false(val, addons=None):
     if isinstance(val, (bool)):
         return val is False
-    check_set = ['off', '0', 'no', 'false']
+    check_set = FALSE_STRINGS
     if addons:
-        check_set = check_set + addons
-    if str(val).lower().strip() in check_set:
+        check_set = list(check_set) + addons
+    if six.text_type(val).lower().strip() in check_set:
         return True
     return False
 
@@ -244,7 +301,7 @@ def translate_bool(val, addons=None):
 
 def rand_str(strlen=32, select_from=None):
     if not select_from:
-        select_from = string.letters + string.digits
+        select_from = string.ascii_letters + string.digits
     return "".join([random.choice(select_from) for _x in range(0, strlen)])
 
 
@@ -276,7 +333,7 @@ def uniq_merge_sorted(*lists):
 def uniq_merge(*lists):
     combined_list = []
     for a_list in lists:
-        if isinstance(a_list, (str, basestring)):
+        if isinstance(a_list, six.string_types):
             a_list = a_list.strip().split(",")
             # Kickout the empty ones
             a_list = [a for a in a_list if len(a)]
@@ -285,7 +342,7 @@ def uniq_merge(*lists):
 
 
 def clean_filename(fn):
-    for (k, v) in FN_REPLACEMENTS.iteritems():
+    for (k, v) in FN_REPLACEMENTS.items():
         fn = fn.replace(k, v)
     removals = []
     for k in fn:
@@ -297,16 +354,19 @@ def clean_filename(fn):
     return fn
 
 
-def decomp_gzip(data, quiet=True):
+def decomp_gzip(data, quiet=True, decode=True):
     try:
-        buf = StringIO(str(data))
+        buf = six.BytesIO(encode_text(data))
         with contextlib.closing(gzip.GzipFile(None, "rb", 1, buf)) as gh:
-            return gh.read()
+            if decode:
+                return decode_binary(gh.read())
+            else:
+                return gh.read()
     except Exception as e:
         if quiet:
             return data
         else:
-            raise DecompressionError(str(e))
+            raise DecompressionError(six.text_type(e))
 
 
 def extract_usergroup(ug_pair):
@@ -365,7 +425,7 @@ def multi_log(text, console=True, stderr=True,
 
 
 def load_json(text, root_types=(dict,)):
-    decoded = json.loads(text)
+    decoded = json.loads(decode_binary(text))
     if not isinstance(decoded, tuple(root_types)):
         expected_types = ", ".join([str(t) for t in root_types])
         raise TypeError("(%s) root types expected, got %s instead"
@@ -397,7 +457,7 @@ def get_cfg_option_str(yobj, key, default=None):
     if key not in yobj:
         return default
     val = yobj[key]
-    if not isinstance(val, (str, basestring)):
+    if not isinstance(val, six.string_types):
         val = str(val)
     return val
 
@@ -436,7 +496,7 @@ def get_cfg_option_list(yobj, key, default=None):
     if isinstance(val, (list)):
         cval = [v for v in val]
         return cval
-    if not isinstance(val, (basestring)):
+    if not isinstance(val, six.string_types):
         val = str(val)
     return [val]
 
@@ -711,10 +771,10 @@ def read_file_or_url(url, timeout=5, retries=10,
 
 def load_yaml(blob, default=None, allowed=(dict,)):
     loaded = default
+    blob = decode_binary(blob)
     try:
-        blob = str(blob)
-        LOG.debug(("Attempting to load yaml from string "
-                 "of length %s with allowed root types %s"),
+        LOG.debug("Attempting to load yaml from string "
+                 "of length %s with allowed root types %s",
                  len(blob), allowed)
         converted = safeyaml.load(blob)
         if not isinstance(converted, allowed):
@@ -749,14 +809,12 @@ def read_seeded(base="", ext="", timeout=5, retries=10, file_retries=0):
     md_resp = read_file_or_url(md_url, timeout, retries, file_retries)
     md = None
     if md_resp.ok():
-        md_str = str(md_resp)
-        md = load_yaml(md_str, default={})
+        md = load_yaml(md_resp.contents, default={})
 
     ud_resp = read_file_or_url(ud_url, timeout, retries, file_retries)
     ud = None
     if ud_resp.ok():
-        ud_str = str(ud_resp)
-        ud = ud_str
+        ud = ud_resp.contents
 
     return (md, ud)
 
@@ -787,7 +845,7 @@ def read_conf_with_confd(cfgfile):
     if "conf_d" in cfg:
         confd = cfg['conf_d']
         if confd:
-            if not isinstance(confd, (str, basestring)):
+            if not isinstance(confd, six.string_types):
                 raise TypeError(("Config file %s contains 'conf_d' "
                                  "with non-string type %s") %
                                  (cfgfile, type_utils.obj_name(confd)))
@@ -924,8 +982,8 @@ def get_cmdline_url(names=('cloud-config-url', 'url'),
         return (None, None, None)
 
     resp = read_file_or_url(url)
-    if resp.contents.startswith(starts) and resp.ok():
-        return (key, url, str(resp))
+    if resp.ok() and resp.contents.startswith(starts):
+        return (key, url, resp.contents)
 
     return (key, url, None)
 
@@ -1079,9 +1137,9 @@ def uniq_list(in_list):
     return out_list
 
 
-def load_file(fname, read_cb=None, quiet=False):
+def load_file(fname, read_cb=None, quiet=False, decode=True):
     LOG.debug("Reading from %s (quiet=%s)", fname, quiet)
-    ofh = StringIO()
+    ofh = six.BytesIO()
     try:
         with open(fname, 'rb') as ifh:
             pipe_in_out(ifh, ofh, chunk_cb=read_cb)
@@ -1092,7 +1150,10 @@ def load_file(fname, read_cb=None, quiet=False):
             raise
     contents = ofh.getvalue()
     LOG.debug("Read %s bytes from %s", len(contents), fname)
-    return contents
+    if decode:
+        return decode_binary(contents)
+    else:
+        return contents
 
 
 def get_cmdline():
@@ -1110,7 +1171,7 @@ def pipe_in_out(in_fh, out_fh, chunk_size=1024, chunk_cb=None):
     bytes_piped = 0
     while True:
         data = in_fh.read(chunk_size)
-        if data == '':
+        if len(data) == 0:
             break
         else:
             out_fh.write(data)
@@ -1216,13 +1277,20 @@ def logexc(log, msg, *args):
     # coming out to a non-debug stream
     if msg:
         log.warn(msg, *args)
-    # Debug gets the full trace
-    log.debug(msg, exc_info=1, *args)
+    # Debug gets the full trace.  However, nose has a bug whereby its
+    # logcapture plugin doesn't properly handle the case where there is no
+    # actual exception.  To avoid tracebacks during the test suite then, we'll
+    # do the actual exc_info extraction here, and if there is no exception in
+    # flight, we'll just pass in None.
+    exc_info = sys.exc_info()
+    if exc_info == (None, None, None):
+        exc_info = None
+    log.debug(msg, exc_info=exc_info, *args)
 
 
 def hash_blob(blob, routine, mlen=None):
     hasher = hashlib.new(routine)
-    hasher.update(blob)
+    hasher.update(encode_text(blob))
     digest = hasher.hexdigest()
     # Don't get to long now
     if mlen is not None:
@@ -1253,7 +1321,7 @@ def rename(src, dest):
     os.rename(src, dest)
 
 
-def ensure_dirs(dirlist, mode=0755):
+def ensure_dirs(dirlist, mode=0o755):
     for d in dirlist:
         ensure_dir(d, mode)
 
@@ -1267,7 +1335,7 @@ def read_write_cmdline_url(target_fn):
             return
         try:
             if key and content:
-                write_file(target_fn, content, mode=0600)
+                write_file(target_fn, content, mode=0o600)
                 LOG.debug(("Wrote to %s with contents of command line"
                           " url %s (len=%s)"), target_fn, url, len(content))
             elif key and not content:
@@ -1283,8 +1351,7 @@ def yaml_dumps(obj, explicit_start=True, explicit_end=True):
                           indent=4,
                           explicit_start=explicit_start,
                           explicit_end=explicit_end,
-                          default_flow_style=False,
-                          allow_unicode=True)
+                          default_flow_style=False)
 
 
 def ensure_dir(path, mode=None):
@@ -1492,7 +1559,7 @@ def append_file(path, content):
     write_file(path, content, omode="ab", mode=None)
 
 
-def ensure_file(path, mode=0644):
+def ensure_file(path, mode=0o644):
     write_file(path, content='', omode="ab", mode=mode)
 
 
@@ -1510,7 +1577,7 @@ def chmod(path, mode):
             os.chmod(path, real_mode)
 
 
-def write_file(filename, content, mode=0644, omode="wb"):
+def write_file(filename, content, mode=0o644, omode="wb"):
     """
     Writes a file with the given content and sets the file mode as specified.
     Resotres the SELinux context if possible.
@@ -1518,11 +1585,17 @@ def write_file(filename, content, mode=0644, omode="wb"):
     @param filename: The full path of the file to write.
     @param content: The content to write to the file.
     @param mode: The filesystem mode to set on the file.
-    @param omode: The open mode used when opening the file (r, rb, a, etc.)
+    @param omode: The open mode used when opening the file (w, wb, a, etc.)
     """
     ensure_dir(os.path.dirname(filename))
-    LOG.debug("Writing to %s - %s: [%s] %s bytes",
-               filename, omode, mode, len(content))
+    if 'b' in omode.lower():
+        content = encode_text(content)
+        write_type = 'bytes'
+    else:
+        content = decode_binary(content)
+        write_type = 'characters'
+    LOG.debug("Writing to %s - %s: [%s] %s %s",
+               filename, omode, mode, len(content), write_type)
     with SeLinuxGuard(path=filename):
         with open(filename, omode) as fh:
             fh.write(content)
@@ -1564,9 +1637,12 @@ def subp(args, data=None, rcs=None, env=None, capture=True, shell=False,
             stdout = subprocess.PIPE
             stderr = subprocess.PIPE
         stdin = subprocess.PIPE
-        sp = subprocess.Popen(args, stdout=stdout,
-                        stderr=stderr, stdin=stdin,
-                        env=env, shell=shell)
+        kws = dict(stdout=stdout, stderr=stderr, stdin=stdin,
+                   env=env, shell=shell)
+        if six.PY3:
+            # Use this so subprocess output will be (Python 3) str, not bytes.
+            kws['universal_newlines'] = True
+        sp = subprocess.Popen(args, **kws)
         (out, err) = sp.communicate(data)
     except OSError as e:
         raise ProcessExecutionError(cmd=args, reason=e)
@@ -1611,10 +1687,10 @@ def shellify(cmdlist, add_header=True):
         if isinstance(args, list):
             fixed = []
             for f in args:
-                fixed.append("'%s'" % (str(f).replace("'", escaped)))
+                fixed.append("'%s'" % (six.text_type(f).replace("'", escaped)))
             content = "%s%s\n" % (content, ' '.join(fixed))
             cmds_made += 1
-        elif isinstance(args, (str, basestring)):
+        elif isinstance(args, six.string_types):
             content = "%s%s\n" % (content, args)
             cmds_made += 1
         else:
@@ -1725,7 +1801,7 @@ def expand_package_list(version_fmt, pkgs):
 
     pkglist = []
     for pkg in pkgs:
-        if isinstance(pkg, basestring):
+        if isinstance(pkg, six.string_types):
             pkglist.append(pkg)
             continue
 
@@ -2021,23 +2097,23 @@ def _read_dmi_syspath(key):
     Reads dmi data with from /sys/class/dmi/id
     """
 
-    dmi_key = "{}/{}".format(DMI_SYS_PATH, key)
-    LOG.debug("querying dmi data {}".format(dmi_key))
+    dmi_key = "{0}/{1}".format(DMI_SYS_PATH, key)
+    LOG.debug("querying dmi data {0}".format(dmi_key))
     try:
         if not os.path.exists(dmi_key):
-            LOG.debug("did not find {}".format(dmi_key))
+            LOG.debug("did not find {0}".format(dmi_key))
             return None
 
         key_data = load_file(dmi_key)
         if not key_data:
-            LOG.debug("{} did not return any data".format(key))
+            LOG.debug("{0} did not return any data".format(key))
             return None
 
-        LOG.debug("dmi data {} returned {}".format(dmi_key, key_data))
+        LOG.debug("dmi data {0} returned {1}".format(dmi_key, key_data))
         return key_data.strip()
 
     except Exception as e:
-        logexc(LOG, "failed read of {}".format(dmi_key), e)
+        logexc(LOG, "failed read of {0}".format(dmi_key), e)
         return None
 
 
@@ -2049,10 +2125,10 @@ def _call_dmidecode(key, dmidecode_path):
     try:
         cmd = [dmidecode_path, "--string", key]
         (result, _err) = subp(cmd)
-        LOG.debug("dmidecode returned '{}' for '{}'".format(result, key))
+        LOG.debug("dmidecode returned '{0}' for '{1}'".format(result, key))
         return result
-    except OSError, _err:
-        LOG.debug('failed dmidecode cmd: {}\n{}'.format(cmd, _err.message))
+    except OSError as _err:
+        LOG.debug('failed dmidecode cmd: {0}\n{1}'.format(cmd, _err.message))
         return None
 
 
@@ -2068,7 +2144,7 @@ def read_dmi_data(key):
     if dmidecode_path:
         return _call_dmidecode(key, dmidecode_path)
 
-    LOG.warn("did not find either path {} or dmidecode command".format(
+    LOG.warn("did not find either path {0} or dmidecode command".format(
              DMI_SYS_PATH))
 
     return None