diff options
-rw-r--r-- | README.md | 2 | ||||
-rw-r--r-- | cloudinit/settings.py | 1 | ||||
-rw-r--r-- | cloudinit/sources/DataSourceVMware.py | 871 | ||||
-rw-r--r-- | doc/rtd/topics/availability.rst | 1 | ||||
-rw-r--r-- | doc/rtd/topics/datasources.rst | 1 | ||||
-rw-r--r-- | doc/rtd/topics/datasources/vmware.rst | 359 | ||||
-rw-r--r-- | requirements.txt | 9 | ||||
-rw-r--r-- | tests/unittests/test_datasource/test_common.py | 3 | ||||
-rw-r--r-- | tests/unittests/test_datasource/test_vmware.py | 377 | ||||
-rw-r--r-- | tests/unittests/test_ds_identify.py | 279 | ||||
-rw-r--r-- | tools/.github-cla-signers | 1 | ||||
-rwxr-xr-x | tools/ds-identify | 76 |
12 files changed, 1977 insertions, 3 deletions
@@ -39,7 +39,7 @@ get in contact with that distribution and send them our way! | Supported OSes | Supported Public Clouds | Supported Private Clouds | | --- | --- | --- | -| Alpine Linux<br />ArchLinux<br />Debian<br />DragonFlyBSD<br />Fedora<br />FreeBSD<br />Gentoo Linux<br />NetBSD<br />OpenBSD<br />RHEL/CentOS/AlmaLinux/Rocky/PhotonOS/Virtuozzo/EuroLinux<br />SLES/openSUSE<br />Ubuntu<br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /> | Amazon Web Services<br />Microsoft Azure<br />Google Cloud Platform<br />Oracle Cloud Infrastructure<br />Softlayer<br />Rackspace Public Cloud<br />IBM Cloud<br />DigitalOcean<br />Bigstep<br />Hetzner<br />Joyent<br />CloudSigma<br />Alibaba Cloud<br />OVH<br />OpenNebula<br />Exoscale<br />Scaleway<br />CloudStack<br />AltCloud<br />SmartOS<br />HyperOne<br />Vultr<br />Rootbox<br /> | Bare metal installs<br />OpenStack<br />LXD<br />KVM<br />Metal-as-a-Service (MAAS)<br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br />| +| Alpine Linux<br />ArchLinux<br />Debian<br />DragonFlyBSD<br />Fedora<br />FreeBSD<br />Gentoo Linux<br />NetBSD<br />OpenBSD<br />RHEL/CentOS/AlmaLinux/Rocky/PhotonOS/Virtuozzo/EuroLinux<br />SLES/openSUSE<br />Ubuntu<br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /> | Amazon Web Services<br />Microsoft Azure<br />Google Cloud Platform<br />Oracle Cloud Infrastructure<br />Softlayer<br />Rackspace Public Cloud<br />IBM Cloud<br />DigitalOcean<br />Bigstep<br />Hetzner<br />Joyent<br />CloudSigma<br />Alibaba Cloud<br />OVH<br />OpenNebula<br />Exoscale<br />Scaleway<br />CloudStack<br />AltCloud<br />SmartOS<br />HyperOne<br />Vultr<br />Rootbox<br /> | Bare metal installs<br />OpenStack<br />LXD<br />KVM<br />Metal-as-a-Service (MAAS)<br />VMware<br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br />| ## To start developing cloud-init diff --git a/cloudinit/settings.py b/cloudinit/settings.py index 23e4c0ad..f69005ea 100644 --- a/cloudinit/settings.py +++ b/cloudinit/settings.py @@ -43,6 +43,7 @@ CFG_BUILTIN = { 'Exoscale', 'RbxCloud', 'UpCloud', + 'VMware', # At the end to act as a 'catch' when none of the above work... 'None', ], diff --git a/cloudinit/sources/DataSourceVMware.py b/cloudinit/sources/DataSourceVMware.py new file mode 100644 index 00000000..22ca63de --- /dev/null +++ b/cloudinit/sources/DataSourceVMware.py @@ -0,0 +1,871 @@ +# Cloud-Init DataSource for VMware +# +# Copyright (c) 2018-2021 VMware, Inc. All Rights Reserved. +# +# Authors: Anish Swaminathan <anishs@vmware.com> +# Andrew Kutz <akutz@vmware.com> +# +# This file is part of cloud-init. See LICENSE file for license information. + +"""Cloud-Init DataSource for VMware + +This module provides a cloud-init datasource for VMware systems and supports +multiple transports types, including: + + * EnvVars + * GuestInfo + +Netifaces (https://github.com/al45tair/netifaces) + + Please note this module relies on the netifaces project to introspect the + runtime, network configuration of the host on which this datasource is + running. This is in contrast to the rest of cloud-init which uses the + cloudinit/netinfo module. + + The reasons for using netifaces include: + + * Netifaces is built in C and is more portable across multiple systems + and more deterministic than shell exec'ing local network commands and + parsing their output. + + * Netifaces provides a stable way to determine the view of the host's + network after DHCP has brought the network online. Unlike most other + datasources, this datasource still provides support for JINJA queries + based on networking information even when the network is based on a + DHCP lease. While this does not tie this datasource directly to + netifaces, it does mean the ability to consistently obtain the + correct information is paramount. + + * It is currently possible to execute this datasource on macOS + (which many developers use today) to print the output of the + get_host_info function. This function calls netifaces to obtain + the same runtime network configuration that the datasource would + persist to the local system's instance data. + + However, the netinfo module fails on macOS. The result is either a + hung operation that requires a SIGINT to return control to the user, + or, if brew is used to install iproute2mac, the ip commands are used + but produce output the netinfo module is unable to parse. + + While macOS is not a target of cloud-init, this feature is quite + useful when working on this datasource. + + For more information about this behavior, please see the following + PR comment, https://bit.ly/3fG7OVh. + + The authors of this datasource are not opposed to moving away from + netifaces. The goal may be to eventually do just that. This proviso was + added to the top of this module as a way to remind future-us and others + why netifaces was used in the first place in order to either smooth the + transition away from netifaces or embrace it further up the cloud-init + stack. +""" + +import collections +import copy +from distutils.spawn import find_executable +import ipaddress +import json +import os +import socket +import time + +from cloudinit import dmi, log as logging +from cloudinit import sources +from cloudinit import util +from cloudinit.subp import subp, ProcessExecutionError + +import netifaces + + +PRODUCT_UUID_FILE_PATH = "/sys/class/dmi/id/product_uuid" + +LOG = logging.getLogger(__name__) +NOVAL = "No value found" + +DATA_ACCESS_METHOD_ENVVAR = "envvar" +DATA_ACCESS_METHOD_GUESTINFO = "guestinfo" + +VMWARE_RPCTOOL = find_executable("vmware-rpctool") +REDACT = "redact" +CLEANUP_GUESTINFO = "cleanup-guestinfo" +VMX_GUESTINFO = "VMX_GUESTINFO" +GUESTINFO_EMPTY_YAML_VAL = "---" + +LOCAL_IPV4 = "local-ipv4" +LOCAL_IPV6 = "local-ipv6" +WAIT_ON_NETWORK = "wait-on-network" +WAIT_ON_NETWORK_IPV4 = "ipv4" +WAIT_ON_NETWORK_IPV6 = "ipv6" + + +class DataSourceVMware(sources.DataSource): + """ + Setting the hostname: + The hostname is set by way of the metadata key "local-hostname". + + Setting the instance ID: + The instance ID may be set by way of the metadata key "instance-id". + However, if this value is absent then the instance ID is read + from the file /sys/class/dmi/id/product_uuid. + + Configuring the network: + The network is configured by setting the metadata key "network" + with a value consistent with Network Config Versions 1 or 2, + depending on the Linux distro's version of cloud-init: + + Network Config Version 1 - http://bit.ly/cloudinit-net-conf-v1 + Network Config Version 2 - http://bit.ly/cloudinit-net-conf-v2 + + For example, CentOS 7's official cloud-init package is version + 0.7.9 and does not support Network Config Version 2. However, + this datasource still supports supplying Network Config Version 2 + data as long as the Linux distro's cloud-init package is new + enough to parse the data. + + The metadata key "network.encoding" may be used to indicate the + format of the metadata key "network". Valid encodings are base64 + and gzip+base64. + """ + + dsname = "VMware" + + def __init__(self, sys_cfg, distro, paths, ud_proc=None): + sources.DataSource.__init__(self, sys_cfg, distro, paths, ud_proc) + + self.data_access_method = None + self.vmware_rpctool = VMWARE_RPCTOOL + + def _get_data(self): + """ + _get_data loads the metadata, userdata, and vendordata from one of + the following locations in the given order: + + * envvars + * guestinfo + + Please note when updating this function with support for new data + transports, the order should match the order in the dscheck_VMware + function from the file ds-identify. + """ + + # Initialize the locally scoped metadata, userdata, and vendordata + # variables. They are assigned below depending on the detected data + # access method. + md, ud, vd = None, None, None + + # First check to see if there is data via env vars. + if os.environ.get(VMX_GUESTINFO, ""): + md = guestinfo_envvar("metadata") + ud = guestinfo_envvar("userdata") + vd = guestinfo_envvar("vendordata") + + if md or ud or vd: + self.data_access_method = DATA_ACCESS_METHOD_ENVVAR + + # At this point, all additional data transports are valid only on + # a VMware platform. + if not self.data_access_method: + system_type = dmi.read_dmi_data("system-product-name") + if system_type is None: + LOG.debug("No system-product-name found") + return False + if "vmware" not in system_type.lower(): + LOG.debug("Not a VMware platform") + return False + + # If no data was detected, check the guestinfo transport next. + if not self.data_access_method: + if self.vmware_rpctool: + md = guestinfo("metadata", self.vmware_rpctool) + ud = guestinfo("userdata", self.vmware_rpctool) + vd = guestinfo("vendordata", self.vmware_rpctool) + + if md or ud or vd: + self.data_access_method = DATA_ACCESS_METHOD_GUESTINFO + + if not self.data_access_method: + LOG.error("failed to find a valid data access method") + return False + + LOG.info("using data access method %s", self._get_subplatform()) + + # Get the metadata. + self.metadata = process_metadata(load_json_or_yaml(md)) + + # Get the user data. + self.userdata_raw = ud + + # Get the vendor data. + self.vendordata_raw = vd + + # Redact any sensitive information. + self.redact_keys() + + # get_data returns true if there is any available metadata, + # userdata, or vendordata. + if self.metadata or self.userdata_raw or self.vendordata_raw: + return True + else: + return False + + def setup(self, is_new_instance): + """setup(is_new_instance) + + This is called before user-data and vendor-data have been processed. + + Unless the datasource has set mode to 'local', then networking + per 'fallback' or per 'network_config' will have been written and + brought up the OS at this point. + """ + + host_info = wait_on_network(self.metadata) + LOG.info("got host-info: %s", host_info) + + # Reflect any possible local IPv4 or IPv6 addresses in the guest + # info. + advertise_local_ip_addrs(host_info) + + # Ensure the metadata gets updated with information about the + # host, including the network interfaces, default IP addresses, + # etc. + self.metadata = util.mergemanydict([self.metadata, host_info]) + + # Persist the instance data for versions of cloud-init that support + # doing so. This occurs here rather than in the get_data call in + # order to ensure that the network interfaces are up and can be + # persisted with the metadata. + self.persist_instance_data() + + def _get_subplatform(self): + get_key_name_fn = None + if self.data_access_method == DATA_ACCESS_METHOD_ENVVAR: + get_key_name_fn = get_guestinfo_envvar_key_name + elif self.data_access_method == DATA_ACCESS_METHOD_GUESTINFO: + get_key_name_fn = get_guestinfo_key_name + else: + return sources.METADATA_UNKNOWN + + return "%s (%s)" % ( + self.data_access_method, + get_key_name_fn("metadata"), + ) + + @property + def network_config(self): + if "network" in self.metadata: + LOG.debug("using metadata network config") + else: + LOG.debug("using fallback network config") + self.metadata["network"] = { + "config": self.distro.generate_fallback_config(), + } + return self.metadata["network"]["config"] + + def get_instance_id(self): + # Pull the instance ID out of the metadata if present. Otherwise + # read the file /sys/class/dmi/id/product_uuid for the instance ID. + if self.metadata and "instance-id" in self.metadata: + return self.metadata["instance-id"] + with open(PRODUCT_UUID_FILE_PATH, "r") as id_file: + self.metadata["instance-id"] = str(id_file.read()).rstrip().lower() + return self.metadata["instance-id"] + + def get_public_ssh_keys(self): + for key_name in ( + "public-keys-data", + "public_keys_data", + "public-keys", + "public_keys", + ): + if key_name in self.metadata: + return sources.normalize_pubkey_data(self.metadata[key_name]) + return [] + + def redact_keys(self): + # Determine if there are any keys to redact. + keys_to_redact = None + if REDACT in self.metadata: + keys_to_redact = self.metadata[REDACT] + elif CLEANUP_GUESTINFO in self.metadata: + # This is for backwards compatibility. + keys_to_redact = self.metadata[CLEANUP_GUESTINFO] + + if self.data_access_method == DATA_ACCESS_METHOD_GUESTINFO: + guestinfo_redact_keys(keys_to_redact, self.vmware_rpctool) + + +def decode(key, enc_type, data): + """ + decode returns the decoded string value of data + key is a string used to identify the data being decoded in log messages + """ + LOG.debug("Getting encoded data for key=%s, enc=%s", key, enc_type) + + raw_data = None + if enc_type in ["gzip+base64", "gz+b64"]: + LOG.debug("Decoding %s format %s", enc_type, key) + raw_data = util.decomp_gzip(util.b64d(data)) + elif enc_type in ["base64", "b64"]: + LOG.debug("Decoding %s format %s", enc_type, key) + raw_data = util.b64d(data) + else: + LOG.debug("Plain-text data %s", key) + raw_data = data + + return util.decode_binary(raw_data) + + +def get_none_if_empty_val(val): + """ + get_none_if_empty_val returns None if the provided value, once stripped + of its trailing whitespace, is empty or equal to GUESTINFO_EMPTY_YAML_VAL. + + The return value is always a string, regardless of whether the input is + a bytes class or a string. + """ + + # If the provided value is a bytes class, convert it to a string to + # simplify the rest of this function's logic. + val = util.decode_binary(val) + val = val.rstrip() + if len(val) == 0 or val == GUESTINFO_EMPTY_YAML_VAL: + return None + return val + + +def advertise_local_ip_addrs(host_info): + """ + advertise_local_ip_addrs gets the local IP address information from + the provided host_info map and sets the addresses in the guestinfo + namespace + """ + if not host_info: + return + + # Reflect any possible local IPv4 or IPv6 addresses in the guest + # info. + local_ipv4 = host_info.get(LOCAL_IPV4) + if local_ipv4: + guestinfo_set_value(LOCAL_IPV4, local_ipv4) + LOG.info("advertised local ipv4 address %s in guestinfo", local_ipv4) + + local_ipv6 = host_info.get(LOCAL_IPV6) + if local_ipv6: + guestinfo_set_value(LOCAL_IPV6, local_ipv6) + LOG.info("advertised local ipv6 address %s in guestinfo", local_ipv6) + + +def handle_returned_guestinfo_val(key, val): + """ + handle_returned_guestinfo_val returns the provided value if it is + not empty or set to GUESTINFO_EMPTY_YAML_VAL, otherwise None is + returned + """ + val = get_none_if_empty_val(val) + if val: + return val + LOG.debug("No value found for key %s", key) + return None + + +def get_guestinfo_key_name(key): + return "guestinfo." + key + + +def get_guestinfo_envvar_key_name(key): + return ("vmx." + get_guestinfo_key_name(key)).upper().replace(".", "_", -1) + + +def guestinfo_envvar(key): + val = guestinfo_envvar_get_value(key) + if not val: + return None + enc_type = guestinfo_envvar_get_value(key + ".encoding") + return decode(get_guestinfo_envvar_key_name(key), enc_type, val) + + +def guestinfo_envvar_get_value(key): + env_key = get_guestinfo_envvar_key_name(key) + return handle_returned_guestinfo_val(key, os.environ.get(env_key, "")) + + +def guestinfo(key, vmware_rpctool=VMWARE_RPCTOOL): + """ + guestinfo returns the guestinfo value for the provided key, decoding + the value when required + """ + val = guestinfo_get_value(key, vmware_rpctool) + if not val: + return None + enc_type = guestinfo_get_value(key + ".encoding", vmware_rpctool) + return decode(get_guestinfo_key_name(key), enc_type, val) + + +def guestinfo_get_value(key, vmware_rpctool=VMWARE_RPCTOOL): + """ + Returns a guestinfo value for the specified key. + """ + LOG.debug("Getting guestinfo value for key %s", key) + + try: + (stdout, stderr) = subp( + [ + vmware_rpctool, + "info-get " + get_guestinfo_key_name(key), + ] + ) + if stderr == NOVAL: + LOG.debug("No value found for key %s", key) + elif not stdout: + LOG.error("Failed to get guestinfo value for key %s", key) + return handle_returned_guestinfo_val(key, stdout) + except ProcessExecutionError as error: + if error.stderr == NOVAL: + LOG.debug("No value found for key %s", key) + else: + util.logexc( + LOG, + "Failed to get guestinfo value for key %s: %s", + key, + error, + ) + except Exception: + util.logexc( + LOG, + "Unexpected error while trying to get " + + "guestinfo value for key %s", + key, + ) + + return None + + +def guestinfo_set_value(key, value, vmware_rpctool=VMWARE_RPCTOOL): + """ + Sets a guestinfo value for the specified key. Set value to an empty string + to clear an existing guestinfo key. + """ + + # If value is an empty string then set it to a single space as it is not + # possible to set a guestinfo key to an empty string. Setting a guestinfo + # key to a single space is as close as it gets to clearing an existing + # guestinfo key. + if value == "": + value = " " + + LOG.debug("Setting guestinfo key=%s to value=%s", key, value) + + try: + subp( + [ + vmware_rpctool, + ("info-set %s %s" % (get_guestinfo_key_name(key), value)), + ] + ) + return True + except ProcessExecutionError as error: + util.logexc( + LOG, + "Failed to set guestinfo key=%s to value=%s: %s", + key, + value, + error, + ) + except Exception: + util.logexc( + LOG, + "Unexpected error while trying to set " + + "guestinfo key=%s to value=%s", + key, + value, + ) + + return None + + +def guestinfo_redact_keys(keys, vmware_rpctool=VMWARE_RPCTOOL): + """ + guestinfo_redact_keys redacts guestinfo of all of the keys in the given + list. each key will have its value set to "---". Since the value is valid + YAML, cloud-init can still read it if it tries. + """ + if not keys: + return + if not type(keys) in (list, tuple): + keys = [keys] + for key in keys: + key_name = get_guestinfo_key_name(key) + LOG.info("clearing %s", key_name) + if not guestinfo_set_value( + key, GUESTINFO_EMPTY_YAML_VAL, vmware_rpctool + ): + LOG.error("failed to clear %s", key_name) + LOG.info("clearing %s.encoding", key_name) + if not guestinfo_set_value(key + ".encoding", "", vmware_rpctool): + LOG.error("failed to clear %s.encoding", key_name) + + +def load_json_or_yaml(data): + """ + load first attempts to unmarshal the provided data as JSON, and if + that fails then attempts to unmarshal the data as YAML. If data is + None then a new dictionary is returned. + """ + if not data: + return {} + try: + return util.load_json(data) + except (json.JSONDecodeError, TypeError): + return util.load_yaml(data) + + +def process_metadata(data): + """ + process_metadata processes metadata and loads the optional network + configuration. + """ + network = None + if "network" in data: + network = data["network"] + del data["network"] + + network_enc = None + if "network.encoding" in data: + network_enc = data["network.encoding"] + del data["network.encoding"] + + if network: + if isinstance(network, collections.abc.Mapping): + LOG.debug("network data copied to 'config' key") + network = {"config": copy.deepcopy(network)} + else: + LOG.debug("network data to be decoded %s", network) + dec_net = decode("metadata.network", network_enc, network) + network = { + "config": load_json_or_yaml(dec_net), + } + + LOG.debug("network data %s", network) + data["network"] = network + + return data + + +# Used to match classes to dependencies +datasources = [ + (DataSourceVMware, (sources.DEP_FILESYSTEM,)), # Run at init-local + (DataSourceVMware, (sources.DEP_FILESYSTEM, sources.DEP_NETWORK)), +] + + +def get_datasource_list(depends): + """ + Return a list of data sources that match this set of dependencies + """ + return sources.list_from_depends(depends, datasources) + + +def get_default_ip_addrs(): + """ + Returns the default IPv4 and IPv6 addresses based on the device(s) used for + the default route. Please note that None may be returned for either address + family if that family has no default route or if there are multiple + addresses associated with the device used by the default route for a given + address. + """ + # TODO(promote and use netifaces in cloudinit.net* modules) + gateways = netifaces.gateways() + if "default" not in gateways: + return None, None + + default_gw = gateways["default"] + if ( + netifaces.AF_INET not in default_gw + and netifaces.AF_INET6 not in default_gw + ): + return None, None + + ipv4 = None + ipv6 = None + + gw4 = default_gw.get(netifaces.AF_INET) + if gw4: + _, dev4 = gw4 + addr4_fams = netifaces.ifaddresses(dev4) + if addr4_fams: + af_inet4 = addr4_fams.get(netifaces.AF_INET) + if af_inet4: + if len(af_inet4) > 1: + LOG.warning( + "device %s has more than one ipv4 address: %s", + dev4, + af_inet4, + ) + elif "addr" in af_inet4[0]: + ipv4 = af_inet4[0]["addr"] + + # Try to get the default IPv6 address by first seeing if there is a default + # IPv6 route. + gw6 = default_gw.get(netifaces.AF_INET6) + if gw6: + _, dev6 = gw6 + addr6_fams = netifaces.ifaddresses(dev6) + if addr6_fams: + af_inet6 = addr6_fams.get(netifaces.AF_INET6) + if af_inet6: + if len(af_inet6) > 1: + LOG.warning( + "device %s has more than one ipv6 address: %s", + dev6, + af_inet6, + ) + elif "addr" in af_inet6[0]: + ipv6 = af_inet6[0]["addr"] + + # If there is a default IPv4 address but not IPv6, then see if there is a + # single IPv6 address associated with the same device associated with the + # default IPv4 address. + if ipv4 and not ipv6: + af_inet6 = addr4_fams.get(netifaces.AF_INET6) + if af_inet6: + if len(af_inet6) > 1: + LOG.warning( + "device %s has more than one ipv6 address: %s", + dev4, + af_inet6, + ) + elif "addr" in af_inet6[0]: + ipv6 = af_inet6[0]["addr"] + + # If there is a default IPv6 address but not IPv4, then see if there is a + # single IPv4 address associated with the same device associated with the + # default IPv6 address. + if not ipv4 and ipv6: + af_inet4 = addr6_fams.get(netifaces.AF_INET) + if af_inet4: + if len(af_inet4) > 1: + LOG.warning( + "device %s has more than one ipv4 address: %s", + dev6, + af_inet4, + ) + elif "addr" in af_inet4[0]: + ipv4 = af_inet4[0]["addr"] + + return ipv4, ipv6 + + +# patched socket.getfqdn() - see https://bugs.python.org/issue5004 + + +def getfqdn(name=""): + """Get fully qualified domain name from name. + An empty argument is interpreted as meaning the local host. + """ + # TODO(may want to promote this function to util.getfqdn) + # TODO(may want to extend util.get_hostname to accept fqdn=True param) + name = name.strip() + if not name or name == "0.0.0.0": + name = util.get_hostname() + try: + addrs = socket.getaddrinfo( + name, None, 0, socket.SOCK_DGRAM, 0, socket.AI_CANONNAME + ) + except socket.error: + pass + else: + for addr in addrs: + if addr[3]: + name = addr[3] + break + return name + + +def is_valid_ip_addr(val): + """ + Returns false if the address is loopback, link local or unspecified; + otherwise true is returned. + """ + # TODO(extend cloudinit.net.is_ip_addr exclude link_local/loopback etc) + # TODO(migrate to use cloudinit.net.is_ip_addr)# + + addr = None + try: + addr = ipaddress.ip_address(val) + except ipaddress.AddressValueError: + addr = ipaddress.ip_address(str(val)) + except Exception: + return None + + if addr.is_link_local or addr.is_loopback or addr.is_unspecified: + return False + return True + + +def get_host_info(): + """ + Returns host information such as the host name and network interfaces. + """ + # TODO(look to promote netifices use up in cloud-init netinfo funcs) + host_info = { + "network": { + "interfaces": { + "by-mac": collections.OrderedDict(), + "by-ipv4": collections.OrderedDict(), + "by-ipv6": collections.OrderedDict(), + }, + }, + } + hostname = getfqdn(util.get_hostname()) + if hostname: + host_info["hostname"] = hostname + host_info["local-hostname"] = hostname + host_info["local_hostname"] = hostname + + default_ipv4, default_ipv6 = get_default_ip_addrs() + if default_ipv4: + host_info[LOCAL_IPV4] = default_ipv4 + if default_ipv6: + host_info[LOCAL_IPV6] = default_ipv6 + + by_mac = host_info["network"]["interfaces"]["by-mac"] + by_ipv4 = host_info["network"]["interfaces"]["by-ipv4"] + by_ipv6 = host_info["network"]["interfaces"]["by-ipv6"] + + ifaces = netifaces.interfaces() + for dev_name in ifaces: + addr_fams = netifaces.ifaddresses(dev_name) + af_link = addr_fams.get(netifaces.AF_LINK) + af_inet4 = addr_fams.get(netifaces.AF_INET) + af_inet6 = addr_fams.get(netifaces.AF_INET6) + + mac = None + if af_link and "addr" in af_link[0]: + mac = af_link[0]["addr"] + + # Do not bother recording localhost + if mac == "00:00:00:00:00:00": + continue + + if mac and (af_inet4 or af_inet6): + key = mac + val = {} + if af_inet4: + af_inet4_vals = [] + for ip_info in af_inet4: + if not is_valid_ip_addr(ip_info["addr"]): + continue + af_inet4_vals.append(ip_info) + val["ipv4"] = af_inet4_vals + if af_inet6: + af_inet6_vals = [] + for ip_info in af_inet6: + if not is_valid_ip_addr(ip_info["addr"]): + continue + af_inet6_vals.append(ip_info) + val["ipv6"] = af_inet6_vals + by_mac[key] = val + + if af_inet4: + for ip_info in af_inet4: + key = ip_info["addr"] + if not is_valid_ip_addr(key): + continue + val = copy.deepcopy(ip_info) + del val["addr"] + if mac: + val["mac"] = mac + by_ipv4[key] = val + + if af_inet6: + for ip_info in af_inet6: + key = ip_info["addr"] + if not is_valid_ip_addr(key): + continue + val = copy.deepcopy(ip_info) + del val["addr"] + if mac: + val["mac"] = mac + by_ipv6[key] = val + + return host_info + + +def wait_on_network(metadata): + # Determine whether we need to wait on the network coming online. + wait_on_ipv4 = False + wait_on_ipv6 = False + if WAIT_ON_NETWORK in metadata: + wait_on_network = metadata[WAIT_ON_NETWORK] + if WAIT_ON_NETWORK_IPV4 in wait_on_network: + wait_on_ipv4_val = wait_on_network[WAIT_ON_NETWORK_IPV4] + if isinstance(wait_on_ipv4_val, bool): + wait_on_ipv4 = wait_on_ipv4_val + else: + wait_on_ipv4 = util.translate_bool(wait_on_ipv4_val) + if WAIT_ON_NETWORK_IPV6 in wait_on_network: + wait_on_ipv6_val = wait_on_network[WAIT_ON_NETWORK_IPV6] + if isinstance(wait_on_ipv6_val, bool): + wait_on_ipv6 = wait_on_ipv6_val + else: + wait_on_ipv6 = util.translate_bool(wait_on_ipv6_val) + + # Get information about the host. + host_info = None + while host_info is None: + # This loop + sleep results in two logs every second while waiting + # for either ipv4 or ipv6 up. Do we really need to log each iteration + # or can we log once and log on successful exit? + host_info = get_host_info() + + network = host_info.get("network") or {} + interfaces = network.get("interfaces") or {} + by_ipv4 = interfaces.get("by-ipv4") or {} + by_ipv6 = interfaces.get("by-ipv6") or {} + + if wait_on_ipv4: + ipv4_ready = len(by_ipv4) > 0 if by_ipv4 else False + if not ipv4_ready: + host_info = None + + if wait_on_ipv6: + ipv6_ready = len(by_ipv6) > 0 if by_ipv6 else False + if not ipv6_ready: + host_info = None + + if host_info is None: + LOG.debug( + "waiting on network: wait4=%s, ready4=%s, wait6=%s, ready6=%s", + wait_on_ipv4, + ipv4_ready, + wait_on_ipv6, + ipv6_ready, + ) + time.sleep(1) + + LOG.debug("waiting on network complete") + return host_info + + +def main(): + """ + Executed when this file is used as a program. + """ + try: + logging.setupBasicLogging() + except Exception: + pass + metadata = { + "wait-on-network": {"ipv4": True, "ipv6": "false"}, + "network": {"config": {"dhcp": True}}, + } + host_info = wait_on_network(metadata) + metadata = util.mergemanydict([metadata, host_info]) + print(util.json_dumps(metadata)) + + +if __name__ == "__main__": + main() + +# vi: ts=4 expandtab diff --git a/doc/rtd/topics/availability.rst b/doc/rtd/topics/availability.rst index e0644534..71827177 100644 --- a/doc/rtd/topics/availability.rst +++ b/doc/rtd/topics/availability.rst @@ -67,5 +67,6 @@ Additionally, cloud-init is supported on these private clouds: - LXD - KVM - Metal-as-a-Service (MAAS) +- VMware .. vi: textwidth=79 diff --git a/doc/rtd/topics/datasources.rst b/doc/rtd/topics/datasources.rst index 497b1467..f5aee1c2 100644 --- a/doc/rtd/topics/datasources.rst +++ b/doc/rtd/topics/datasources.rst @@ -50,6 +50,7 @@ The following is a list of documents for each supported datasource: datasources/upcloud.rst datasources/zstack.rst datasources/vultr.rst + datasources/vmware.rst Creation ======== diff --git a/doc/rtd/topics/datasources/vmware.rst b/doc/rtd/topics/datasources/vmware.rst new file mode 100644 index 00000000..996eb61f --- /dev/null +++ b/doc/rtd/topics/datasources/vmware.rst @@ -0,0 +1,359 @@ +.. _datasource_vmware: + +VMware +====== + +This datasource is for use with systems running on a VMware platform such as +vSphere and currently supports the following data transports: + + +* `GuestInfo <https://github.com/vmware/govmomi/blob/master/govc/USAGE.md#vmchange>`_ keys + +Configuration +------------- + +The configuration method is dependent upon the transport: + +GuestInfo Keys +^^^^^^^^^^^^^^ + +One method of providing meta, user, and vendor data is by setting the following +key/value pairs on a VM's ``extraConfig`` `property <https://vdc-repo.vmware.com/vmwb-repository/dcr-public/723e7f8b-4f21-448b-a830-5f22fd931b01/5a8257bd-7f41-4423-9a73-03307535bd42/doc/vim.vm.ConfigInfo.html>`_ : + +.. list-table:: + :header-rows: 1 + + * - Property + - Description + * - ``guestinfo.metadata`` + - A YAML or JSON document containing the cloud-init metadata. + * - ``guestinfo.metadata.encoding`` + - The encoding type for ``guestinfo.metadata``. + * - ``guestinfo.userdata`` + - A YAML document containing the cloud-init user data. + * - ``guestinfo.userdata.encoding`` + - The encoding type for ``guestinfo.userdata``. + * - ``guestinfo.vendordata`` + - A YAML document containing the cloud-init vendor data. + * - ``guestinfo.vendordata.encoding`` + - The encoding type for ``guestinfo.vendordata``. + + +All ``guestinfo.*.encoding`` values may be set to ``base64`` or +``gzip+base64``. + +Features +-------- + +This section reviews several features available in this datasource, regardless +of how the meta, user, and vendor data was discovered. + +Instance data and lazy networks +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +One of the hallmarks of cloud-init is `its use of instance-data and JINJA +queries <../instancedata.html#using-instance-data>`_ +-- the ability to write queries in user and vendor data that reference runtime +information present in ``/run/cloud-init/instance-data.json``. This works well +when the metadata provides all of the information up front, such as the network +configuration. For systems that rely on DHCP, however, this information may not +be available when the metadata is persisted to disk. + +This datasource ensures that even if the instance is using DHCP to configure +networking, the same details about the configured network are available in +``/run/cloud-init/instance-data.json`` as if static networking was used. This +information collected at runtime is easy to demonstrate by executing the +datasource on the command line. From the root of this repository, run the +following command: + +.. code-block:: bash + + PYTHONPATH="$(pwd)" python3 cloudinit/sources/DataSourceVMware.py + +The above command will result in output similar to the below JSON: + +.. code-block:: json + + { + "hostname": "akutz.localhost", + "local-hostname": "akutz.localhost", + "local-ipv4": "192.168.0.188", + "local_hostname": "akutz.localhost", + "network": { + "config": { + "dhcp": true + }, + "interfaces": { + "by-ipv4": { + "172.0.0.2": { + "netmask": "255.255.255.255", + "peer": "172.0.0.2" + }, + "192.168.0.188": { + "broadcast": "192.168.0.255", + "mac": "64:4b:f0:18:9a:21", + "netmask": "255.255.255.0" + } + }, + "by-ipv6": { + "fd8e:d25e:c5b6:1:1f5:b2fd:8973:22f2": { + "flags": 208, + "mac": "64:4b:f0:18:9a:21", + "netmask": "ffff:ffff:ffff:ffff::/64" + } + }, + "by-mac": { + "64:4b:f0:18:9a:21": { + "ipv4": [ + { + "addr": "192.168.0.188", + "broadcast": "192.168.0.255", + "netmask": "255.255.255.0" + } + ], + "ipv6": [ + { + "addr": "fd8e:d25e:c5b6:1:1f5:b2fd:8973:22f2", + "flags": 208, + "netmask": "ffff:ffff:ffff:ffff::/64" + } + ] + }, + "ac:de:48:00:11:22": { + "ipv6": [] + } + } + } + }, + "wait-on-network": { + "ipv4": true, + "ipv6": "false" + } + } + + +Redacting sensitive information +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Sometimes the cloud-init userdata might contain sensitive information, and it +may be desirable to have the ``guestinfo.userdata`` key (or other guestinfo +keys) redacted as soon as its data is read by the datasource. This is possible +by adding the following to the metadata: + +.. code-block:: yaml + + redact: # formerly named cleanup-guestinfo, which will also work + - userdata + - vendordata + +When the above snippet is added to the metadata, the datasource will iterate +over the elements in the ``redact`` array and clear each of the keys. For +example, when the guestinfo transport is used, the above snippet will cause +the following commands to be executed: + +.. code-block:: shell + + vmware-rpctool "info-set guestinfo.userdata ---" + vmware-rpctool "info-set guestinfo.userdata.encoding " + vmware-rpctool "info-set guestinfo.vendordata ---" + vmware-rpctool "info-set guestinfo.vendordata.encoding " + +Please note that keys are set to the valid YAML string ``---`` as it is not +possible remove an existing key from the guestinfo key-space. A key's analogous +encoding property will be set to a single white-space character, causing the +datasource to treat the actual key value as plain-text, thereby loading it as +an empty YAML doc (hence the aforementioned ``---``\ ). + +Reading the local IP addresses +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This datasource automatically discovers the local IPv4 and IPv6 addresses for +a guest operating system based on the default routes. However, when inspecting +a VM externally, it's not possible to know what the *default* IP address is for +the guest OS. That's why this datasource sets the discovered, local IPv4 and +IPv6 addresses back in the guestinfo namespace as the following keys: + + +* ``guestinfo.local-ipv4`` +* ``guestinfo.local-ipv6`` + +It is possible that a host may not have any default, local IP addresses. It's +also possible the reported, local addresses are link-local addresses. But these +two keys may be used to discover what this datasource determined were the local +IPv4 and IPv6 addresses for a host. + +Waiting on the network +^^^^^^^^^^^^^^^^^^^^^^ + +Sometimes cloud-init may bring up the network, but it will not finish coming +online before the datasource's ``setup`` function is called, resulting in an +``/var/run/cloud-init/instance-data.json`` file that does not have the correct +network information. It is possible to instruct the datasource to wait until an +IPv4 or IPv6 address is available before writing the instance data with the +following metadata properties: + +.. code-block:: yaml + + wait-on-network: + ipv4: true + ipv6: true + +If either of the above values are true, then the datasource will sleep for a +second, check the network status, and repeat until one or both addresses from +the specified families are available. + +Walkthrough +----------- + +The following series of steps is a demonstration on how to configure a VM with +this datasource: + + +#. Create the metadata file for the VM. Save the following YAML to a file named + ``metadata.yaml``\ : + + .. code-block:: yaml + + instance-id: cloud-vm + local-hostname: cloud-vm + network: + version: 2 + ethernets: + nics: + match: + name: ens* + dhcp4: yes + +#. Create the userdata file ``userdata.yaml``\ : + + .. code-block:: yaml + + #cloud-config + + users: + - default + - name: akutz + primary_group: akutz + sudo: ALL=(ALL) NOPASSWD:ALL + groups: sudo, wheel + ssh_import_id: None + lock_passwd: true + ssh_authorized_keys: + - ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDE0c5FczvcGSh/tG4iw+Fhfi/O5/EvUM/96js65tly4++YTXK1d9jcznPS5ruDlbIZ30oveCBd3kT8LLVFwzh6hepYTf0YmCTpF4eDunyqmpCXDvVscQYRXyasEm5olGmVe05RrCJSeSShAeptv4ueIn40kZKOghinGWLDSZG4+FFfgrmcMCpx5YSCtX2gvnEYZJr0czt4rxOZuuP7PkJKgC/mt2PcPjooeX00vAj81jjU2f3XKrjjz2u2+KIt9eba+vOQ6HiC8c2IzRkUAJ5i1atLy8RIbejo23+0P4N2jjk17QySFOVHwPBDTYb0/0M/4ideeU74EN/CgVsvO6JrLsPBR4dojkV5qNbMNxIVv5cUwIy2ThlLgqpNCeFIDLCWNZEFKlEuNeSQ2mPtIO7ETxEL2Cz5y/7AIuildzYMc6wi2bofRC8HmQ7rMXRWdwLKWsR0L7SKjHblIwarxOGqLnUI+k2E71YoP7SZSlxaKi17pqkr0OMCF+kKqvcvHAQuwGqyumTEWOlH6TCx1dSPrW+pVCZSHSJtSTfDW2uzL6y8k10MT06+pVunSrWo5LHAXcS91htHV1M1UrH/tZKSpjYtjMb5+RonfhaFRNzvj7cCE1f3Kp8UVqAdcGBTtReoE8eRUT63qIxjw03a7VwAyB2w+9cu1R9/vAo8SBeRqw== sakutz@gmail.com + +#. Please note this step requires that the VM be powered off. All of the + commands below use the VMware CLI tool, `govc <https://github.com/vmware/govmomi/blob/master/govc>`_. + + Go ahead and assign the path to the VM to the environment variable ``VM``\ : + + .. code-block:: shell + + export VM="/inventory/path/to/the/vm" + +#. Power off the VM: + + .. raw:: html + + <hr /> + + ⚠️ <strong>First Boot Mode</strong> + + To ensure the next power-on operation results in a first-boot scenario for + cloud-init, it may be necessary to run the following command just before + powering off the VM: + + .. code-block:: bash + + cloud-init clean + + Otherwise cloud-init may not run in first-boot mode. For more information + on how the boot mode is determined, please see the + `First Boot Documentation <../boot.html#first-boot-determination>`_. + + .. raw:: html + + <hr /> + + .. code-block:: shell + + govc vm.power -off "${VM}" + +#. + Export the environment variables that contain the cloud-init metadata and + userdata: + + .. code-block:: shell + + export METADATA=$(gzip -c9 <metadata.yaml | { base64 -w0 2>/dev/null || base64; }) \ + USERDATA=$(gzip -c9 <userdata.yaml | { base64 -w0 2>/dev/null || base64; }) + +#. + Assign the metadata and userdata to the VM: + + .. code-block:: shell + + govc vm.change -vm "${VM}" \ + -e guestinfo.metadata="${METADATA}" \ + -e guestinfo.metadata.encoding="gzip+base64" \ + -e guestinfo.userdata="${USERDATA}" \ + -e guestinfo.userdata.encoding="gzip+base64" + + Please note the above commands include specifying the encoding for the + properties. This is important as it informs the datasource how to decode + the data for cloud-init. Valid values for ``metadata.encoding`` and + ``userdata.encoding`` include: + + + * ``base64`` + * ``gzip+base64`` + +#. + Power on the VM: + + .. code-block:: shell + + govc vm.power -vm "${VM}" -on + +If all went according to plan, the CentOS box is: + +* Locked down, allowing SSH access only for the user in the userdata +* Configured for a dynamic IP address via DHCP +* Has a hostname of ``cloud-vm`` + +Examples +-------- + +This section reviews common configurations: + +Setting the hostname +^^^^^^^^^^^^^^^^^^^^ + +The hostname is set by way of the metadata key ``local-hostname``. + +Setting the instance ID +^^^^^^^^^^^^^^^^^^^^^^^ + +The instance ID may be set by way of the metadata key ``instance-id``. However, +if this value is absent then then the instance ID is read from the file +``/sys/class/dmi/id/product_uuid``. + +Providing public SSH keys +^^^^^^^^^^^^^^^^^^^^^^^^^ + +The public SSH keys may be set by way of the metadata key ``public-keys-data``. +Each newline-terminated string will be interpreted as a separate SSH public +key, which will be placed in distro's default user's +``~/.ssh/authorized_keys``. If the value is empty or absent, then nothing will +be written to ``~/.ssh/authorized_keys``. + +Configuring the network +^^^^^^^^^^^^^^^^^^^^^^^ + +The network is configured by setting the metadata key ``network`` with a value +consistent with Network Config Versions +`1 <../network-config-format-v1.html>`_ or +`2 <../network-config-format-v2.html>`_\ , depending on the Linux +distro's version of cloud-init. + +The metadata key ``network.encoding`` may be used to indicate the format of +the metadata key "network". Valid encodings are ``base64`` and ``gzip+base64``. diff --git a/requirements.txt b/requirements.txt index 5817da3b..41d01d62 100644 --- a/requirements.txt +++ b/requirements.txt @@ -32,3 +32,12 @@ jsonpatch # For validating cloud-config sections per schema definitions jsonschema + +# Used by DataSourceVMware to inspect the host's network configuration during +# the "setup()" function. +# +# This allows a host that uses DHCP to bring up the network during BootLocal +# and still participate in instance-data by gathering the network in detail at +# runtime and merge that information into the metadata and repersist that to +# disk. +netifaces>=0.10.9 diff --git a/tests/unittests/test_datasource/test_common.py b/tests/unittests/test_datasource/test_common.py index 5e9c547a..00f0a78c 100644 --- a/tests/unittests/test_datasource/test_common.py +++ b/tests/unittests/test_datasource/test_common.py @@ -29,6 +29,7 @@ from cloudinit.sources import ( DataSourceSmartOS as SmartOS, DataSourceUpCloud as UpCloud, DataSourceVultr as Vultr, + DataSourceVMware as VMware, ) from cloudinit.sources import DataSourceNone as DSNone @@ -52,6 +53,7 @@ DEFAULT_LOCAL = [ RbxCloud.DataSourceRbxCloud, Scaleway.DataSourceScaleway, UpCloud.DataSourceUpCloudLocal, + VMware.DataSourceVMware, ] DEFAULT_NETWORK = [ @@ -68,6 +70,7 @@ DEFAULT_NETWORK = [ OpenStack.DataSourceOpenStack, OVF.DataSourceOVFNet, UpCloud.DataSourceUpCloud, + VMware.DataSourceVMware, ] diff --git a/tests/unittests/test_datasource/test_vmware.py b/tests/unittests/test_datasource/test_vmware.py new file mode 100644 index 00000000..597db7c8 --- /dev/null +++ b/tests/unittests/test_datasource/test_vmware.py @@ -0,0 +1,377 @@ +# Copyright (c) 2021 VMware, Inc. All Rights Reserved. +# +# Authors: Andrew Kutz <akutz@vmware.com> +# +# This file is part of cloud-init. See LICENSE file for license information. + +import base64 +import gzip +from cloudinit import dmi, helpers, safeyaml +from cloudinit import settings +from cloudinit.sources import DataSourceVMware +from cloudinit.tests.helpers import ( + mock, + CiTestCase, + FilesystemMockingTestCase, + populate_dir, +) + +import os + +PRODUCT_NAME_FILE_PATH = "/sys/class/dmi/id/product_name" +PRODUCT_NAME = "VMware7,1" +PRODUCT_UUID = "82343CED-E4C7-423B-8F6B-0D34D19067AB" +REROOT_FILES = { + DataSourceVMware.PRODUCT_UUID_FILE_PATH: PRODUCT_UUID, + PRODUCT_NAME_FILE_PATH: PRODUCT_NAME, +} + +VMW_MULTIPLE_KEYS = [ + "ssh-rsa AAAAB3NzaC1yc2EAAAA... test1@vmw.com", + "ssh-rsa AAAAB3NzaC1yc2EAAAA... test2@vmw.com", +] +VMW_SINGLE_KEY = "ssh-rsa AAAAB3NzaC1yc2EAAAA... test@vmw.com" + +VMW_METADATA_YAML = """instance-id: cloud-vm +local-hostname: cloud-vm +network: + version: 2 + ethernets: + nics: + match: + name: ens* + dhcp4: yes +""" + +VMW_USERDATA_YAML = """## template: jinja +#cloud-config +users: +- default +""" + +VMW_VENDORDATA_YAML = """## template: jinja +#cloud-config +runcmd: +- echo "Hello, world." +""" + + +class TestDataSourceVMware(CiTestCase): + """ + Test common functionality that is not transport specific. + """ + + def setUp(self): + super(TestDataSourceVMware, self).setUp() + self.tmp = self.tmp_dir() + + def test_no_data_access_method(self): + ds = get_ds(self.tmp) + ds.vmware_rpctool = None + ret = ds.get_data() + self.assertFalse(ret) + + def test_get_host_info(self): + host_info = DataSourceVMware.get_host_info() + self.assertTrue(host_info) + self.assertTrue(host_info["hostname"]) + self.assertTrue(host_info["local-hostname"]) + self.assertTrue(host_info["local_hostname"]) + self.assertTrue(host_info[DataSourceVMware.LOCAL_IPV4]) + + +class TestDataSourceVMwareEnvVars(FilesystemMockingTestCase): + """ + Test the envvar transport. + """ + + def setUp(self): + super(TestDataSourceVMwareEnvVars, self).setUp() + self.tmp = self.tmp_dir() + os.environ[DataSourceVMware.VMX_GUESTINFO] = "1" + self.create_system_files() + + def tearDown(self): + del os.environ[DataSourceVMware.VMX_GUESTINFO] + return super(TestDataSourceVMwareEnvVars, self).tearDown() + + def create_system_files(self): + rootd = self.tmp_dir() + populate_dir( + rootd, + { + DataSourceVMware.PRODUCT_UUID_FILE_PATH: PRODUCT_UUID, + }, + ) + self.assertTrue(self.reRoot(rootd)) + + def assert_get_data_ok(self, m_fn, m_fn_call_count=6): + ds = get_ds(self.tmp) + ds.vmware_rpctool = None + ret = ds.get_data() + self.assertTrue(ret) + self.assertEqual(m_fn_call_count, m_fn.call_count) + self.assertEqual( + ds.data_access_method, DataSourceVMware.DATA_ACCESS_METHOD_ENVVAR + ) + return ds + + def assert_metadata(self, metadata, m_fn, m_fn_call_count=6): + ds = self.assert_get_data_ok(m_fn, m_fn_call_count) + assert_metadata(self, ds, metadata) + + @mock.patch( + "cloudinit.sources.DataSourceVMware.guestinfo_envvar_get_value" + ) + def test_get_subplatform(self, m_fn): + m_fn.side_effect = [VMW_METADATA_YAML, "", "", "", "", ""] + ds = self.assert_get_data_ok(m_fn, m_fn_call_count=4) + self.assertEqual( + ds.subplatform, + "%s (%s)" + % ( + DataSourceVMware.DATA_ACCESS_METHOD_ENVVAR, + DataSourceVMware.get_guestinfo_envvar_key_name("metadata"), + ), + ) + + @mock.patch( + "cloudinit.sources.DataSourceVMware.guestinfo_envvar_get_value" + ) + def test_get_data_metadata_only(self, m_fn): + m_fn.side_effect = [VMW_METADATA_YAML, "", "", "", "", ""] + self.assert_get_data_ok(m_fn, m_fn_call_count=4) + + @mock.patch( + "cloudinit.sources.DataSourceVMware.guestinfo_envvar_get_value" + ) + def test_get_data_userdata_only(self, m_fn): + m_fn.side_effect = ["", VMW_USERDATA_YAML, "", ""] + self.assert_get_data_ok(m_fn, m_fn_call_count=4) + + @mock.patch( + "cloudinit.sources.DataSourceVMware.guestinfo_envvar_get_value" + ) + def test_get_data_vendordata_only(self, m_fn): + m_fn.side_effect = ["", "", VMW_VENDORDATA_YAML, ""] + self.assert_get_data_ok(m_fn, m_fn_call_count=4) + + @mock.patch( + "cloudinit.sources.DataSourceVMware.guestinfo_envvar_get_value" + ) + def test_get_data_metadata_base64(self, m_fn): + data = base64.b64encode(VMW_METADATA_YAML.encode("utf-8")) + m_fn.side_effect = [data, "base64", "", ""] + self.assert_get_data_ok(m_fn, m_fn_call_count=4) + + @mock.patch( + "cloudinit.sources.DataSourceVMware.guestinfo_envvar_get_value" + ) + def test_get_data_metadata_b64(self, m_fn): + data = base64.b64encode(VMW_METADATA_YAML.encode("utf-8")) + m_fn.side_effect = [data, "b64", "", ""] + self.assert_get_data_ok(m_fn, m_fn_call_count=4) + + @mock.patch( + "cloudinit.sources.DataSourceVMware.guestinfo_envvar_get_value" + ) + def test_get_data_metadata_gzip_base64(self, m_fn): + data = VMW_METADATA_YAML.encode("utf-8") + data = gzip.compress(data) + data = base64.b64encode(data) + m_fn.side_effect = [data, "gzip+base64", "", ""] + self.assert_get_data_ok(m_fn, m_fn_call_count=4) + + @mock.patch( + "cloudinit.sources.DataSourceVMware.guestinfo_envvar_get_value" + ) + def test_get_data_metadata_gz_b64(self, m_fn): + data = VMW_METADATA_YAML.encode("utf-8") + data = gzip.compress(data) + data = base64.b64encode(data) + m_fn.side_effect = [data, "gz+b64", "", ""] + self.assert_get_data_ok(m_fn, m_fn_call_count=4) + + @mock.patch( + "cloudinit.sources.DataSourceVMware.guestinfo_envvar_get_value" + ) + def test_metadata_single_ssh_key(self, m_fn): + metadata = DataSourceVMware.load_json_or_yaml(VMW_METADATA_YAML) + metadata["public_keys"] = VMW_SINGLE_KEY + metadata_yaml = safeyaml.dumps(metadata) + m_fn.side_effect = [metadata_yaml, "", "", ""] + self.assert_metadata(metadata, m_fn, m_fn_call_count=4) + + @mock.patch( + "cloudinit.sources.DataSourceVMware.guestinfo_envvar_get_value" + ) + def test_metadata_multiple_ssh_keys(self, m_fn): + metadata = DataSourceVMware.load_json_or_yaml(VMW_METADATA_YAML) + metadata["public_keys"] = VMW_MULTIPLE_KEYS + metadata_yaml = safeyaml.dumps(metadata) + m_fn.side_effect = [metadata_yaml, "", "", ""] + self.assert_metadata(metadata, m_fn, m_fn_call_count=4) + + +class TestDataSourceVMwareGuestInfo(FilesystemMockingTestCase): + """ + Test the guestinfo transport on a VMware platform. + """ + + def setUp(self): + super(TestDataSourceVMwareGuestInfo, self).setUp() + self.tmp = self.tmp_dir() + self.create_system_files() + + def create_system_files(self): + rootd = self.tmp_dir() + populate_dir( + rootd, + { + DataSourceVMware.PRODUCT_UUID_FILE_PATH: PRODUCT_UUID, + PRODUCT_NAME_FILE_PATH: PRODUCT_NAME, + }, + ) + self.assertTrue(self.reRoot(rootd)) + + def assert_get_data_ok(self, m_fn, m_fn_call_count=6): + ds = get_ds(self.tmp) + ds.vmware_rpctool = "vmware-rpctool" + ret = ds.get_data() + self.assertTrue(ret) + self.assertEqual(m_fn_call_count, m_fn.call_count) + self.assertEqual( + ds.data_access_method, + DataSourceVMware.DATA_ACCESS_METHOD_GUESTINFO, + ) + return ds + + def assert_metadata(self, metadata, m_fn, m_fn_call_count=6): + ds = self.assert_get_data_ok(m_fn, m_fn_call_count) + assert_metadata(self, ds, metadata) + + def test_ds_valid_on_vmware_platform(self): + system_type = dmi.read_dmi_data("system-product-name") + self.assertEqual(system_type, PRODUCT_NAME) + + @mock.patch("cloudinit.sources.DataSourceVMware.guestinfo_get_value") + def test_get_subplatform(self, m_fn): + m_fn.side_effect = [VMW_METADATA_YAML, "", "", "", "", ""] + ds = self.assert_get_data_ok(m_fn, m_fn_call_count=4) + self.assertEqual( + ds.subplatform, + "%s (%s)" + % ( + DataSourceVMware.DATA_ACCESS_METHOD_GUESTINFO, + DataSourceVMware.get_guestinfo_key_name("metadata"), + ), + ) + + @mock.patch("cloudinit.sources.DataSourceVMware.guestinfo_get_value") + def test_get_data_userdata_only(self, m_fn): + m_fn.side_effect = ["", VMW_USERDATA_YAML, "", ""] + self.assert_get_data_ok(m_fn, m_fn_call_count=4) + + @mock.patch("cloudinit.sources.DataSourceVMware.guestinfo_get_value") + def test_get_data_vendordata_only(self, m_fn): + m_fn.side_effect = ["", "", VMW_VENDORDATA_YAML, ""] + self.assert_get_data_ok(m_fn, m_fn_call_count=4) + + @mock.patch("cloudinit.sources.DataSourceVMware.guestinfo_get_value") + def test_metadata_single_ssh_key(self, m_fn): + metadata = DataSourceVMware.load_json_or_yaml(VMW_METADATA_YAML) + metadata["public_keys"] = VMW_SINGLE_KEY + metadata_yaml = safeyaml.dumps(metadata) + m_fn.side_effect = [metadata_yaml, "", "", ""] + self.assert_metadata(metadata, m_fn, m_fn_call_count=4) + + @mock.patch("cloudinit.sources.DataSourceVMware.guestinfo_get_value") + def test_metadata_multiple_ssh_keys(self, m_fn): + metadata = DataSourceVMware.load_json_or_yaml(VMW_METADATA_YAML) + metadata["public_keys"] = VMW_MULTIPLE_KEYS + metadata_yaml = safeyaml.dumps(metadata) + m_fn.side_effect = [metadata_yaml, "", "", ""] + self.assert_metadata(metadata, m_fn, m_fn_call_count=4) + + @mock.patch("cloudinit.sources.DataSourceVMware.guestinfo_get_value") + def test_get_data_metadata_base64(self, m_fn): + data = base64.b64encode(VMW_METADATA_YAML.encode("utf-8")) + m_fn.side_effect = [data, "base64", "", ""] + self.assert_get_data_ok(m_fn, m_fn_call_count=4) + + @mock.patch("cloudinit.sources.DataSourceVMware.guestinfo_get_value") + def test_get_data_metadata_b64(self, m_fn): + data = base64.b64encode(VMW_METADATA_YAML.encode("utf-8")) + m_fn.side_effect = [data, "b64", "", ""] + self.assert_get_data_ok(m_fn, m_fn_call_count=4) + + @mock.patch("cloudinit.sources.DataSourceVMware.guestinfo_get_value") + def test_get_data_metadata_gzip_base64(self, m_fn): + data = VMW_METADATA_YAML.encode("utf-8") + data = gzip.compress(data) + data = base64.b64encode(data) + m_fn.side_effect = [data, "gzip+base64", "", ""] + self.assert_get_data_ok(m_fn, m_fn_call_count=4) + + @mock.patch("cloudinit.sources.DataSourceVMware.guestinfo_get_value") + def test_get_data_metadata_gz_b64(self, m_fn): + data = VMW_METADATA_YAML.encode("utf-8") + data = gzip.compress(data) + data = base64.b64encode(data) + m_fn.side_effect = [data, "gz+b64", "", ""] + self.assert_get_data_ok(m_fn, m_fn_call_count=4) + + +class TestDataSourceVMwareGuestInfo_InvalidPlatform(FilesystemMockingTestCase): + """ + Test the guestinfo transport on a non-VMware platform. + """ + + def setUp(self): + super(TestDataSourceVMwareGuestInfo_InvalidPlatform, self).setUp() + self.tmp = self.tmp_dir() + self.create_system_files() + + def create_system_files(self): + rootd = self.tmp_dir() + populate_dir( + rootd, + { + DataSourceVMware.PRODUCT_UUID_FILE_PATH: PRODUCT_UUID, + }, + ) + self.assertTrue(self.reRoot(rootd)) + + @mock.patch("cloudinit.sources.DataSourceVMware.guestinfo_get_value") + def test_ds_invalid_on_non_vmware_platform(self, m_fn): + system_type = dmi.read_dmi_data("system-product-name") + self.assertEqual(system_type, None) + + m_fn.side_effect = [VMW_METADATA_YAML, "", "", "", "", ""] + ds = get_ds(self.tmp) + ds.vmware_rpctool = "vmware-rpctool" + ret = ds.get_data() + self.assertFalse(ret) + + +def assert_metadata(test_obj, ds, metadata): + test_obj.assertEqual(metadata.get("instance-id"), ds.get_instance_id()) + test_obj.assertEqual(metadata.get("local-hostname"), ds.get_hostname()) + + expected_public_keys = metadata.get("public_keys") + if not isinstance(expected_public_keys, list): + expected_public_keys = [expected_public_keys] + + test_obj.assertEqual(expected_public_keys, ds.get_public_ssh_keys()) + test_obj.assertIsInstance(ds.get_public_ssh_keys(), list) + + +def get_ds(temp_dir): + ds = DataSourceVMware.DataSourceVMware( + settings.CFG_BUILTIN, None, helpers.Paths({"run_dir": temp_dir}) + ) + ds.vmware_rpctool = "vmware-rpctool" + return ds + + +# vi: ts=4 expandtab diff --git a/tests/unittests/test_ds_identify.py b/tests/unittests/test_ds_identify.py index 1d8aaf18..8617d7bd 100644 --- a/tests/unittests/test_ds_identify.py +++ b/tests/unittests/test_ds_identify.py @@ -649,6 +649,50 @@ class TestDsIdentify(DsIdentifyBase): """EC2: bobrightbox.com in product_serial is not brightbox'""" self._test_ds_not_found('Ec2-E24Cloud-negative') + def test_vmware_no_valid_transports(self): + """VMware: no valid transports""" + self._test_ds_not_found('VMware-NoValidTransports') + + def test_vmware_envvar_no_data(self): + """VMware: envvar transport no data""" + self._test_ds_not_found('VMware-EnvVar-NoData') + + def test_vmware_envvar_no_virt_id(self): + """VMware: envvar transport success if no virt id""" + self._test_ds_found('VMware-EnvVar-NoVirtID') + + def test_vmware_envvar_activated_by_metadata(self): + """VMware: envvar transport activated by metadata""" + self._test_ds_found('VMware-EnvVar-Metadata') + + def test_vmware_envvar_activated_by_userdata(self): + """VMware: envvar transport activated by userdata""" + self._test_ds_found('VMware-EnvVar-Userdata') + + def test_vmware_envvar_activated_by_vendordata(self): + """VMware: envvar transport activated by vendordata""" + self._test_ds_found('VMware-EnvVar-Vendordata') + + def test_vmware_guestinfo_no_data(self): + """VMware: guestinfo transport no data""" + self._test_ds_not_found('VMware-GuestInfo-NoData') + + def test_vmware_guestinfo_no_virt_id(self): + """VMware: guestinfo transport fails if no virt id""" + self._test_ds_not_found('VMware-GuestInfo-NoVirtID') + + def test_vmware_guestinfo_activated_by_metadata(self): + """VMware: guestinfo transport activated by metadata""" + self._test_ds_found('VMware-GuestInfo-Metadata') + + def test_vmware_guestinfo_activated_by_userdata(self): + """VMware: guestinfo transport activated by userdata""" + self._test_ds_found('VMware-GuestInfo-Userdata') + + def test_vmware_guestinfo_activated_by_vendordata(self): + """VMware: guestinfo transport activated by vendordata""" + self._test_ds_found('VMware-GuestInfo-Vendordata') + class TestBSDNoSys(DsIdentifyBase): """Test *BSD code paths @@ -1136,7 +1180,240 @@ VALID_CFG = { 'Ec2-E24Cloud-negative': { 'ds': 'Ec2', 'files': {P_SYS_VENDOR: 'e24cloudyday\n'}, - } + }, + 'VMware-NoValidTransports': { + 'ds': 'VMware', + 'mocks': [ + MOCK_VIRT_IS_VMWARE, + ], + }, + 'VMware-EnvVar-NoData': { + 'ds': 'VMware', + 'mocks': [ + { + 'name': 'vmware_has_envvar_vmx_guestinfo', + 'ret': 0, + }, + { + 'name': 'vmware_has_envvar_vmx_guestinfo_metadata', + 'ret': 1, + }, + { + 'name': 'vmware_has_envvar_vmx_guestinfo_userdata', + 'ret': 1, + }, + { + 'name': 'vmware_has_envvar_vmx_guestinfo_vendordata', + 'ret': 1, + }, + MOCK_VIRT_IS_VMWARE, + ], + }, + 'VMware-EnvVar-NoVirtID': { + 'ds': 'VMware', + 'mocks': [ + { + 'name': 'vmware_has_envvar_vmx_guestinfo', + 'ret': 0, + }, + { + 'name': 'vmware_has_envvar_vmx_guestinfo_metadata', + 'ret': 0, + }, + { + 'name': 'vmware_has_envvar_vmx_guestinfo_userdata', + 'ret': 1, + }, + { + 'name': 'vmware_has_envvar_vmx_guestinfo_vendordata', + 'ret': 1, + }, + ], + }, + 'VMware-EnvVar-Metadata': { + 'ds': 'VMware', + 'mocks': [ + { + 'name': 'vmware_has_envvar_vmx_guestinfo', + 'ret': 0, + }, + { + 'name': 'vmware_has_envvar_vmx_guestinfo_metadata', + 'ret': 0, + }, + { + 'name': 'vmware_has_envvar_vmx_guestinfo_userdata', + 'ret': 1, + }, + { + 'name': 'vmware_has_envvar_vmx_guestinfo_vendordata', + 'ret': 1, + }, + MOCK_VIRT_IS_VMWARE, + ], + }, + 'VMware-EnvVar-Userdata': { + 'ds': 'VMware', + 'mocks': [ + { + 'name': 'vmware_has_envvar_vmx_guestinfo', + 'ret': 0, + }, + { + 'name': 'vmware_has_envvar_vmx_guestinfo_metadata', + 'ret': 1, + }, + { + 'name': 'vmware_has_envvar_vmx_guestinfo_userdata', + 'ret': 0, + }, + { + 'name': 'vmware_has_envvar_vmx_guestinfo_vendordata', + 'ret': 1, + }, + MOCK_VIRT_IS_VMWARE, + ], + }, + 'VMware-EnvVar-Vendordata': { + 'ds': 'VMware', + 'mocks': [ + { + 'name': 'vmware_has_envvar_vmx_guestinfo', + 'ret': 0, + }, + { + 'name': 'vmware_has_envvar_vmx_guestinfo_metadata', + 'ret': 1, + }, + { + 'name': 'vmware_has_envvar_vmx_guestinfo_userdata', + 'ret': 1, + }, + { + 'name': 'vmware_has_envvar_vmx_guestinfo_vendordata', + 'ret': 0, + }, + MOCK_VIRT_IS_VMWARE, + ], + }, + 'VMware-GuestInfo-NoData': { + 'ds': 'VMware', + 'mocks': [ + { + 'name': 'vmware_has_rpctool', + 'ret': 0, + 'out': '/usr/bin/vmware-rpctool', + }, + { + 'name': 'vmware_rpctool_guestinfo_metadata', + 'ret': 1, + }, + { + 'name': 'vmware_rpctool_guestinfo_userdata', + 'ret': 1, + }, + { + 'name': 'vmware_rpctool_guestinfo_vendordata', + 'ret': 1, + }, + MOCK_VIRT_IS_VMWARE, + ], + }, + 'VMware-GuestInfo-NoVirtID': { + 'ds': 'VMware', + 'mocks': [ + { + 'name': 'vmware_has_rpctool', + 'ret': 0, + 'out': '/usr/bin/vmware-rpctool', + }, + { + 'name': 'vmware_rpctool_guestinfo_metadata', + 'ret': 0, + 'out': '---', + }, + { + 'name': 'vmware_rpctool_guestinfo_userdata', + 'ret': 1, + }, + { + 'name': 'vmware_rpctool_guestinfo_vendordata', + 'ret': 1, + }, + ], + }, + 'VMware-GuestInfo-Metadata': { + 'ds': 'VMware', + 'mocks': [ + { + 'name': 'vmware_has_rpctool', + 'ret': 0, + 'out': '/usr/bin/vmware-rpctool', + }, + { + 'name': 'vmware_rpctool_guestinfo_metadata', + 'ret': 0, + 'out': '---', + }, + { + 'name': 'vmware_rpctool_guestinfo_userdata', + 'ret': 1, + }, + { + 'name': 'vmware_rpctool_guestinfo_vendordata', + 'ret': 1, + }, + MOCK_VIRT_IS_VMWARE, + ], + }, + 'VMware-GuestInfo-Userdata': { + 'ds': 'VMware', + 'mocks': [ + { + 'name': 'vmware_has_rpctool', + 'ret': 0, + 'out': '/usr/bin/vmware-rpctool', + }, + { + 'name': 'vmware_rpctool_guestinfo_metadata', + 'ret': 1, + }, + { + 'name': 'vmware_rpctool_guestinfo_userdata', + 'ret': 0, + 'out': '---', + }, + { + 'name': 'vmware_rpctool_guestinfo_vendordata', + 'ret': 1, + }, + MOCK_VIRT_IS_VMWARE, + ], + }, + 'VMware-GuestInfo-Vendordata': { + 'ds': 'VMware', + 'mocks': [ + { + 'name': 'vmware_has_rpctool', + 'ret': 0, + 'out': '/usr/bin/vmware-rpctool', + }, + { + 'name': 'vmware_rpctool_guestinfo_metadata', + 'ret': 1, + }, + { + 'name': 'vmware_rpctool_guestinfo_userdata', + 'ret': 1, + }, + { + 'name': 'vmware_rpctool_guestinfo_vendordata', + 'ret': 0, + 'out': '---', + }, + MOCK_VIRT_IS_VMWARE, + ], + }, } # vi: ts=4 expandtab diff --git a/tools/.github-cla-signers b/tools/.github-cla-signers index 3c2c6d14..5089dd70 100644 --- a/tools/.github-cla-signers +++ b/tools/.github-cla-signers @@ -1,5 +1,6 @@ ader1990 ajmyyra +akutz AlexBaranowski Aman306 andrewbogott diff --git a/tools/ds-identify b/tools/ds-identify index 73e27c71..234ffa81 100755 --- a/tools/ds-identify +++ b/tools/ds-identify @@ -125,7 +125,7 @@ DI_DSNAME="" # be searched if there is no setting found in config. DI_DSLIST_DEFAULT="MAAS ConfigDrive NoCloud AltCloud Azure Bigstep \ CloudSigma CloudStack DigitalOcean Vultr AliYun Ec2 GCE OpenNebula OpenStack \ -OVF SmartOS Scaleway Hetzner IBMCloud Oracle Exoscale RbxCloud UpCloud" +OVF SmartOS Scaleway Hetzner IBMCloud Oracle Exoscale RbxCloud UpCloud VMware" DI_DSLIST="" DI_MODE="" DI_ON_FOUND="" @@ -1364,6 +1364,80 @@ dscheck_Vultr() { return $DS_NOT_FOUND } +vmware_has_envvar_vmx_guestinfo() { + [ -n "${VMX_GUESTINFO:-}" ] +} + +vmware_has_envvar_vmx_guestinfo_metadata() { + [ -n "${VMX_GUESTINFO_METADATA:-}" ] +} + +vmware_has_envvar_vmx_guestinfo_userdata() { + [ -n "${VMX_GUESTINFO_USERDATA:-}" ] +} + +vmware_has_envvar_vmx_guestinfo_vendordata() { + [ -n "${VMX_GUESTINFO_VENDORDATA:-}" ] +} + +vmware_has_rpctool() { + command -v vmware-rpctool >/dev/null 2>&1 +} + +vmware_rpctool_guestinfo_metadata() { + vmware-rpctool "info-get guestinfo.metadata" +} + +vmware_rpctool_guestinfo_userdata() { + vmware-rpctool "info-get guestinfo.userdata" +} + +vmware_rpctool_guestinfo_vendordata() { + vmware-rpctool "info-get guestinfo.vendordata" +} + +dscheck_VMware() { + # Checks to see if there is valid data for the VMware datasource. + # The data transports are checked in the following order: + # + # * envvars + # * guestinfo + # + # Please note when updating this function with support for new data + # transports, the order should match the order in the _get_data + # function from the file DataSourceVMware.py. + + # Check to see if running in a container and the VMware + # datasource is configured via environment variables. + if vmware_has_envvar_vmx_guestinfo; then + if vmware_has_envvar_vmx_guestinfo_metadata || \ + vmware_has_envvar_vmx_guestinfo_userdata || \ + vmware_has_envvar_vmx_guestinfo_vendordata; then + return "${DS_FOUND}" + fi + fi + + # Do not proceed unless the detected platform is VMware. + if [ ! "${DI_VIRT}" = "vmware" ]; then + return "${DS_NOT_FOUND}" + fi + + # Do not proceed if the vmware-rpctool command is not present. + if ! vmware_has_rpctool; then + return "${DS_NOT_FOUND}" + fi + + # Activate the VMware datasource only if any of the fields used + # by the datasource are present in the guestinfo table. + if { vmware_rpctool_guestinfo_metadata || \ + vmware_rpctool_guestinfo_userdata || \ + vmware_rpctool_guestinfo_vendordata; } >/dev/null 2>&1; then + return "${DS_FOUND}" + fi + + return "${DS_NOT_FOUND}" +} + collect_info() { read_uname_info read_virt |