diff options
| author | Anh Vo <anhvo@microsoft.com> | 2019-04-03 18:23:18 +0000 | 
|---|---|---|
| committer | Server Team CI Bot <josh.powers+server-team-bot@canonical.com> | 2019-04-03 18:23:18 +0000 | 
| commit | 0d8c88393b51db6454491a379dcc2e691551217a (patch) | |
| tree | 73b22343f430ef46bdf40f4b6163b45a5dcded8c /cloudinit | |
| parent | 47c53002ea7a661c674c3e409357db7e8a00297a (diff) | |
| download | vyos-cloud-init-0d8c88393b51db6454491a379dcc2e691551217a.tar.gz vyos-cloud-init-0d8c88393b51db6454491a379dcc2e691551217a.zip | |
DatasourceAzure: add additional logging for azure datasource
Create an Azure logging decorator and use additional ReportEventStack
context managers to provide additional logging details.
Diffstat (limited to 'cloudinit')
| -rwxr-xr-x[-rw-r--r--] | cloudinit/sources/DataSourceAzure.py | 231 | ||||
| -rwxr-xr-x[-rw-r--r--] | cloudinit/sources/helpers/azure.py | 31 | 
2 files changed, 179 insertions, 83 deletions
| diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index b4e3f061..d4230b3c 100644..100755 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -21,10 +21,14 @@ from cloudinit import net  from cloudinit.event import EventType  from cloudinit.net.dhcp import EphemeralDHCPv4  from cloudinit import sources -from cloudinit.sources.helpers.azure import get_metadata_from_fabric  from cloudinit.sources.helpers import netlink  from cloudinit.url_helper import UrlError, readurl, retry_on_url_exc  from cloudinit import util +from cloudinit.reporting import events + +from cloudinit.sources.helpers.azure import (azure_ds_reporter, +                                             azure_ds_telemetry_reporter, +                                             get_metadata_from_fabric)  LOG = logging.getLogger(__name__) @@ -244,6 +248,7 @@ def set_hostname(hostname, hostname_command='hostname'):      util.subp([hostname_command, hostname]) +@azure_ds_telemetry_reporter  @contextlib.contextmanager  def temporary_hostname(temp_hostname, cfg, hostname_command='hostname'):      """ @@ -290,6 +295,7 @@ class DataSourceAzure(sources.DataSource):          root = sources.DataSource.__str__(self)          return "%s [seed=%s]" % (root, self.seed) +    @azure_ds_telemetry_reporter      def bounce_network_with_azure_hostname(self):          # When using cloud-init to provision, we have to set the hostname from          # the metadata and "bounce" the network to force DDNS to update via @@ -315,6 +321,7 @@ class DataSourceAzure(sources.DataSource):                      util.logexc(LOG, "handling set_hostname failed")          return False +    @azure_ds_telemetry_reporter      def get_metadata_from_agent(self):          temp_hostname = self.metadata.get('local-hostname')          agent_cmd = self.ds_cfg['agent_command'] @@ -344,15 +351,18 @@ class DataSourceAzure(sources.DataSource):                  LOG.debug("ssh authentication: "                            "using fingerprint from fabirc") -        # wait very long for public SSH keys to arrive -        # https://bugs.launchpad.net/cloud-init/+bug/1717611 -        missing = util.log_time(logfunc=LOG.debug, -                                msg="waiting for SSH public key files", -                                func=util.wait_for_files, -                                args=(fp_files, 900)) - -        if len(missing): -            LOG.warning("Did not find files, but going on: %s", missing) +        with events.ReportEventStack( +                name="waiting-for-ssh-public-key", +                description="wait for agents to retrieve ssh keys", +                parent=azure_ds_reporter): +            # wait very long for public SSH keys to arrive +            # https://bugs.launchpad.net/cloud-init/+bug/1717611 +            missing = util.log_time(logfunc=LOG.debug, +                                    msg="waiting for SSH public key files", +                                    func=util.wait_for_files, +                                    args=(fp_files, 900)) +            if len(missing): +                LOG.warning("Did not find files, but going on: %s", missing)          metadata = {}          metadata['public-keys'] = key_value or pubkeys_from_crt_files(fp_files) @@ -366,6 +376,7 @@ class DataSourceAzure(sources.DataSource):              subplatform_type = 'seed-dir'          return '%s (%s)' % (subplatform_type, self.seed) +    @azure_ds_telemetry_reporter      def crawl_metadata(self):          """Walk all instance metadata sources returning a dict on success. @@ -467,6 +478,7 @@ class DataSourceAzure(sources.DataSource):          super(DataSourceAzure, self).clear_cached_attrs(attr_defaults)          self._metadata_imds = sources.UNSET +    @azure_ds_telemetry_reporter      def _get_data(self):          """Crawl and process datasource metadata caching metadata as attrs. @@ -513,6 +525,7 @@ class DataSourceAzure(sources.DataSource):          # quickly (local check only) if self.instance_id is still valid          return sources.instance_id_matches_system_uuid(self.get_instance_id()) +    @azure_ds_telemetry_reporter      def setup(self, is_new_instance):          if self._negotiated is False:              LOG.debug("negotiating for %s (new_instance=%s)", @@ -580,6 +593,7 @@ class DataSourceAzure(sources.DataSource):                  if nl_sock:                      nl_sock.close() +    @azure_ds_telemetry_reporter      def _report_ready(self, lease):          """Tells the fabric provisioning has completed """          try: @@ -617,9 +631,14 @@ class DataSourceAzure(sources.DataSource):      def _reprovision(self):          """Initiate the reprovisioning workflow."""          contents = self._poll_imds() -        md, ud, cfg = read_azure_ovf(contents) -        return (md, ud, cfg, {'ovf-env.xml': contents}) - +        with events.ReportEventStack( +                name="reprovisioning-read-azure-ovf", +                description="read azure ovf during reprovisioning", +                parent=azure_ds_reporter): +            md, ud, cfg = read_azure_ovf(contents) +            return (md, ud, cfg, {'ovf-env.xml': contents}) + +    @azure_ds_telemetry_reporter      def _negotiate(self):          """Negotiate with fabric and return data from it. @@ -652,6 +671,7 @@ class DataSourceAzure(sources.DataSource):          util.del_file(REPROVISION_MARKER_FILE)          return fabric_data +    @azure_ds_telemetry_reporter      def activate(self, cfg, is_new_instance):          address_ephemeral_resize(is_new_instance=is_new_instance,                                   preserve_ntfs=self.ds_cfg.get( @@ -690,12 +710,14 @@ def _partitions_on_device(devpath, maxnum=16):      return [] +@azure_ds_telemetry_reporter  def _has_ntfs_filesystem(devpath):      ntfs_devices = util.find_devs_with("TYPE=ntfs", no_cache=True)      LOG.debug('ntfs_devices found = %s', ntfs_devices)      return os.path.realpath(devpath) in ntfs_devices +@azure_ds_telemetry_reporter  def can_dev_be_reformatted(devpath, preserve_ntfs):      """Determine if the ephemeral drive at devpath should be reformatted. @@ -744,43 +766,59 @@ def can_dev_be_reformatted(devpath, preserve_ntfs):                 (cand_part, cand_path, devpath))          return False, msg +    @azure_ds_telemetry_reporter      def count_files(mp):          ignored = set(['dataloss_warning_readme.txt'])          return len([f for f in os.listdir(mp) if f.lower() not in ignored])      bmsg = ('partition %s (%s) on device %s was ntfs formatted' %              (cand_part, cand_path, devpath)) -    try: -        file_count = util.mount_cb(cand_path, count_files, mtype="ntfs", -                                   update_env_for_mount={'LANG': 'C'}) -    except util.MountFailedError as e: -        if "unknown filesystem type 'ntfs'" in str(e): -            return True, (bmsg + ' but this system cannot mount NTFS,' -                          ' assuming there are no important files.' -                          ' Formatting allowed.') -        return False, bmsg + ' but mount of %s failed: %s' % (cand_part, e) - -    if file_count != 0: -        LOG.warning("it looks like you're using NTFS on the ephemeral disk, " -                    'to ensure that filesystem does not get wiped, set ' -                    '%s.%s in config', '.'.join(DS_CFG_PATH), -                    DS_CFG_KEY_PRESERVE_NTFS) -        return False, bmsg + ' but had %d files on it.' % file_count + +    with events.ReportEventStack( +                name="mount-ntfs-and-count", +                description="mount-ntfs-and-count", +                parent=azure_ds_reporter) as evt: +        try: +            file_count = util.mount_cb(cand_path, count_files, mtype="ntfs", +                                       update_env_for_mount={'LANG': 'C'}) +        except util.MountFailedError as e: +            evt.description = "cannot mount ntfs" +            if "unknown filesystem type 'ntfs'" in str(e): +                return True, (bmsg + ' but this system cannot mount NTFS,' +                              ' assuming there are no important files.' +                              ' Formatting allowed.') +            return False, bmsg + ' but mount of %s failed: %s' % (cand_part, e) + +        if file_count != 0: +            evt.description = "mounted and counted %d files" % file_count +            LOG.warning("it looks like you're using NTFS on the ephemeral" +                        " disk, to ensure that filesystem does not get wiped," +                        " set %s.%s in config", '.'.join(DS_CFG_PATH), +                        DS_CFG_KEY_PRESERVE_NTFS) +            return False, bmsg + ' but had %d files on it.' % file_count      return True, bmsg + ' and had no important files. Safe for reformatting.' +@azure_ds_telemetry_reporter  def address_ephemeral_resize(devpath=RESOURCE_DISK_PATH, maxwait=120,                               is_new_instance=False, preserve_ntfs=False):      # wait for ephemeral disk to come up      naplen = .2 -    missing = util.wait_for_files([devpath], maxwait=maxwait, naplen=naplen, -                                  log_pre="Azure ephemeral disk: ") - -    if missing: -        LOG.warning("ephemeral device '%s' did not appear after %d seconds.", -                    devpath, maxwait) -        return +    with events.ReportEventStack( +                name="wait-for-ephemeral-disk", +                description="wait for ephemeral disk", +                parent=azure_ds_reporter): +        missing = util.wait_for_files([devpath], +                                      maxwait=maxwait, +                                      naplen=naplen, +                                      log_pre="Azure ephemeral disk: ") + +        if missing: +            LOG.warning("ephemeral device '%s' did" +                        " not appear after %d seconds.", +                        devpath, maxwait) +            return      result = False      msg = None @@ -808,6 +846,7 @@ def address_ephemeral_resize(devpath=RESOURCE_DISK_PATH, maxwait=120,      return +@azure_ds_telemetry_reporter  def perform_hostname_bounce(hostname, cfg, prev_hostname):      # set the hostname to 'hostname' if it is not already set to that.      # then, if policy is not off, bounce the interface using command @@ -843,6 +882,7 @@ def perform_hostname_bounce(hostname, cfg, prev_hostname):      return True +@azure_ds_telemetry_reporter  def crtfile_to_pubkey(fname, data=None):      pipeline = ('openssl x509 -noout -pubkey < "$0" |'                  'ssh-keygen -i -m PKCS8 -f /dev/stdin') @@ -851,6 +891,7 @@ def crtfile_to_pubkey(fname, data=None):      return out.rstrip() +@azure_ds_telemetry_reporter  def pubkeys_from_crt_files(flist):      pubkeys = []      errors = [] @@ -866,6 +907,7 @@ def pubkeys_from_crt_files(flist):      return pubkeys +@azure_ds_telemetry_reporter  def write_files(datadir, files, dirmode=None):      def _redact_password(cnt, fname): @@ -893,6 +935,7 @@ def write_files(datadir, files, dirmode=None):          util.write_file(filename=fname, content=content, mode=0o600) +@azure_ds_telemetry_reporter  def invoke_agent(cmd):      # this is a function itself to simplify patching it for test      if cmd: @@ -912,6 +955,7 @@ def find_child(node, filter_func):      return ret +@azure_ds_telemetry_reporter  def load_azure_ovf_pubkeys(sshnode):      # This parses a 'SSH' node formatted like below, and returns      # an array of dicts. @@ -964,6 +1008,7 @@ def load_azure_ovf_pubkeys(sshnode):      return found +@azure_ds_telemetry_reporter  def read_azure_ovf(contents):      try:          dom = minidom.parseString(contents) @@ -1064,6 +1109,7 @@ def read_azure_ovf(contents):      return (md, ud, cfg) +@azure_ds_telemetry_reporter  def _extract_preprovisioned_vm_setting(dom):      """Read the preprovision flag from the ovf. It should not         exist unless true.""" @@ -1092,6 +1138,7 @@ def encrypt_pass(password, salt_id="$6$"):      return crypt.crypt(password, salt_id + util.rand_str(strlen=16)) +@azure_ds_telemetry_reporter  def _check_freebsd_cdrom(cdrom_dev):      """Return boolean indicating path to cdrom device has content."""      try: @@ -1103,6 +1150,7 @@ def _check_freebsd_cdrom(cdrom_dev):      return False +@azure_ds_telemetry_reporter  def _get_random_seed(source=PLATFORM_ENTROPY_SOURCE):      """Return content random seed file if available, otherwise,         return None.""" @@ -1126,6 +1174,7 @@ def _get_random_seed(source=PLATFORM_ENTROPY_SOURCE):      return seed +@azure_ds_telemetry_reporter  def list_possible_azure_ds_devs():      devlist = []      if util.is_FreeBSD(): @@ -1140,6 +1189,7 @@ def list_possible_azure_ds_devs():      return devlist +@azure_ds_telemetry_reporter  def load_azure_ds_dir(source_dir):      ovf_file = os.path.join(source_dir, "ovf-env.xml") @@ -1162,47 +1212,54 @@ def parse_network_config(imds_metadata):      @param: imds_metadata: Dict of content read from IMDS network service.      @return: Dictionary containing network version 2 standard configuration.      """ -    if imds_metadata != sources.UNSET and imds_metadata: -        netconfig = {'version': 2, 'ethernets': {}} -        LOG.debug('Azure: generating network configuration from IMDS') -        network_metadata = imds_metadata['network'] -        for idx, intf in enumerate(network_metadata['interface']): -            nicname = 'eth{idx}'.format(idx=idx) -            dev_config = {} -            for addr4 in intf['ipv4']['ipAddress']: -                privateIpv4 = addr4['privateIpAddress'] -                if privateIpv4: -                    if dev_config.get('dhcp4', False): -                        # Append static address config for nic > 1 -                        netPrefix = intf['ipv4']['subnet'][0].get( -                            'prefix', '24') -                        if not dev_config.get('addresses'): -                            dev_config['addresses'] = [] -                        dev_config['addresses'].append( -                            '{ip}/{prefix}'.format( -                                ip=privateIpv4, prefix=netPrefix)) -                    else: -                        dev_config['dhcp4'] = True -            for addr6 in intf['ipv6']['ipAddress']: -                privateIpv6 = addr6['privateIpAddress'] -                if privateIpv6: -                    dev_config['dhcp6'] = True -                    break -            if dev_config: -                mac = ':'.join(re.findall(r'..', intf['macAddress'])) -                dev_config.update( -                    {'match': {'macaddress': mac.lower()}, -                     'set-name': nicname}) -                netconfig['ethernets'][nicname] = dev_config -    else: -        blacklist = ['mlx4_core'] -        LOG.debug('Azure: generating fallback configuration') -        # generate a network config, blacklist picking mlx4_core devs -        netconfig = net.generate_fallback_config( -            blacklist_drivers=blacklist, config_driver=True) -    return netconfig +    with events.ReportEventStack( +                name="parse_network_config", +                description="", +                parent=azure_ds_reporter) as evt: +        if imds_metadata != sources.UNSET and imds_metadata: +            netconfig = {'version': 2, 'ethernets': {}} +            LOG.debug('Azure: generating network configuration from IMDS') +            network_metadata = imds_metadata['network'] +            for idx, intf in enumerate(network_metadata['interface']): +                nicname = 'eth{idx}'.format(idx=idx) +                dev_config = {} +                for addr4 in intf['ipv4']['ipAddress']: +                    privateIpv4 = addr4['privateIpAddress'] +                    if privateIpv4: +                        if dev_config.get('dhcp4', False): +                            # Append static address config for nic > 1 +                            netPrefix = intf['ipv4']['subnet'][0].get( +                                'prefix', '24') +                            if not dev_config.get('addresses'): +                                dev_config['addresses'] = [] +                            dev_config['addresses'].append( +                                '{ip}/{prefix}'.format( +                                    ip=privateIpv4, prefix=netPrefix)) +                        else: +                            dev_config['dhcp4'] = True +                for addr6 in intf['ipv6']['ipAddress']: +                    privateIpv6 = addr6['privateIpAddress'] +                    if privateIpv6: +                        dev_config['dhcp6'] = True +                        break +                if dev_config: +                    mac = ':'.join(re.findall(r'..', intf['macAddress'])) +                    dev_config.update( +                        {'match': {'macaddress': mac.lower()}, +                         'set-name': nicname}) +                    netconfig['ethernets'][nicname] = dev_config +            evt.description = "network config from imds" +        else: +            blacklist = ['mlx4_core'] +            LOG.debug('Azure: generating fallback configuration') +            # generate a network config, blacklist picking mlx4_core devs +            netconfig = net.generate_fallback_config( +                blacklist_drivers=blacklist, config_driver=True) +            evt.description = "network config from fallback" +        return netconfig +@azure_ds_telemetry_reporter  def get_metadata_from_imds(fallback_nic, retries):      """Query Azure's network metadata service, returning a dictionary. @@ -1227,6 +1284,7 @@ def get_metadata_from_imds(fallback_nic, retries):              return util.log_time(**kwargs) +@azure_ds_telemetry_reporter  def _get_metadata_from_imds(retries):      url = IMDS_URL + "instance?api-version=2017-12-01" @@ -1246,6 +1304,7 @@ def _get_metadata_from_imds(retries):      return {} +@azure_ds_telemetry_reporter  def maybe_remove_ubuntu_network_config_scripts(paths=None):      """Remove Azure-specific ubuntu network config for non-primary nics. @@ -1283,14 +1342,20 @@ def maybe_remove_ubuntu_network_config_scripts(paths=None):  def _is_platform_viable(seed_dir): -    """Check platform environment to report if this datasource may run.""" -    asset_tag = util.read_dmi_data('chassis-asset-tag') -    if asset_tag == AZURE_CHASSIS_ASSET_TAG: -        return True -    LOG.debug("Non-Azure DMI asset tag '%s' discovered.", asset_tag) -    if os.path.exists(os.path.join(seed_dir, 'ovf-env.xml')): -        return True -    return False +    with events.ReportEventStack( +                name="check-platform-viability", +                description="found azure asset tag", +                parent=azure_ds_reporter) as evt: + +        """Check platform environment to report if this datasource may run.""" +        asset_tag = util.read_dmi_data('chassis-asset-tag') +        if asset_tag == AZURE_CHASSIS_ASSET_TAG: +            return True +        LOG.debug("Non-Azure DMI asset tag '%s' discovered.", asset_tag) +        evt.description = "Non-Azure DMI asset tag '%s' discovered.", asset_tag +        if os.path.exists(os.path.join(seed_dir, 'ovf-env.xml')): +            return True +        return False  class BrokenAzureDataSource(Exception): diff --git a/cloudinit/sources/helpers/azure.py b/cloudinit/sources/helpers/azure.py index 2829dd20..d3af05ee 100644..100755 --- a/cloudinit/sources/helpers/azure.py +++ b/cloudinit/sources/helpers/azure.py @@ -16,10 +16,27 @@ from xml.etree import ElementTree  from cloudinit import url_helper  from cloudinit import util +from cloudinit.reporting import events  LOG = logging.getLogger(__name__) +azure_ds_reporter = events.ReportEventStack( +    name="azure-ds", +    description="initialize reporter for azure ds", +    reporting_enabled=True) + + +def azure_ds_telemetry_reporter(func): +    def impl(*args, **kwargs): +        with events.ReportEventStack( +                name=func.__name__, +                description=func.__name__, +                parent=azure_ds_reporter): +            return func(*args, **kwargs) +    return impl + +  @contextmanager  def cd(newdir):      prevdir = os.getcwd() @@ -119,6 +136,7 @@ class OpenSSLManager(object):      def clean_up(self):          util.del_dir(self.tmpdir) +    @azure_ds_telemetry_reporter      def generate_certificate(self):          LOG.debug('Generating certificate for communication with fabric...')          if self.certificate is not None: @@ -139,17 +157,20 @@ class OpenSSLManager(object):          LOG.debug('New certificate generated.')      @staticmethod +    @azure_ds_telemetry_reporter      def _run_x509_action(action, cert):          cmd = ['openssl', 'x509', '-noout', action]          result, _ = util.subp(cmd, data=cert)          return result +    @azure_ds_telemetry_reporter      def _get_ssh_key_from_cert(self, certificate):          pub_key = self._run_x509_action('-pubkey', certificate)          keygen_cmd = ['ssh-keygen', '-i', '-m', 'PKCS8', '-f', '/dev/stdin']          ssh_key, _ = util.subp(keygen_cmd, data=pub_key)          return ssh_key +    @azure_ds_telemetry_reporter      def _get_fingerprint_from_cert(self, certificate):          """openssl x509 formats fingerprints as so:          'SHA1 Fingerprint=07:3E:19:D1:4D:1C:79:92:24:C6:A0:FD:8D:DA:\ @@ -163,6 +184,7 @@ class OpenSSLManager(object):          octets = raw_fp[eq+1:-1].split(':')          return ''.join(octets) +    @azure_ds_telemetry_reporter      def _decrypt_certs_from_xml(self, certificates_xml):          """Decrypt the certificates XML document using the our private key;             return the list of certs and private keys contained in the doc. @@ -185,6 +207,7 @@ class OpenSSLManager(object):                  shell=True, data=b'\n'.join(lines))          return out +    @azure_ds_telemetry_reporter      def parse_certificates(self, certificates_xml):          """Given the Certificates XML document, return a dictionary of             fingerprints and associated SSH keys derived from the certs.""" @@ -265,11 +288,13 @@ class WALinuxAgentShim(object):          return socket.inet_ntoa(packed_bytes)      @staticmethod +    @azure_ds_telemetry_reporter      def _networkd_get_value_from_leases(leases_d=None):          return dhcp.networkd_get_option_from_leases(              'OPTION_245', leases_d=leases_d)      @staticmethod +    @azure_ds_telemetry_reporter      def _get_value_from_leases_file(fallback_lease_file):          leases = []          content = util.load_file(fallback_lease_file) @@ -287,6 +312,7 @@ class WALinuxAgentShim(object):              return leases[-1]      @staticmethod +    @azure_ds_telemetry_reporter      def _load_dhclient_json():          dhcp_options = {}          hooks_dir = WALinuxAgentShim._get_hooks_dir() @@ -305,6 +331,7 @@ class WALinuxAgentShim(object):          return dhcp_options      @staticmethod +    @azure_ds_telemetry_reporter      def _get_value_from_dhcpoptions(dhcp_options):          if dhcp_options is None:              return None @@ -318,6 +345,7 @@ class WALinuxAgentShim(object):          return _value      @staticmethod +    @azure_ds_telemetry_reporter      def find_endpoint(fallback_lease_file=None, dhcp245=None):          value = None          if dhcp245 is not None: @@ -352,6 +380,7 @@ class WALinuxAgentShim(object):          LOG.debug('Azure endpoint found at %s', endpoint_ip_address)          return endpoint_ip_address +    @azure_ds_telemetry_reporter      def register_with_azure_and_fetch_data(self, pubkey_info=None):          if self.openssl_manager is None:              self.openssl_manager = OpenSSLManager() @@ -404,6 +433,7 @@ class WALinuxAgentShim(object):          return keys +    @azure_ds_telemetry_reporter      def _report_ready(self, goal_state, http_client):          LOG.debug('Reporting ready to Azure fabric.')          document = self.REPORT_READY_XML_TEMPLATE.format( @@ -419,6 +449,7 @@ class WALinuxAgentShim(object):          LOG.info('Reported ready to Azure fabric.') +@azure_ds_telemetry_reporter  def get_metadata_from_fabric(fallback_lease_file=None, dhcp_opts=None,                               pubkey_info=None):      shim = WALinuxAgentShim(fallback_lease_file=fallback_lease_file, | 
