diff options
Diffstat (limited to 'azurelinuxagent')
29 files changed, 459 insertions, 247 deletions
diff --git a/azurelinuxagent/agent.py b/azurelinuxagent/agent.py index 2c21e47..bd6dd20 100644 --- a/azurelinuxagent/agent.py +++ b/azurelinuxagent/agent.py @@ -25,6 +25,8 @@ import os import sys import re import subprocess +import traceback + import azurelinuxagent.common.logger as logger import azurelinuxagent.common.event as event import azurelinuxagent.common.conf as conf @@ -133,8 +135,10 @@ def main(args=[]): agent.daemon() elif command == "run-exthandlers": agent.run_exthandlers() - except Exception as e: - logger.error(u"Failed to run '{0}': {1}", command, e) + except Exception: + logger.error(u"Failed to run '{0}': {1}", + command, + traceback.format_exc()) def parse_args(sys_args): """ diff --git a/azurelinuxagent/common/conf.py b/azurelinuxagent/common/conf.py index 9c79d10..7911699 100644 --- a/azurelinuxagent/common/conf.py +++ b/azurelinuxagent/common/conf.py @@ -98,6 +98,10 @@ def get_lib_dir(conf=__conf__): return conf.get("Lib.Dir", "/var/lib/waagent") +def get_published_hostname(conf=__conf__): + return os.path.join(get_lib_dir(conf), 'published_hostname') + + def get_dvd_mount_point(conf=__conf__): return conf.get("DVD.MountPoint", "/mnt/cdrom/secure") diff --git a/azurelinuxagent/common/event.py b/azurelinuxagent/common/event.py index 4037622..9265820 100644 --- a/azurelinuxagent/common/event.py +++ b/azurelinuxagent/common/event.py @@ -28,29 +28,31 @@ import azurelinuxagent.common.logger as logger from azurelinuxagent.common.exception import EventError, ProtocolError from azurelinuxagent.common.future import ustr from azurelinuxagent.common.protocol.restapi import TelemetryEventParam, \ - TelemetryEventList, \ - TelemetryEvent, \ - set_properties, get_properties + TelemetryEventList, \ + TelemetryEvent, \ + set_properties, get_properties from azurelinuxagent.common.version import DISTRO_NAME, DISTRO_VERSION, \ - DISTRO_CODE_NAME, AGENT_VERSION, \ - CURRENT_AGENT, CURRENT_VERSION + DISTRO_CODE_NAME, AGENT_VERSION, \ + CURRENT_AGENT, CURRENT_VERSION class WALAEventOperation: - ActivateResourceDisk="ActivateResourceDisk" + ActivateResourceDisk = "ActivateResourceDisk" Disable = "Disable" Download = "Download" Enable = "Enable" HealthCheck = "HealthCheck" - HeartBeat="HeartBeat" + HeartBeat = "HeartBeat" Install = "Install" + InitializeHostPlugin = "InitializeHostPlugin" Provision = "Provision" - Restart="Restart" - UnhandledError="UnhandledError" + Restart = "Restart" + UnhandledError = "UnhandledError" UnInstall = "UnInstall" Upgrade = "Upgrade" Update = "Update" + class EventLogger(object): def __init__(self): self.event_dir = None @@ -66,22 +68,24 @@ class EventLogger(object): if len(os.listdir(self.event_dir)) > 1000: raise EventError("Too many files under: {0}".format(self.event_dir)) - filename = os.path.join(self.event_dir, ustr(int(time.time()*1000000))) + filename = os.path.join(self.event_dir, + ustr(int(time.time() * 1000000))) try: - with open(filename+".tmp",'wb+') as hfile: + with open(filename + ".tmp", 'wb+') as hfile: hfile.write(data.encode("utf-8")) - os.rename(filename+".tmp", filename+".tld") + os.rename(filename + ".tmp", filename + ".tld") except IOError as e: raise EventError("Failed to write events to file:{0}", e) - def add_event(self, name, op="", is_success=True, duration=0, version=CURRENT_VERSION, + def add_event(self, name, op="", is_success=True, duration=0, + version=CURRENT_VERSION, message="", evt_type="", is_internal=False): event = TelemetryEvent(1, "69B669B9-4AF8-4C50-BDC4-6006FA76E975") event.parameters.append(TelemetryEventParam('Name', name)) event.parameters.append(TelemetryEventParam('Version', str(version))) event.parameters.append(TelemetryEventParam('IsInternal', is_internal)) event.parameters.append(TelemetryEventParam('Operation', op)) - event.parameters.append(TelemetryEventParam('OperationSuccess', + event.parameters.append(TelemetryEventParam('OperationSuccess', is_success)) event.parameters.append(TelemetryEventParam('Message', message)) event.parameters.append(TelemetryEventParam('Duration', duration)) @@ -93,8 +97,10 @@ class EventLogger(object): except EventError as e: logger.error("{0}", e) + __event_logger__ = EventLogger() + def add_event(name, op="", is_success=True, duration=0, version=CURRENT_VERSION, message="", evt_type="", is_internal=False, reporter=__event_logger__): @@ -108,9 +114,11 @@ def add_event(name, op="", is_success=True, duration=0, version=CURRENT_VERSION, version=str(version), message=message, evt_type=evt_type, is_internal=is_internal) + def init_event_logger(event_dir, reporter=__event_logger__): reporter.event_dir = event_dir + def dump_unhandled_err(name): if hasattr(sys, 'last_type') and hasattr(sys, 'last_value') and \ hasattr(sys, 'last_traceback'): @@ -119,9 +127,10 @@ def dump_unhandled_err(name): last_traceback = getattr(sys, 'last_traceback') error = traceback.format_exception(last_type, last_value, last_traceback) - message= "".join(error) + message = "".join(error) add_event(name, is_success=False, message=message, op=WALAEventOperation.UnhandledError) + def enable_unhandled_err_dump(name): atexit.register(dump_unhandled_err, name) diff --git a/azurelinuxagent/common/osutil/debian.py b/azurelinuxagent/common/osutil/debian.py index f455572..b3db921 100644 --- a/azurelinuxagent/common/osutil/debian.py +++ b/azurelinuxagent/common/osutil/debian.py @@ -37,7 +37,7 @@ class DebianOSUtil(DefaultOSUtil): super(DebianOSUtil, self).__init__() def restart_ssh_service(self): - return shellutil.run("service sshd restart", chk_err=False) + return shellutil.run("systemctl --job-mode=ignore-dependencies try-reload-or-restart ssh", chk_err=False) def stop_agent_service(self): return shellutil.run("service azurelinuxagent stop", chk_err=False) @@ -45,3 +45,14 @@ class DebianOSUtil(DefaultOSUtil): def start_agent_service(self): return shellutil.run("service azurelinuxagent start", chk_err=False) + def start_network(self): + pass + + def remove_rules_files(self, rules_files=""): + pass + + def restore_rules_files(self, rules_files=""): + pass + + def get_dhcp_lease_endpoint(self): + return self.get_endpoint_from_leases_path('/var/lib/dhcp/dhclient.*.leases') diff --git a/azurelinuxagent/common/osutil/default.py b/azurelinuxagent/common/osutil/default.py index dc73379..4cd379b 100644 --- a/azurelinuxagent/common/osutil/default.py +++ b/azurelinuxagent/common/osutil/default.py @@ -51,7 +51,8 @@ class DefaultOSUtil(object): def __init__(self): self.agent_conf_file_path = '/etc/waagent.conf' - self.selinux=None + self.selinux = None + self.disable_route_warning = False def get_agent_conf_file_path(self): return self.agent_conf_file_path @@ -438,7 +439,8 @@ class DefaultOSUtil(object): iface=sock[i:i+16].split(b'\0', 1)[0] if len(iface) == 0 or self.is_loopback(iface) or iface != primary: # test the next one - logger.info('interface [{0}] skipped'.format(iface)) + if len(iface) != 0 and not self.disable_route_warning: + logger.info('interface [{0}] skipped'.format(iface)) continue else: # use this one @@ -470,7 +472,8 @@ class DefaultOSUtil(object): primary = None primary_metric = None - logger.info("examine /proc/net/route for primary interface") + if not self.disable_route_warning: + logger.info("examine /proc/net/route for primary interface") with open('/proc/net/route') as routing_table: idx = 0 for header in filter(lambda h: len(h) > 0, routing_table.readline().strip(" \n").split("\t")): @@ -494,11 +497,18 @@ class DefaultOSUtil(object): if primary is None: primary = '' - - logger.info('primary interface is [{0}]'.format(primary)) + if not self.disable_route_warning: + with open('/proc/net/route') as routing_table_fh: + routing_table_text = routing_table_fh.read() + logger.error('could not determine primary interface, ' + 'please ensure /proc/net/route is correct:\n' + '{0}'.format(routing_table_text)) + self.disable_route_warning = True + else: + logger.info('primary interface is [{0}]'.format(primary)) + self.disable_route_warning = False return primary - def is_primary_interface(self, ifname): """ Indicate whether the specified interface is the primary. @@ -507,13 +517,14 @@ class DefaultOSUtil(object): """ return self.get_primary_interface() == ifname - def is_loopback(self, ifname): s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM, socket.IPPROTO_UDP) result = fcntl.ioctl(s.fileno(), 0x8913, struct.pack('256s', ifname[:15])) flags, = struct.unpack('H', result[16:18]) isloopback = flags & 8 == 8 - logger.info('interface [{0}] has flags [{1}], is loopback [{2}]'.format(ifname, flags, isloopback)) + if not self.disable_route_warning: + logger.info('interface [{0}] has flags [{1}], ' + 'is loopback [{2}]'.format(ifname, flags, isloopback)) return isloopback def get_dhcp_lease_endpoint(self): @@ -675,6 +686,7 @@ class DefaultOSUtil(object): def publish_hostname(self, hostname): self.set_dhcp_hostname(hostname) + self.set_hostname_record(hostname) ifname = self.get_if_name() self.restart_if(ifname) @@ -725,22 +737,39 @@ class DefaultOSUtil(object): port_id = port_id - 2 device = None path = "/sys/bus/vmbus/devices/" - for vmbus in os.listdir(path): - deviceid = fileutil.read_file(os.path.join(path, vmbus, "device_id")) - guid = deviceid.lstrip('{').split('-') - if guid[0] == g0 and guid[1] == "000" + ustr(port_id): - for root, dirs, files in os.walk(path + vmbus): - if root.endswith("/block"): - device = dirs[0] - break - else : #older distros - for d in dirs: - if ':' in d and "block" == d.split(':')[0]: - device = d.split(':')[1] - break - break + if os.path.exists(path): + for vmbus in os.listdir(path): + deviceid = fileutil.read_file(os.path.join(path, vmbus, "device_id")) + guid = deviceid.lstrip('{').split('-') + if guid[0] == g0 and guid[1] == "000" + ustr(port_id): + for root, dirs, files in os.walk(path + vmbus): + if root.endswith("/block"): + device = dirs[0] + break + else : #older distros + for d in dirs: + if ':' in d and "block" == d.split(':')[0]: + device = d.split(':')[1] + break + break return device + def set_hostname_record(self, hostname): + fileutil.write_file(conf.get_published_hostname(), contents=hostname) + + def get_hostname_record(self): + hostname_record = conf.get_published_hostname() + if not os.path.exists(hostname_record): + # this file is created at provisioning time with agents >= 2.2.3 + hostname = socket.gethostname() + logger.warn('Hostname record does not exist, ' + 'creating [{0}] with hostname [{1}]', + hostname_record, + hostname) + self.set_hostname_record(hostname) + record = fileutil.read_file(hostname_record) + return record + def del_account(self, username): if self.is_sys_user(username): logger.error("{0} is a system user. Will not delete it.", username) @@ -749,10 +778,10 @@ class DefaultOSUtil(object): self.conf_sudoer(username, remove=True) def decode_customdata(self, data): - return base64.b64decode(data) + return base64.b64decode(data).decode('utf-8') def get_total_mem(self): - # Get total memory in bytes and divide by 1024**2 to get the valu in MB. + # Get total memory in bytes and divide by 1024**2 to get the value in MB. return os.sysconf('SC_PAGE_SIZE') * os.sysconf('SC_PHYS_PAGES') / (1024**2) def get_processor_cores(self): diff --git a/azurelinuxagent/common/osutil/factory.py b/azurelinuxagent/common/osutil/factory.py index 2718ba1..acd7f6e 100644 --- a/azurelinuxagent/common/osutil/factory.py +++ b/azurelinuxagent/common/osutil/factory.py @@ -17,9 +17,7 @@ import azurelinuxagent.common.logger as logger from azurelinuxagent.common.utils.textutil import Version -from azurelinuxagent.common.version import DISTRO_NAME, DISTRO_VERSION, \ - DISTRO_FULL_NAME - +from azurelinuxagent.common.version import * from .default import DefaultOSUtil from .clearlinux import ClearLinuxUtil from .coreos import CoreOSUtil @@ -27,54 +25,65 @@ from .debian import DebianOSUtil from .freebsd import FreeBSDOSUtil from .redhat import RedhatOSUtil, Redhat6xOSUtil from .suse import SUSEOSUtil, SUSE11OSUtil -from .ubuntu import UbuntuOSUtil, Ubuntu12OSUtil, Ubuntu14OSUtil, \ - UbuntuSnappyOSUtil +from .ubuntu import UbuntuOSUtil, Ubuntu12OSUtil, Ubuntu14OSUtil, UbuntuSnappyOSUtil from .alpine import AlpineOSUtil from .bigip import BigIpOSUtil -def get_osutil(distro_name=DISTRO_NAME, distro_version=DISTRO_VERSION, + +def get_osutil(distro_name=DISTRO_NAME, + distro_code_name=DISTRO_CODE_NAME, + distro_version=DISTRO_VERSION, distro_full_name=DISTRO_FULL_NAME): + if distro_name == "clear linux software for intel architecture": return ClearLinuxUtil() + if distro_name == "ubuntu": - if Version(distro_version) == Version("12.04") or \ - Version(distro_version) == Version("12.10"): + if Version(distro_version) == Version("12.04") or Version(distro_version) == Version("12.10"): return Ubuntu12OSUtil() - elif Version(distro_version) == Version("14.04") or \ - Version(distro_version) == Version("14.10"): + elif Version(distro_version) == Version("14.04") or Version(distro_version) == Version("14.10"): return Ubuntu14OSUtil() elif distro_full_name == "Snappy Ubuntu Core": return UbuntuSnappyOSUtil() else: return UbuntuOSUtil() + if distro_name == "alpine": return AlpineOSUtil() + if distro_name == "kali": - return DebianOSUtil() - if distro_name == "coreos": + return DebianOSUtil() + + if distro_name == "coreos" or distro_code_name == "coreos": return CoreOSUtil() + if distro_name == "suse": - if distro_full_name=='SUSE Linux Enterprise Server' and \ - Version(distro_version) < Version('12') or \ - distro_full_name == 'openSUSE' and \ - Version(distro_version) < Version('13.2'): + if distro_full_name == 'SUSE Linux Enterprise Server' \ + and Version(distro_version) < Version('12') \ + or distro_full_name == 'openSUSE' and Version(distro_version) < Version('13.2'): return SUSE11OSUtil() else: return SUSEOSUtil() + elif distro_name == "debian": return DebianOSUtil() - elif distro_name == "redhat" or distro_name == "centos" or \ - distro_name == "oracle": + + elif distro_name == "redhat" \ + or distro_name == "centos" \ + or distro_name == "oracle": if Version(distro_version) < Version("7"): return Redhat6xOSUtil() else: return RedhatOSUtil() + elif distro_name == "freebsd": return FreeBSDOSUtil() + elif distro_name == "bigip": return BigIpOSUtil() + else: - logger.warn("Unable to load distro implementation for {0}.", distro_name) - logger.warn("Use default distro implementation instead.") + logger.warn("Unable to load distro implementation for {0}. Using " + "default distro implementation instead.", + distro_name) return DefaultOSUtil() - diff --git a/azurelinuxagent/common/osutil/freebsd.py b/azurelinuxagent/common/osutil/freebsd.py index 54c7452..d0c40b9 100644 --- a/azurelinuxagent/common/osutil/freebsd.py +++ b/azurelinuxagent/common/osutil/freebsd.py @@ -77,7 +77,7 @@ class FreeBSDOSUtil(DefaultOSUtil): "").format(username, output)) def del_root_password(self): - err = shellutil.run('pw mod user root -w no') + err = shellutil.run('pw usermod root -h -') if err: raise OSUtilError("Failed to delete root password: Failed to update password database.") diff --git a/azurelinuxagent/common/osutil/redhat.py b/azurelinuxagent/common/osutil/redhat.py index 80370a2..5254ea5 100644 --- a/azurelinuxagent/common/osutil/redhat.py +++ b/azurelinuxagent/common/osutil/redhat.py @@ -36,6 +36,7 @@ import azurelinuxagent.common.utils.textutil as textutil from azurelinuxagent.common.utils.cryptutil import CryptUtil from azurelinuxagent.common.osutil.default import DefaultOSUtil + class Redhat6xOSUtil(DefaultOSUtil): def __init__(self): super(Redhat6xOSUtil, self).__init__() @@ -57,7 +58,7 @@ class Redhat6xOSUtil(DefaultOSUtil): def unregister_agent_service(self): return shellutil.run("chkconfig --del waagent", chk_err=False) - + def openssl_to_openssh(self, input_file, output_file): pubkey = fileutil.read_file(input_file) try: @@ -67,7 +68,7 @@ class Redhat6xOSUtil(DefaultOSUtil): raise OSUtilError(ustr(e)) fileutil.write_file(output_file, ssh_rsa_pubkey) - #Override + # Override def get_dhcp_pid(self): ret = shellutil.run_get_output("pidof dhclient", chk_err=False) return ret[1] if ret[0] == 0 else None @@ -84,22 +85,28 @@ class Redhat6xOSUtil(DefaultOSUtil): def set_dhcp_hostname(self, hostname): ifname = self.get_if_name() filepath = "/etc/sysconfig/network-scripts/ifcfg-{0}".format(ifname) - fileutil.update_conf_file(filepath, 'DHCP_HOSTNAME', + fileutil.update_conf_file(filepath, + 'DHCP_HOSTNAME', 'DHCP_HOSTNAME={0}'.format(hostname)) def get_dhcp_lease_endpoint(self): return self.get_endpoint_from_leases_path('/var/lib/dhclient/dhclient-*.leases') + class RedhatOSUtil(Redhat6xOSUtil): def __init__(self): super(RedhatOSUtil, self).__init__() def set_hostname(self, hostname): """ - Set /etc/hostname - Unlike redhat 6.x, redhat 7.x will set hostname to /etc/hostname + Unlike redhat 6.x, redhat 7.x will set hostname via hostnamectl + Due to a bug in systemd in Centos-7.0, if this call fails, fallback + to hostname. """ - DefaultOSUtil.set_hostname(self, hostname) + hostnamectl_cmd = "hostnamectl set-hostname {0}".format(hostname) + if shellutil.run(hostnamectl_cmd, chk_err=False) != 0: + logger.warn("[{0}] failed, attempting fallback".format(hostnamectl_cmd)) + DefaultOSUtil.set_hostname(self, hostname) def publish_hostname(self, hostname): """ @@ -118,5 +125,11 @@ class RedhatOSUtil(Redhat6xOSUtil): DefaultOSUtil.openssl_to_openssh(self, input_file, output_file) def get_dhcp_lease_endpoint(self): - # centos7 has this weird naming with double hyphen like /var/lib/dhclient/dhclient--eth0.lease - return self.get_endpoint_from_leases_path('/var/lib/dhclient/dhclient-*.lease') + # dhclient + endpoint = self.get_endpoint_from_leases_path('/var/lib/dhclient/dhclient-*.lease') + + if endpoint is None: + # NetworkManager + endpoint = self.get_endpoint_from_leases_path('/var/lib/NetworkManager/dhclient-*.lease') + + return endpoint diff --git a/azurelinuxagent/common/protocol/hostplugin.py b/azurelinuxagent/common/protocol/hostplugin.py index e83dd4b..bdae56e 100644 --- a/azurelinuxagent/common/protocol/hostplugin.py +++ b/azurelinuxagent/common/protocol/hostplugin.py @@ -16,7 +16,6 @@ # # Requires Python 2.4+ and Openssl 1.0+ # - from azurelinuxagent.common.protocol.wire import * from azurelinuxagent.common.utils import textutil @@ -32,6 +31,7 @@ HEADER_HOST_CONFIG_NAME = "x-ms-host-config-name" HEADER_ARTIFACT_LOCATION = "x-ms-artifact-location" HEADER_ARTIFACT_MANIFEST_LOCATION = "x-ms-artifact-manifest-location" + class HostPluginProtocol(object): def __init__(self, endpoint, container_id, role_config_name): if endpoint is None: @@ -41,6 +41,7 @@ class HostPluginProtocol(object): self.api_versions = None self.endpoint = endpoint self.container_id = container_id + self.deployment_id = None self.role_config_name = role_config_name self.manifest_uri = None @@ -49,6 +50,11 @@ class HostPluginProtocol(object): self.api_versions = self.get_api_versions() self.is_available = API_VERSION in self.api_versions self.is_initialized = True + + from azurelinuxagent.common.event import add_event, WALAEventOperation + add_event(name="WALA", + op=WALAEventOperation.InitializeHostPlugin, + is_success=self.is_available) return self.is_available def get_api_versions(self): @@ -74,10 +80,10 @@ class HostPluginProtocol(object): def get_artifact_request(self, artifact_url, artifact_manifest_url=None): if not self.ensure_initialized(): logger.error("host plugin channel is not available") - return + return None, None if textutil.is_str_none_or_whitespace(artifact_url): logger.error("no extension artifact url was provided") - return + return None, None url = URI_FORMAT_GET_EXTENSION_ARTIFACT.format(self.endpoint, HOST_PLUGIN_PORT) @@ -121,7 +127,10 @@ class HostPluginProtocol(object): 'content': status}, sort_keys=True) response = restutil.http_put(url, data=data, headers=headers) if response.status != httpclient.OK: - logger.error("PUT failed [{0}]", response.status) + logger.warn("PUT {0} [{1}: {2}]", + url, + response.status, + response.reason) else: logger.verbose("Successfully uploaded status to host plugin") except Exception as e: @@ -140,18 +149,20 @@ class HostPluginProtocol(object): if not self.ensure_initialized(): logger.error("host plugin channel is not available") return - if content is None or self.goal_state.container_id is None or self.goal_state.deployment_id is None: + if content is None \ + or self.container_id is None \ + or self.deployment_id is None: logger.error( "invalid arguments passed: " "[{0}], [{1}], [{2}]".format( content, - self.goal_state.container_id, - self.goal_state.deployment_id)) + self.container_id, + self.deployment_id)) return url = URI_FORMAT_PUT_LOG.format(self.endpoint, HOST_PLUGIN_PORT) - headers = {"x-ms-vmagentlog-deploymentid": self.goal_state.deployment_id, - "x-ms-vmagentlog-containerid": self.goal_state.container_id} + headers = {"x-ms-vmagentlog-deploymentid": self.deployment_id, + "x-ms-vmagentlog-containerid": self.container_id} logger.info("put VM log at [{0}]".format(url)) try: response = restutil.http_put(url, content, headers) diff --git a/azurelinuxagent/common/protocol/restapi.py b/azurelinuxagent/common/protocol/restapi.py index 5f71cf2..a42db37 100644 --- a/azurelinuxagent/common/protocol/restapi.py +++ b/azurelinuxagent/common/protocol/restapi.py @@ -16,11 +16,12 @@ # # Requires Python 2.4+ and Openssl 1.0+ # - +import socket import azurelinuxagent.common.logger as logger import azurelinuxagent.common.utils.restutil as restutil from azurelinuxagent.common.exception import ProtocolError, HttpError from azurelinuxagent.common.future import ustr +from azurelinuxagent.common.version import DISTRO_VERSION, DISTRO_NAME, CURRENT_VERSION def validate_param(name, val, expected_type): @@ -87,8 +88,13 @@ Data contract between guest and host class VMInfo(DataContract): - def __init__(self, subscriptionId=None, vmName=None, containerId=None, - roleName=None, roleInstanceName=None, tenantName=None): + def __init__(self, + subscriptionId=None, + vmName=None, + containerId=None, + roleName=None, + roleInstanceName=None, + tenantName=None): self.subscriptionId = subscriptionId self.vmName = vmName self.containerId = containerId @@ -96,18 +102,26 @@ class VMInfo(DataContract): self.roleInstanceName = roleInstanceName self.tenantName = tenantName + class CertificateData(DataContract): def __init__(self, certificateData=None): self.certificateData = certificateData + class Cert(DataContract): - def __init__(self, name=None, thumbprint=None, certificateDataUri=None, storeName=None, storeLocation=None): + def __init__(self, + name=None, + thumbprint=None, + certificateDataUri=None, + storeName=None, + storeLocation=None): self.name = name self.thumbprint = thumbprint self.certificateDataUri = certificateDataUri self.storeLocation = storeLocation self.storeName = storeName + class CertList(DataContract): def __init__(self): self.certificates = DataContractList(Cert) @@ -131,8 +145,12 @@ class VMAgentManifestList(DataContract): class Extension(DataContract): - def __init__(self, name=None, sequenceNumber=None, publicSettings=None, - protectedSettings=None, certificateThumbprint=None): + def __init__(self, + name=None, + sequenceNumber=None, + publicSettings=None, + protectedSettings=None, + certificateThumbprint=None): self.name = name self.sequenceNumber = sequenceNumber self.publicSettings = publicSettings @@ -207,8 +225,13 @@ class ExtensionSubStatus(DataContract): class ExtensionStatus(DataContract): - def __init__(self, configurationAppliedTime=None, operation=None, - status=None, seq_no=None, code=None, message=None): + def __init__(self, + configurationAppliedTime=None, + operation=None, + status=None, + seq_no=None, + code=None, + message=None): self.configurationAppliedTime = configurationAppliedTime self.operation = operation self.status = status @@ -219,7 +242,11 @@ class ExtensionStatus(DataContract): class ExtHandlerStatus(DataContract): - def __init__(self, name=None, version=None, status=None, code=0, + def __init__(self, + name=None, + version=None, + status=None, + code=0, message=None): self.name = name self.version = version @@ -230,16 +257,19 @@ class ExtHandlerStatus(DataContract): class VMAgentStatus(DataContract): - def __init__(self, version=None, status=None, message=None): - self.version = version + def __init__(self, status=None, message=None): self.status = status self.message = message + self.hostname = socket.gethostname() + self.version = str(CURRENT_VERSION) + self.osname = DISTRO_NAME + self.osversion = DISTRO_VERSION self.extensionHandlers = DataContractList(ExtHandlerStatus) class VMStatus(DataContract): - def __init__(self): - self.vmAgent = VMAgentStatus() + def __init__(self, status, message): + self.vmAgent = VMAgentStatus(status=status, message=message) class TelemetryEventParam(DataContract): diff --git a/azurelinuxagent/common/protocol/wire.py b/azurelinuxagent/common/protocol/wire.py index 9f634e9..71c3e37 100644 --- a/azurelinuxagent/common/protocol/wire.py +++ b/azurelinuxagent/common/protocol/wire.py @@ -250,6 +250,9 @@ def ga_status_to_v1(ga_status): v1_ga_status = { 'version': ga_status.version, 'status': ga_status.status, + 'osversion': ga_status.osversion, + 'osname': ga_status.osname, + 'hostname': ga_status.hostname, 'formattedMessage': formatted_msg } return v1_ga_status @@ -338,7 +341,7 @@ def vm_status_to_v1(vm_status, ext_statuses): 'handlerAggregateStatus': v1_handler_status_list } v1_vm_status = { - 'version': '1.0', + 'version': '1.1', 'timestampUTC': timestamp, 'aggregateStatus': v1_agg_status } @@ -638,9 +641,10 @@ class WireClient(object): uri, headers = host.get_artifact_request(version.uri) response = self.fetch(uri, headers) if not response: + host = self.get_host_plugin(force_update=True) logger.info("Retry fetch in {0} seconds", - LONG_WAITING_INTERVAL) - time.sleep(LONG_WAITING_INTERVAL) + SHORT_WAITING_INTERVAL) + time.sleep(SHORT_WAITING_INTERVAL) else: host.manifest_uri = version.uri logger.verbose("Manifest downloaded successfully from host plugin") @@ -659,9 +663,10 @@ class WireClient(object): if resp.status == httpclient.OK: return_value = self.decode_config(resp.read()) else: - logger.warn("Could not fetch {0} [{1}]", + logger.warn("Could not fetch {0} [{1}: {2}]", uri, - resp.status) + resp.status, + resp.reason) except (HttpError, ProtocolError) as e: logger.verbose("Fetch failed from [{0}]", uri) return return_value @@ -716,7 +721,7 @@ class WireClient(object): if not forced: last_incarnation = None - if (os.path.isfile(incarnation_file)): + if os.path.isfile(incarnation_file): last_incarnation = fileutil.read_file(incarnation_file) new_incarnation = goal_state.incarnation if last_incarnation is not None and \ @@ -731,11 +736,11 @@ class WireClient(object): file_name = GOAL_STATE_FILE_NAME.format(goal_state.incarnation) goal_state_file = os.path.join(conf.get_lib_dir(), file_name) self.save_cache(goal_state_file, xml_text) - self.save_cache(incarnation_file, goal_state.incarnation) self.update_hosting_env(goal_state) self.update_shared_conf(goal_state) self.update_certs(goal_state) self.update_ext_conf(goal_state) + self.save_cache(incarnation_file, goal_state.incarnation) if self.host_plugin is not None: self.host_plugin.container_id = goal_state.container_id self.host_plugin.role_config_name = goal_state.role_config_name @@ -760,7 +765,7 @@ class WireClient(object): return self.goal_state def get_hosting_env(self): - if (self.hosting_env is None): + if self.hosting_env is None: local_file = os.path.join(conf.get_lib_dir(), HOSTING_ENV_FILE_NAME) xml_text = self.fetch_cache(local_file) @@ -768,7 +773,7 @@ class WireClient(object): return self.hosting_env def get_shared_conf(self): - if (self.shared_conf is None): + if self.shared_conf is None: local_file = os.path.join(conf.get_lib_dir(), SHARED_CONF_FILE_NAME) xml_text = self.fetch_cache(local_file) @@ -776,7 +781,7 @@ class WireClient(object): return self.shared_conf def get_certs(self): - if (self.certs is None): + if self.certs is None: local_file = os.path.join(conf.get_lib_dir(), CERTS_FILE_NAME) xml_text = self.fetch_cache(local_file) self.certs = Certificates(self, xml_text) @@ -823,7 +828,7 @@ class WireClient(object): logger.info("Wire protocol version:{0}", PROTOCOL_VERSION) elif PROTOCOL_VERSION in version_info.get_supported(): logger.info("Wire protocol version:{0}", PROTOCOL_VERSION) - logger.warn("Server prefered version:{0}", preferred) + logger.warn("Server preferred version:{0}", preferred) else: error = ("Agent supported wire protocol version: {0} was not " "advised by Fabric.").format(PROTOCOL_VERSION) @@ -982,8 +987,12 @@ class WireClient(object): "x-ms-guest-agent-public-x509-cert": cert } - def get_host_plugin(self): - if self.host_plugin is None: + def get_host_plugin(self, force_update=False): + if self.host_plugin is None or force_update: + if force_update: + logger.warn("Forcing update of goal state") + self.goal_state = None + self.update_goal_state(forced=True) goal_state = self.get_goal_state() self.host_plugin = HostPluginProtocol(self.endpoint, goal_state.container_id, @@ -1402,7 +1411,6 @@ class InVMArtifactsProfile(object): * encryptedHealthChecks (optional) * encryptedApplicationProfile (optional) """ - def __init__(self, artifacts_profile): if not textutil.is_str_none_or_whitespace(artifacts_profile): self.__dict__.update(parse_json(artifacts_profile)) diff --git a/azurelinuxagent/common/rdma.py b/azurelinuxagent/common/rdma.py index ba9a029..226482d 100644 --- a/azurelinuxagent/common/rdma.py +++ b/azurelinuxagent/common/rdma.py @@ -133,14 +133,14 @@ class RDMAHandler(object): """Load the kernel driver, this depends on the proper driver to be installed with the install_driver() method""" logger.info("RDMA: probing module '%s'" % self.driver_module_name) - result = shellutil.run('modprobe %s' % self.driver_module_name) + result = shellutil.run('modprobe --first-time %s' % self.driver_module_name) if result != 0: error_msg = 'Could not load "%s" kernel module. ' - error_msg += 'Run "modprobe %s" as root for more details' + error_msg += 'Run "modprobe --first-time %s" as root for more details' logger.error( error_msg % (self.driver_module_name, self.driver_module_name) ) - return + return False logger.info('RDMA: Loaded the kernel driver successfully.') return True @@ -158,6 +158,7 @@ class RDMAHandler(object): logger.info('RDMA: module loaded.') return True logger.info('RDMA: module not loaded.') + return False def reboot_system(self): """Reboot the system. This is required as the kernel module for diff --git a/azurelinuxagent/common/utils/fileutil.py b/azurelinuxagent/common/utils/fileutil.py index b0b6fb7..8713d0c 100644 --- a/azurelinuxagent/common/utils/fileutil.py +++ b/azurelinuxagent/common/utils/fileutil.py @@ -140,9 +140,9 @@ def update_conf_file(path, line_start, val, chk_err=False): if not os.path.isfile(path) and chk_err: raise IOError("Can't find config file:{0}".format(path)) conf = read_file(path).split('\n') - conf = [x for x in conf if not x.startswith(line_start)] + conf = [x for x in conf if x is not None and len(x) > 0 and not x.startswith(line_start)] conf.append(val) - write_file(path, '\n'.join(conf)) + write_file(path, '\n'.join(conf) + '\n') def search_file(target_dir_name, target_file_name): for root, dirs, files in os.walk(target_dir_name): diff --git a/azurelinuxagent/common/utils/restutil.py b/azurelinuxagent/common/utils/restutil.py index 7c9ee17..7197370 100644 --- a/azurelinuxagent/common/utils/restutil.py +++ b/azurelinuxagent/common/utils/restutil.py @@ -29,6 +29,7 @@ REST api util functions """ RETRY_WAITING_INTERVAL = 10 +secure_warning = True def _parse_url(url): @@ -85,7 +86,7 @@ def _http_request(method, host, rel_uri, port=None, data=None, secure=False, timeout=10) url = rel_uri - logger.verbose("HTTPConnection [{0}] [{1}] [{2}] [{3}]", + logger.verbose("HTTP connection [{0}] [{1}] [{2}] [{3}]", method, url, data, @@ -104,6 +105,7 @@ def http_request(method, url, data, headers=None, max_retry=3, On error, sleep 10 and retry max_retry times. """ host, port, secure, rel_uri = _parse_url(url) + global secure_warning # Check proxy proxy_host, proxy_port = (None, None) @@ -112,24 +114,22 @@ def http_request(method, url, data, headers=None, max_retry=3, # If httplib module is not built with ssl support. Fallback to http if secure and not hasattr(httpclient, "HTTPSConnection"): - logger.warn("httplib is not built with ssl support") secure = False + if secure_warning: + logger.warn("httplib is not built with ssl support") + secure_warning = False # If httplib module doesn't support https tunnelling. Fallback to http if secure and proxy_host is not None and proxy_port is not None \ and not hasattr(httpclient.HTTPSConnection, "set_tunnel"): - logger.warn("httplib does not support https tunnelling " - "(new in python 2.7)") secure = False + if secure_warning: + logger.warn("httplib does not support https tunnelling " + "(new in python 2.7)") + secure_warning = False - logger.verbose("HTTP method: [{0}]", method) - logger.verbose("HTTP host: [{0}]", host) - logger.verbose("HTTP uri: [{0}]", rel_uri) - logger.verbose("HTTP port: [{0}]", port) - logger.verbose("HTTP data: [{0}]", data) - logger.verbose("HTTP secure: [{0}]", secure) - logger.verbose("HTTP headers: [{0}]", headers) - logger.verbose("HTTP proxy: [{0}:{1}]", proxy_host, proxy_port) + if proxy_host or proxy_port: + logger.verbose("HTTP proxy: [{0}:{1}]", proxy_host, proxy_port) retry_msg = '' log_msg = "HTTP {0}".format(method) @@ -152,8 +152,14 @@ def http_request(method, url, data, headers=None, max_retry=3, retry_interval = 5 except IOError as e: retry_msg = 'IO error: {0} {1}'.format(log_msg, e) - retry_interval = 0 - max_retry = 0 + # error 101: network unreachable; when the adapter resets we may + # see this transient error for a short time, retry once. + if e.errno == 101: + retry_interval = RETRY_WAITING_INTERVAL + max_retry = 1 + else: + retry_interval = 0 + max_retry = 0 if retry < max_retry: logger.info("Retry [{0}/{1} - {3}]", diff --git a/azurelinuxagent/common/utils/shellutil.py b/azurelinuxagent/common/utils/shellutil.py index d273c92..4efcbc4 100644 --- a/azurelinuxagent/common/utils/shellutil.py +++ b/azurelinuxagent/common/utils/shellutil.py @@ -17,13 +17,11 @@ # Requires Python 2.4+ and Openssl 1.0+ # -import platform -import os import subprocess -from azurelinuxagent.common.future import ustr import azurelinuxagent.common.logger as logger +from azurelinuxagent.common.future import ustr -if not hasattr(subprocess,'check_output'): +if not hasattr(subprocess, 'check_output'): def check_output(*popenargs, **kwargs): r"""Backport from subprocess module from python 2.7""" if 'stdout' in kwargs: @@ -39,51 +37,58 @@ if not hasattr(subprocess,'check_output'): raise subprocess.CalledProcessError(retcode, cmd, output=output) return output + # Exception classes used by this module. class CalledProcessError(Exception): def __init__(self, returncode, cmd, output=None): self.returncode = returncode self.cmd = cmd self.output = output + def __str__(self): return ("Command '{0}' returned non-zero exit status {1}" "").format(self.cmd, self.returncode) - subprocess.check_output=check_output - subprocess.CalledProcessError=CalledProcessError + subprocess.check_output = check_output + subprocess.CalledProcessError = CalledProcessError """ Shell command util functions """ + + def run(cmd, chk_err=True): """ Calls run_get_output on 'cmd', returning only the return code. If chk_err=True then errors will be reported in the log. If chk_err=False then errors will be suppressed from the log. """ - retcode,out=run_get_output(cmd,chk_err) + retcode, out = run_get_output(cmd, chk_err) return retcode + def run_get_output(cmd, chk_err=True, log_cmd=True): """ Wrapper for subprocess.check_output. - Execute 'cmd'. Returns return code and STDOUT, trapping expected exceptions. + Execute 'cmd'. Returns return code and STDOUT, trapping expected + exceptions. Reports exceptions to Error if chk_err parameter is True """ if log_cmd: logger.verbose(u"run cmd '{0}'", cmd) try: - output=subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) + output = subprocess.check_output(cmd, stderr=subprocess.STDOUT, + shell=True) output = ustr(output, encoding='utf-8', errors="backslashreplace") - except subprocess.CalledProcessError as e : + except subprocess.CalledProcessError as e: output = ustr(e.output, encoding='utf-8', errors="backslashreplace") if chk_err: if log_cmd: logger.error(u"run cmd '{0}' failed", e.cmd) logger.error(u"Error Code:{0}", e.returncode) logger.error(u"Result:{0}", output) - return e.returncode, output + return e.returncode, output return 0, output @@ -103,5 +108,4 @@ def quote(word_list): return " ".join(list("'{0}'".format(s.replace("'", "'\\''")) for s in word_list)) - # End shell command util functions diff --git a/azurelinuxagent/common/utils/textutil.py b/azurelinuxagent/common/utils/textutil.py index 59b8fe7..2d99f6f 100644 --- a/azurelinuxagent/common/utils/textutil.py +++ b/azurelinuxagent/common/utils/textutil.py @@ -221,15 +221,24 @@ def hexstr_to_bytearray(a): def set_ssh_config(config, name, val): - notfound = True + found = False + no_match = -1 + + match_start = no_match for i in range(0, len(config)): - if config[i].startswith(name): + if config[i].startswith(name) and match_start == no_match: config[i] = "{0} {1}".format(name, val) - notfound = False - elif config[i].startswith("Match"): - # Match block must be put in the end of sshd config - break - if notfound: + found = True + elif config[i].lower().startswith("match"): + if config[i].lower().startswith("match all"): + # outside match block + match_start = no_match + elif match_start == no_match: + # inside match block + match_start = i + if not found: + if match_start != no_match: + i = match_start config.insert(i, "{0} {1}".format(name, val)) return config @@ -267,6 +276,9 @@ def gen_password_hash(password, crypt_id, salt_len): collection = string.ascii_letters + string.digits salt = ''.join(random.choice(collection) for _ in range(salt_len)) salt = "${0}${1}".format(crypt_id, salt) + if sys.version_info[0] == 2: + # if python 2.*, encode to type 'str' to prevent Unicode Encode Error from crypt.crypt + password = password.encode('utf-8') return crypt.crypt(password, salt) diff --git a/azurelinuxagent/common/version.py b/azurelinuxagent/common/version.py index 1099e25..30b751c 100644 --- a/azurelinuxagent/common/version.py +++ b/azurelinuxagent/common/version.py @@ -26,13 +26,13 @@ from azurelinuxagent.common.utils.flexible_version import FlexibleVersion from azurelinuxagent.common.future import ustr -""" -Add this workaround for detecting F5 products because BIG-IP/IQ/etc do not show -their version info in the /etc/product-version location. Instead, the version -and product information is contained in the /VERSION file. -""" def get_f5_platform(): - result = [None,None,None,None] + """ + Add this workaround for detecting F5 products because BIG-IP/IQ/etc do + not show their version info in the /etc/product-version location. Instead, + the version and product information is contained in the /VERSION file. + """ + result = [None, None, None, None] f5_version = re.compile("^Version: (\d+\.\d+\.\d+)") f5_product = re.compile("^Product: ([\w-]+)") @@ -56,13 +56,15 @@ def get_f5_platform(): result[2] = "iworkflow" return result + def get_distro(): if 'FreeBSD' in platform.system(): release = re.sub('\-.*\Z', '', ustr(platform.release())) osinfo = ['freebsd', release, '', 'freebsd'] elif 'linux_distribution' in dir(platform): + supported = platform._supported_dists + ('alpine',) osinfo = list(platform.linux_distribution(full_distribution_name=0, - supported_dists=platform._supported_dists+('alpine',))) + supported_dists=supported)) full_name = platform.linux_distribution()[0].strip() osinfo.append(full_name) else: @@ -86,7 +88,7 @@ def get_distro(): AGENT_NAME = "WALinuxAgent" AGENT_LONG_NAME = "Azure Linux Agent" -AGENT_VERSION = '2.2.2' +AGENT_VERSION = '2.2.6' AGENT_LONG_VERSION = "{0}-{1}".format(AGENT_NAME, AGENT_VERSION) AGENT_DESCRIPTION = """\ The Azure Linux Agent supports the provisioning and running of Linux @@ -104,6 +106,7 @@ AGENT_DIR_PATTERN = re.compile(".*/{0}".format(AGENT_PATTERN)) EXT_HANDLER_PATTERN = b".*/WALinuxAgent-(\w.\w.\w[.\w]*)-.*-run-exthandlers" EXT_HANDLER_REGEX = re.compile(EXT_HANDLER_PATTERN) + # Set the CURRENT_AGENT and CURRENT_VERSION to match the agent directory name # - This ensures the agent will "see itself" using the same name and version # as the code that downloads agents. @@ -112,15 +115,19 @@ def set_current_agent(): lib_dir = conf.get_lib_dir() if lib_dir[-1] != os.path.sep: lib_dir += os.path.sep - if path[:len(lib_dir)] != lib_dir: + agent = path[len(lib_dir):].split(os.path.sep)[0] + match = AGENT_NAME_PATTERN.match(agent) + if match: + version = match.group(1) + else: agent = AGENT_LONG_VERSION version = AGENT_VERSION - else: - agent = path[len(lib_dir):].split(os.path.sep)[0] - version = AGENT_NAME_PATTERN.match(agent).group(1) return agent, FlexibleVersion(version) + + CURRENT_AGENT, CURRENT_VERSION = set_current_agent() + def set_goal_state_agent(): agent = None pids = [pid for pid in os.listdir('/proc') if pid.isdigit()] @@ -136,8 +143,11 @@ def set_goal_state_agent(): if agent is None: agent = CURRENT_VERSION return agent + + GOAL_STATE_AGENT_VERSION = set_goal_state_agent() + def is_current_agent_installed(): return CURRENT_AGENT == AGENT_LONG_VERSION @@ -153,13 +163,12 @@ PY_VERSION_MAJOR = sys.version_info[0] PY_VERSION_MINOR = sys.version_info[1] PY_VERSION_MICRO = sys.version_info[2] -""" -Add this workaround for detecting Snappy Ubuntu Core temporarily, until ubuntu -fixed this bug: https://bugs.launchpad.net/snappy/+bug/1481086 -""" - def is_snappy(): + """ + Add this workaround for detecting Snappy Ubuntu Core temporarily, + until ubuntu fixed this bug: https://bugs.launchpad.net/snappy/+bug/1481086 + """ if os.path.exists("/etc/motd"): motd = fileutil.read_file("/etc/motd") if "snappy" in motd: @@ -169,4 +178,3 @@ def is_snappy(): if is_snappy(): DISTRO_FULL_NAME = "Snappy Ubuntu Core" - diff --git a/azurelinuxagent/daemon/main.py b/azurelinuxagent/daemon/main.py index d3185a1..b0da02a 100644 --- a/azurelinuxagent/daemon/main.py +++ b/azurelinuxagent/daemon/main.py @@ -23,34 +23,32 @@ import time import traceback import azurelinuxagent.common.conf as conf -import azurelinuxagent.common.event as event -import azurelinuxagent.common.utils.fileutil as fileutil import azurelinuxagent.common.logger as logger - -from azurelinuxagent.common.future import ustr +import azurelinuxagent.common.utils.fileutil as fileutil from azurelinuxagent.common.event import add_event, WALAEventOperation -from azurelinuxagent.common.exception import ProtocolError +from azurelinuxagent.common.future import ustr from azurelinuxagent.common.osutil import get_osutil from azurelinuxagent.common.protocol import get_protocol_util -from azurelinuxagent.common.rdma import RDMADeviceHandler, setup_rdma_device -from azurelinuxagent.common.utils.textutil import parse_doc, find, getattrib +from azurelinuxagent.common.rdma import setup_rdma_device from azurelinuxagent.common.version import AGENT_LONG_NAME, AGENT_VERSION, \ - DISTRO_NAME, DISTRO_VERSION, \ - DISTRO_FULL_NAME, PY_VERSION_MAJOR, \ - PY_VERSION_MINOR, PY_VERSION_MICRO + DISTRO_NAME, DISTRO_VERSION, PY_VERSION_MAJOR, PY_VERSION_MINOR, \ + PY_VERSION_MICRO from azurelinuxagent.daemon.resourcedisk import get_resourcedisk_handler from azurelinuxagent.daemon.scvmm import get_scvmm_handler +from azurelinuxagent.ga.update import get_update_handler from azurelinuxagent.pa.provision import get_provision_handler from azurelinuxagent.pa.rdma import get_rdma_handler -from azurelinuxagent.ga.update import get_update_handler + def get_daemon_handler(): return DaemonHandler() + class DaemonHandler(object): """ Main thread of daemon. It will invoke other threads to do actual work """ + def __init__(self): self.running = True self.osutil = get_osutil() @@ -59,7 +57,7 @@ class DaemonHandler(object): logger.info("{0} Version:{1}", AGENT_LONG_NAME, AGENT_VERSION) logger.info("OS: {0} {1}", DISTRO_NAME, DISTRO_VERSION) logger.info("Python: {0}.{1}.{2}", PY_VERSION_MAJOR, PY_VERSION_MINOR, - PY_VERSION_MICRO) + PY_VERSION_MICRO) self.check_pid() @@ -68,12 +66,11 @@ class DaemonHandler(object): self.daemon() except Exception as e: err_msg = traceback.format_exc() - add_event("WALA", is_success=False, message=ustr(err_msg), + add_event("WALA", is_success=False, message=ustr(err_msg), op=WALAEventOperation.UnhandledError) logger.info("Sleep 15 seconds and restart daemon") time.sleep(15) - def check_pid(self): """Check whether daemon is already running""" pid = None @@ -84,11 +81,11 @@ class DaemonHandler(object): if self.osutil.check_pid_alive(pid): logger.info("Daemon is already running: {0}", pid) sys.exit(0) - + fileutil.write_file(pid_file, ustr(os.getpid())) def daemon(self): - logger.info("Run daemon") + logger.info("Run daemon") self.protocol_util = get_protocol_util() self.scvmm_handler = get_scvmm_handler() @@ -125,6 +122,6 @@ class DaemonHandler(object): logger.error("Error setting up rdma device: %s" % e) else: logger.info("RDMA capabilities are not enabled, skipping") - + while self.running: self.update_handler.run_latest() diff --git a/azurelinuxagent/daemon/resourcedisk/default.py b/azurelinuxagent/daemon/resourcedisk/default.py index 18ce884..21de38f 100644 --- a/azurelinuxagent/daemon/resourcedisk/default.py +++ b/azurelinuxagent/daemon/resourcedisk/default.py @@ -19,6 +19,8 @@ import os import re import sys import threading +from time import sleep + import azurelinuxagent.common.logger as logger from azurelinuxagent.common.future import ustr import azurelinuxagent.common.conf as conf @@ -99,7 +101,14 @@ class ResourceDiskHandler(object): existing) return existing - fileutil.mkdir(mount_point, mode=0o755) + try: + fileutil.mkdir(mount_point, mode=0o755) + except OSError as ose: + msg = "Failed to create mount point " \ + "directory [{0}]: {1}".format(mount_point, ose) + logger.error(msg) + raise ResourceDiskError(msg=msg, inner=ose) + logger.info("Examining partition table") ret = shellutil.run_get_output("parted {0} print".format(device)) if ret[0]: @@ -144,9 +153,23 @@ class ResourceDiskHandler(object): mount_string = self.get_mount_string(mount_options, partition, mount_point) + attempts = 5 + while not os.path.exists(partition) and attempts > 0: + logger.info("Waiting for partition [{0}], {1} attempts remaining", + partition, + attempts) + sleep(5) + attempts -= 1 + + if not os.path.exists(partition): + raise ResourceDiskError("Partition was not created [{0}]".format(partition)) + logger.info("Mount resource disk [{0}]", mount_string) - ret = shellutil.run(mount_string, chk_err=False) - if ret: + ret, output = shellutil.run_get_output(mount_string, chk_err=False) + # if the exit code is 32, then the resource disk is already mounted + if ret == 32: + logger.warn("Could not mount resource disk: {0}", output) + elif ret != 0: # Some kernels seem to issue an async partition re-read after a # 'parted' command invocation. This causes mount to fail if the # partition re-read is not complete by the time mount is @@ -154,19 +177,25 @@ class ResourceDiskHandler(object): # the partition and try mounting. logger.warn("Failed to mount resource disk. " "Retry mounting after re-reading partition info.") + if shellutil.run("sfdisk -R {0}".format(device), chk_err=False): shellutil.run("blockdev --rereadpt {0}".format(device), chk_err=False) - ret = shellutil.run(mount_string, chk_err=False) + + ret, output = shellutil.run_get_output(mount_string) if ret: logger.warn("Failed to mount resource disk. " - "Attempting to format and retry mount.") + "Attempting to format and retry mount. [{0}]", + output) + shellutil.run(mkfs_string) - ret = shellutil.run(mount_string) + ret, output = shellutil.run_get_output(mount_string) if ret: raise ResourceDiskError("Could not mount {0} " "after syncing partition table: " - "{1}".format(partition, ret)) + "[{1}] {2}".format(partition, + ret, + output)) logger.info("Resource disk {0} is mounted at {1} with {2}", device, @@ -217,7 +246,9 @@ class ResourceDiskHandler(object): swapfile = os.path.join(mount_point, 'swapfile') swaplist = shellutil.run_get_output("swapon -s")[1] - if swapfile in swaplist and os.path.getsize(swapfile) == size: + if swapfile in swaplist \ + and os.path.isfile(swapfile) \ + and os.path.getsize(swapfile) == size: logger.info("Swap already enabled") return @@ -253,8 +284,8 @@ class ResourceDiskHandler(object): if not isinstance(nbytes, int): nbytes = int(nbytes) - if nbytes < 0: - raise ValueError(nbytes) + if nbytes <= 0: + raise ResourceDiskError("Invalid swap size [{0}]".format(nbytes)) if os.path.isfile(filename): os.remove(filename) diff --git a/azurelinuxagent/daemon/resourcedisk/freebsd.py b/azurelinuxagent/daemon/resourcedisk/freebsd.py index 4ca0058..e43d9c4 100644 --- a/azurelinuxagent/daemon/resourcedisk/freebsd.py +++ b/azurelinuxagent/daemon/resourcedisk/freebsd.py @@ -47,7 +47,8 @@ class FreeBSDResourceDiskHandler(ResourceDiskHandler): dic[geom_name] = line[8:] return dic - def mount_resource_disk(self, mount_point, fs): + def mount_resource_disk(self, mount_point): + fs = self.fs if fs != 'ufs': raise ResourceDiskError("Unsupported filesystem type:{0}, only ufs is supported.".format(fs)) diff --git a/azurelinuxagent/ga/env.py b/azurelinuxagent/ga/env.py index 5d8da5c..c81eed7 100644 --- a/azurelinuxagent/ga/env.py +++ b/azurelinuxagent/ga/env.py @@ -56,9 +56,9 @@ class EnvHandler(object): self.stopped = False logger.info("Start env monitor service.") self.dhcp_handler.conf_routes() - self.hostname = socket.gethostname() + self.hostname = self.osutil.get_hostname_record() self.dhcpid = self.osutil.get_dhcp_pid() - self.server_thread = threading.Thread(target = self.monitor) + self.server_thread = threading.Thread(target=self.monitor) self.server_thread.setDaemon(True) self.server_thread.start() @@ -80,8 +80,9 @@ class EnvHandler(object): def handle_hostname_update(self): curr_hostname = socket.gethostname() if curr_hostname != self.hostname: - logger.info("EnvMonitor: Detected host name change: {0} -> {1}", - self.hostname, curr_hostname) + logger.info("EnvMonitor: Detected hostname change: {0} -> {1}", + self.hostname, + curr_hostname) self.osutil.set_hostname(curr_hostname) self.osutil.publish_hostname(curr_hostname) self.hostname = curr_hostname diff --git a/azurelinuxagent/ga/exthandlers.py b/azurelinuxagent/ga/exthandlers.py index c9e6b5f..9b99d04 100644 --- a/azurelinuxagent/ga/exthandlers.py +++ b/azurelinuxagent/ga/exthandlers.py @@ -107,7 +107,8 @@ def parse_ext_status(ext_status, data): if substatus_list is None: return for substatus in substatus_list: - ext_status.substatusList.append(parse_ext_substatus(substatus)) + if substatus is not None: + ext_status.substatusList.append(parse_ext_substatus(substatus)) # This code migrates, if it exists, handler state and status from an # agent-owned directory into the handler-owned config directory @@ -208,18 +209,18 @@ class ExtHandlersHandler(object): def handle_ext_handler(self, ext_handler, etag): ext_handler_i = ExtHandlerInstance(ext_handler, self.protocol) - ext_handler_i.decide_version() - if not ext_handler_i.is_upgrade and self.last_etag == etag: - if self.log_etag: - ext_handler_i.logger.verbose("Version {0} is current for etag {1}", - ext_handler_i.pkg.version, - etag) - self.log_etag = False - return + try: + ext_handler_i.decide_version() + if not ext_handler_i.is_upgrade and self.last_etag == etag: + if self.log_etag: + ext_handler_i.logger.verbose("Version {0} is current for etag {1}", + ext_handler_i.pkg.version, + etag) + self.log_etag = False + return - self.log_etag = True + self.log_etag = True - try: state = ext_handler.properties.state ext_handler_i.logger.info("Expected handler state: {0}", state) if state == "enabled": @@ -281,12 +282,8 @@ class ExtHandlersHandler(object): ext_handler_i.rm_ext_handler_dir() def report_ext_handlers_status(self): - """Go thru handler_state dir, collect and report status""" - vm_status = VMStatus() - vm_status.vmAgent.version = str(CURRENT_VERSION) - vm_status.vmAgent.status = "Ready" - vm_status.vmAgent.message = "Guest Agent is running" - + """Go through handler_state dir, collect and report status""" + vm_status = VMStatus(status="Ready", message="Guest Agent is running") if self.ext_handlers is not None: for ext_handler in self.ext_handlers.extHandlers: try: @@ -297,7 +294,7 @@ class ExtHandlersHandler(object): version=CURRENT_VERSION, is_success=False, message=ustr(e)) - + logger.verbose("Report vm agent status") try: self.protocol.report_vm_status(vm_status) @@ -330,7 +327,8 @@ class ExtHandlersHandler(object): ext_handler_i.set_handler_status(message=ustr(e), code=-1) vm_status.vmAgent.extensionHandlers.append(handler_status) - + + class ExtHandlerInstance(object): def __init__(self, ext_handler, protocol): self.ext_handler = ext_handler @@ -343,7 +341,7 @@ class ExtHandlerInstance(object): self.logger = logger.Logger(logger.DEFAULT_LOGGER, prefix) try: - fileutil.mkdir(self.get_log_dir(), mode=0o744) + fileutil.mkdir(self.get_log_dir(), mode=0o755) except IOError as e: self.logger.error(u"Failed to create extension log dir: {0}", e) @@ -669,7 +667,7 @@ class ExtHandlerInstance(object): ext_status.message = u"Failed to get status file {0}".format(e) ext_status.code = -1 ext_status.status = "error" - except ValueError as e: + except (ExtensionError, ValueError) as e: ext_status.message = u"Malformed status file {0}".format(e) ext_status.code = -1 ext_status.status = "error" @@ -717,7 +715,7 @@ class ExtHandlerInstance(object): def is_responsive(self, heartbeat_file): last_update=int(time.time() - os.stat(heartbeat_file).st_mtime) - return last_update > 600 # not updated for more than 10 min + return last_update <= 600 # updated within the last 10 min def launch_command(self, cmd, timeout=300): self.logger.info("Launch command:{0}", cmd) @@ -807,6 +805,9 @@ class ExtHandlerInstance(object): def set_handler_state(self, handler_state): state_dir = self.get_conf_dir() try: + if not os.path.exists(state_dir): + fileutil.mkdir(state_dir, mode=0o700) + state_file = os.path.join(state_dir, "HandlerState") fileutil.write_file(state_file, handler_state) except IOError as e: diff --git a/azurelinuxagent/ga/monitor.py b/azurelinuxagent/ga/monitor.py index 478a7a3..7ef7f04 100644 --- a/azurelinuxagent/ga/monitor.py +++ b/azurelinuxagent/ga/monitor.py @@ -25,7 +25,7 @@ import threading import azurelinuxagent.common.conf as conf import azurelinuxagent.common.logger as logger -from azurelinuxagent.common.event import WALAEventOperation, add_event +from azurelinuxagent.common.event import add_event, WALAEventOperation from azurelinuxagent.common.exception import EventError, ProtocolError, OSUtilError from azurelinuxagent.common.future import ustr from azurelinuxagent.common.osutil import get_osutil @@ -162,7 +162,7 @@ class MonitorHandler(object): try: event = parse_event(data_str) - event.parameters.extend(self.sysinfo) + self.add_sysinfo(event) event_list.events.append(event) except (ValueError, ProtocolError) as e: logger.warn("Failed to decode event file: {0}", e) @@ -193,3 +193,13 @@ class MonitorHandler(object): except Exception as e: logger.warn("Failed to send events: {0}", e) time.sleep(60) + + def add_sysinfo(self, event): + sysinfo_names = [v.name for v in self.sysinfo] + for param in event.parameters: + if param.name in sysinfo_names: + logger.verbose("Remove existing event parameter: [{0}:{1}]", + param.name, + param.value) + event.parameters.remove(param) + event.parameters.extend(self.sysinfo) diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index 996484b..59bc70c 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -701,11 +701,15 @@ class GuestAgent(object): if self._fetch(uri.uri): break else: - if self.host is not None: - logger.info("Download unsuccessful, falling back to host plugin") + if self.host is not None and self.host.ensure_initialized(): + logger.warn("Download unsuccessful, falling back to host plugin") uri, headers = self.host.get_artifact_request(uri.uri, self.host.manifest_uri) - if self._fetch(uri, headers=headers): + if uri is not None \ + and headers is not None \ + and self._fetch(uri, headers=headers): break + else: + logger.warn("Download unsuccessful, host plugin not available") if not os.path.isfile(self.get_agent_pkg_path()): msg = u"Unable to download Agent {0} from any URI".format(self.name) @@ -730,9 +734,9 @@ class GuestAgent(object): logger.info(u"Agent {0} downloaded from {1}", self.name, uri) except restutil.HttpError as http_error: logger.verbose(u"Agent {0} download from {1} failed [{2}]", - self.name, - uri, - http_error) + self.name, + uri, + http_error) return package is not None def _load_error(self): diff --git a/azurelinuxagent/pa/deprovision/default.py b/azurelinuxagent/pa/deprovision/default.py index a702d3f..ced87ee 100644 --- a/azurelinuxagent/pa/deprovision/default.py +++ b/azurelinuxagent/pa/deprovision/default.py @@ -40,6 +40,7 @@ class DeprovisionHandler(object): def __init__(self): self.osutil = get_osutil() self.protocol_util = get_protocol_util() + self.actions_running = False signal.signal(signal.SIGINT, self.handle_interrupt_signal) def del_root_password(self, warnings, actions): @@ -87,8 +88,9 @@ class DeprovisionHandler(object): dirs_to_del = ["/var/lib/dhclient", "/var/lib/dhcpcd", "/var/lib/dhcp"] actions.append(DeprovisionAction(fileutil.rm_dirs, dirs_to_del)) - # For Freebsd - actions.append(DeprovisionAction(fileutil.rm_files, ["/var/db/dhclient.leases.hn0"])) + # For Freebsd, NM controlled + actions.append(DeprovisionAction(fileutil.rm_files, ["/var/db/dhclient.leases.hn0", + "/var/lib/NetworkManager/dhclient-*.lease"])) def del_lib_dir(self, warnings, actions): dirs_to_del = [conf.get_lib_dir()] @@ -134,11 +136,16 @@ class DeprovisionHandler(object): if not confirm.lower().startswith('y'): return + self.actions_running = True for action in actions: action.invoke() - def handle_interrupt_signal(self, frame): - print("Deprovision is interrupted.") - sys.exit(0) + def handle_interrupt_signal(self, signum, frame): + if not self.actions_running: + print("Deprovision is interrupted.") + sys.exit(0) + + print ('Deprovisioning may not be interrupted.') + return diff --git a/azurelinuxagent/pa/provision/default.py b/azurelinuxagent/pa/provision/default.py index e851036..a99a006 100644 --- a/azurelinuxagent/pa/provision/default.py +++ b/azurelinuxagent/pa/provision/default.py @@ -100,10 +100,10 @@ class ProvisionHandler(object): def provision(self, ovfenv): logger.info("Handle ovf-env.xml.") try: - logger.info("Set host name.") + logger.info("Set hostname [{0}]".format(ovfenv.hostname)) self.osutil.set_hostname(ovfenv.hostname) - logger.info("Publish host name.") + logger.info("Publish hostname [{0}]".format(ovfenv.hostname)) self.osutil.publish_hostname(ovfenv.hostname) self.config_user_account(ovfenv) diff --git a/azurelinuxagent/pa/provision/ubuntu.py b/azurelinuxagent/pa/provision/ubuntu.py index 7f2bce3..a71df37 100644 --- a/azurelinuxagent/pa/provision/ubuntu.py +++ b/azurelinuxagent/pa/provision/ubuntu.py @@ -19,14 +19,12 @@ import os import time -import azurelinuxagent.common.logger as logger -from azurelinuxagent.common.future import ustr + import azurelinuxagent.common.conf as conf -import azurelinuxagent.common.protocol.ovfenv as ovfenv -from azurelinuxagent.common.event import add_event, WALAEventOperation -from azurelinuxagent.common.exception import ProvisionError, ProtocolError -import azurelinuxagent.common.utils.shellutil as shellutil +import azurelinuxagent.common.logger as logger import azurelinuxagent.common.utils.fileutil as fileutil +from azurelinuxagent.common.exception import ProvisionError, ProtocolError +from azurelinuxagent.common.future import ustr from azurelinuxagent.pa.provision.default import ProvisionHandler """ @@ -91,8 +89,14 @@ class UbuntuProvisionHandler(ProvisionHandler): path = '/etc/ssh/ssh_host_{0}_key.pub'.format(keypair_type) for retry in range(0, max_retry): if os.path.isfile(path): - return self.get_ssh_host_key_thumbprint(keypair_type) + logger.info("ssh host key found at: {0}".format(path)) + try: + thumbprint = self.get_ssh_host_key_thumbprint(keypair_type) + logger.info("Thumbprint obtained from : {0}".format(path)) + return thumbprint + except ProvisionError: + logger.warn("Could not get thumbprint from {0}".format(path)) if retry < max_retry - 1: - logger.info("Wait for ssh host key be generated: {0}", path) + logger.info("Wait for ssh host key be generated: {0}".format(path)) time.sleep(5) raise ProvisionError("ssh host key is not generated.") diff --git a/azurelinuxagent/pa/rdma/centos.py b/azurelinuxagent/pa/rdma/centos.py index 8ad09c5..214f9ea 100644 --- a/azurelinuxagent/pa/rdma/centos.py +++ b/azurelinuxagent/pa/rdma/centos.py @@ -173,8 +173,7 @@ class CentOSRDMAHandler(RDMAHandler): 'user mode', self.rdma_user_mode_package_name, umod_pkg_path) logger.info("RDMA: driver packages installed") - self.load_driver_module() - if not self.is_driver_loaded(): + if not self.load_driver_module() or not self.is_driver_loaded(): logger.info("RDMA: driver module is not loaded; reboot required") self.reboot_system() else: diff --git a/azurelinuxagent/pa/rdma/suse.py b/azurelinuxagent/pa/rdma/suse.py index f0d8d0f..d31b2b0 100644 --- a/azurelinuxagent/pa/rdma/suse.py +++ b/azurelinuxagent/pa/rdma/suse.py @@ -93,8 +93,7 @@ class SUSERDMAHandler(RDMAHandler): msg = 'RDMA: Successfully installed "%s" from ' msg += 'configured repositories' logger.info(msg % complete_name) - self.load_driver_module() - if requires_reboot: + if not self.load_driver_module() or requires_reboot: self.reboot_system() return True else: @@ -119,8 +118,7 @@ class SUSERDMAHandler(RDMAHandler): msg = 'RDMA: Successfully installed "%s" from ' msg += 'local package cache' logger.info(msg % (local_package)) - self.load_driver_module() - if requires_reboot: + if not self.load_driver_module() or requires_reboot: self.reboot_system() return True else: |