diff options
Diffstat (limited to 'azurelinuxagent/common')
-rw-r--r-- | azurelinuxagent/common/conf.py | 9 | ||||
-rw-r--r-- | azurelinuxagent/common/errorstate.py | 31 | ||||
-rw-r--r-- | azurelinuxagent/common/event.py | 92 | ||||
-rw-r--r-- | azurelinuxagent/common/exception.py | 42 | ||||
-rw-r--r-- | azurelinuxagent/common/future.py | 7 | ||||
-rw-r--r-- | azurelinuxagent/common/logger.py | 34 | ||||
-rw-r--r-- | azurelinuxagent/common/osutil/default.py | 97 | ||||
-rw-r--r-- | azurelinuxagent/common/osutil/factory.py | 9 | ||||
-rw-r--r-- | azurelinuxagent/common/osutil/ubuntu.py | 37 | ||||
-rw-r--r-- | azurelinuxagent/common/protocol/metadata.py | 7 | ||||
-rw-r--r-- | azurelinuxagent/common/protocol/restapi.py | 12 | ||||
-rw-r--r-- | azurelinuxagent/common/protocol/util.py | 21 | ||||
-rw-r--r-- | azurelinuxagent/common/protocol/wire.py | 221 | ||||
-rw-r--r-- | azurelinuxagent/common/utils/cryptutil.py | 11 | ||||
-rw-r--r-- | azurelinuxagent/common/utils/fileutil.py | 35 | ||||
-rw-r--r-- | azurelinuxagent/common/utils/flexible_version.py | 2 | ||||
-rw-r--r-- | azurelinuxagent/common/utils/restutil.py | 105 | ||||
-rw-r--r-- | azurelinuxagent/common/utils/shellutil.py | 26 | ||||
-rw-r--r-- | azurelinuxagent/common/version.py | 5 |
19 files changed, 600 insertions, 203 deletions
diff --git a/azurelinuxagent/common/conf.py b/azurelinuxagent/common/conf.py index 75a0248..33f953a 100644 --- a/azurelinuxagent/common/conf.py +++ b/azurelinuxagent/common/conf.py @@ -40,11 +40,11 @@ class ConfigurationProvider(object): raise AgentConfigError("Can't not parse empty configuration") for line in content.split('\n'): if not line.startswith("#") and "=" in line: - parts = line.split('=') + parts = line.split('=', 1) if len(parts) < 2: continue key = parts[0].strip() - value = parts[1].strip("\" ") + value = parts[1].split('#')[0].strip("\" ").strip() self.values[key] = value if value != "None" else None def get(self, key, default_val): @@ -131,12 +131,14 @@ __STRING_OPTIONS__ = { } __INTEGER_OPTIONS__ = { + "OS.SshClientAliveInterval" : 180, "Provisioning.PasswordCryptSaltLength" : 10, "HttpProxy.Port" : None, "ResourceDisk.SwapSizeMB" : 0, "Autoupdate.Frequency" : 3600 } + def get_configuration(conf=__conf__): options = {} for option in __SWITCH_OPTIONS__: @@ -190,6 +192,9 @@ def get_fips_enabled(conf=__conf__): def get_openssl_cmd(conf=__conf__): return conf.get("OS.OpensslPath", "/usr/bin/openssl") +def get_ssh_client_alive_interval(conf=__conf__): + return conf.get("OS.SshClientAliveInterval", 180) + def get_ssh_dir(conf=__conf__): return conf.get("OS.SshDir", "/etc/ssh") diff --git a/azurelinuxagent/common/errorstate.py b/azurelinuxagent/common/errorstate.py new file mode 100644 index 0000000..750aa77 --- /dev/null +++ b/azurelinuxagent/common/errorstate.py @@ -0,0 +1,31 @@ +from datetime import datetime, timedelta + +ERROR_STATE_DELTA = timedelta(minutes=15) + + +class ErrorState(object): + def __init__(self, min_timedelta = ERROR_STATE_DELTA): + self.min_timedelta = min_timedelta + + self.count = 0 + self.timestamp = None + + def incr(self): + if self.count == 0: + self.timestamp = datetime.utcnow() + + self.count += 1 + + def reset(self): + self.count = 0 + self.timestamp = None + + def is_triggered(self): + if self.timestamp is None: + return False + + delta = datetime.utcnow() - self.timestamp + if delta >= self.min_timedelta: + return True + + return False diff --git a/azurelinuxagent/common/event.py b/azurelinuxagent/common/event.py index e62a925..84a439f 100644 --- a/azurelinuxagent/common/event.py +++ b/azurelinuxagent/common/event.py @@ -15,51 +15,58 @@ # Requires Python 2.4+ and Openssl 1.0+ # -import os -import sys -import traceback import atexit +import datetime import json +import os +import sys import time -import datetime -import threading -import platform +import traceback -from datetime import datetime, timedelta +from datetime import datetime import azurelinuxagent.common.conf as conf import azurelinuxagent.common.logger as logger -from azurelinuxagent.common.exception import EventError, ProtocolError +from azurelinuxagent.common.exception import EventError from azurelinuxagent.common.future import ustr from azurelinuxagent.common.protocol.restapi import TelemetryEventParam, \ - TelemetryEventList, \ TelemetryEvent, \ - set_properties, get_properties -from azurelinuxagent.common.version import DISTRO_NAME, DISTRO_VERSION, \ - DISTRO_CODE_NAME, AGENT_VERSION, \ - CURRENT_AGENT, CURRENT_VERSION + get_properties +from azurelinuxagent.common.version import CURRENT_VERSION _EVENT_MSG = "Event: name={0}, op={1}, message={2}, duration={3}" + class WALAEventOperation: ActivateResourceDisk = "ActivateResourceDisk" + AgentBlacklisted = "AgentBlacklisted" + AgentEnabled = "AgentEnabled" AutoUpdate = "AutoUpdate" + CustomData = "CustomData" + Deploy = "Deploy" Disable = "Disable" Download = "Download" Enable = "Enable" + ExtensionProcessing = "ExtensionProcessing" Firewall = "Firewall" HealthCheck = "HealthCheck" HeartBeat = "HeartBeat" HostPlugin = "HostPlugin" + HttpErrors = "HttpErrors" Install = "Install" InitializeHostPlugin = "InitializeHostPlugin" + Log = "Log" + Partition = "Partition" ProcessGoalState = "ProcessGoalState" Provision = "Provision" + GuestState = "GuestState" ReportStatus = "ReportStatus" Restart = "Restart" + SkipUpdate = "SkipUpdate" UnhandledError = "UnhandledError" UnInstall = "UnInstall" + Unknown = "Unknown" Upgrade = "Upgrade" Update = "Update" @@ -67,7 +74,7 @@ class WALAEventOperation: class EventStatus(object): EVENT_STATUS_FILE = "event_status.json" - def __init__(self, status_dir=conf.get_lib_dir()): + def __init__(self): self._path = None self._status = {} @@ -82,7 +89,7 @@ class EventStatus(object): event = self._event_name(name, version, op) if event not in self._status: return True - return self._status[event] == True + return self._status[event] is True def initialize(self, status_dir=conf.get_lib_dir()): self._path = os.path.join(status_dir, EventStatus.EVENT_STATUS_FILE) @@ -90,7 +97,7 @@ class EventStatus(object): def mark_event_status(self, name, version, op, status): event = self._event_name(name, version, op) - self._status[event] = (status == True) + self._status[event] = (status is True) self._save() def _event_name(self, name, version, op): @@ -113,6 +120,7 @@ class EventStatus(object): except Exception as e: logger.warn("Exception occurred saving event status: {0}".format(e)) + __event_status__ = EventStatus() __event_status_operations__ = [ WALAEventOperation.AutoUpdate, @@ -171,7 +179,7 @@ class EventLogger(object): (self.periodic_events[h] + delta) <= datetime.now() def add_periodic(self, - delta, name, op="", is_success=True, duration=0, + delta, name, op=WALAEventOperation.Unknown, is_success=True, duration=0, version=CURRENT_VERSION, message="", evt_type="", is_internal=False, log_event=True, force=False): @@ -186,7 +194,7 @@ class EventLogger(object): def add_event(self, name, - op="", + op=WALAEventOperation.Unknown, is_success=True, duration=0, version=CURRENT_VERSION, @@ -215,6 +223,32 @@ class EventLogger(object): except EventError as e: logger.error("{0}", e) + def add_log_event(self, level, message): + # By the time the message has gotten to this point it is formatted as + # + # YYYY/MM/DD HH:mm:ss.fffffff LEVEL <text>. + # + # The timestamp and the level are redundant, and should be stripped. + # The logging library does not schematize this data, so I am forced + # to parse the message. The format is regular, so the burden is low. + + parts = message.split(' ', 3) + msg = parts[3] if len(parts) == 4 \ + else message + + event = TelemetryEvent(7, "FFF0196F-EE4C-4EAF-9AA5-776F622DEB4F") + event.parameters.append(TelemetryEventParam('EventName', WALAEventOperation.Log)) + event.parameters.append(TelemetryEventParam('CapabilityUsed', logger.LogLevel.STRINGS[level])) + event.parameters.append(TelemetryEventParam('Context1', msg)) + event.parameters.append(TelemetryEventParam('Context2', '')) + event.parameters.append(TelemetryEventParam('Context3', '')) + + data = get_properties(event) + try: + self.save_event(json.dumps(data)) + except EventError: + pass + __event_logger__ = EventLogger() @@ -224,6 +258,7 @@ def elapsed_milliseconds(utc_start): return int(((d.days * 24 * 60 * 60 + d.seconds) * 1000) + \ (d.microseconds / 1000.0)) + def report_event(op, is_success=True, message=''): from azurelinuxagent.common.version import AGENT_NAME, CURRENT_VERSION add_event(AGENT_NAME, @@ -232,6 +267,7 @@ def report_event(op, is_success=True, message=''): message=message, op=op) + def report_periodic(delta, op, is_success=True, message=''): from azurelinuxagent.common.version import AGENT_NAME, CURRENT_VERSION add_periodic(delta, AGENT_NAME, @@ -240,7 +276,9 @@ def report_periodic(delta, op, is_success=True, message=''): message=message, op=op) -def add_event(name, op="", is_success=True, duration=0, version=CURRENT_VERSION, + +def add_event(name, op=WALAEventOperation.Unknown, is_success=True, duration=0, + version=CURRENT_VERSION, message="", evt_type="", is_internal=False, log_event=True, reporter=__event_logger__): if reporter.event_dir is None: @@ -255,8 +293,17 @@ def add_event(name, op="", is_success=True, duration=0, version=CURRENT_VERSION, version=str(version), message=message, evt_type=evt_type, is_internal=is_internal, log_event=log_event) + +def add_log_event(level, message, reporter=__event_logger__): + if reporter.event_dir is None: + return + + reporter.add_log_event(level, message) + + def add_periodic( - delta, name, op="", is_success=True, duration=0, version=CURRENT_VERSION, + delta, name, op=WALAEventOperation.Unknown, is_success=True, duration=0, + version=CURRENT_VERSION, message="", evt_type="", is_internal=False, log_event=True, force=False, reporter=__event_logger__): if reporter.event_dir is None: @@ -269,10 +316,12 @@ def add_periodic( version=str(version), message=message, evt_type=evt_type, is_internal=is_internal, log_event=log_event, force=force) + def mark_event_status(name, version, op, status): if op in __event_status_operations__: __event_status__.mark_event_status(name, version, op, status) + def should_emit_event(name, version, op, status): return \ op not in __event_status_operations__ or \ @@ -280,12 +329,15 @@ def should_emit_event(name, version, op, status): not __event_status__.event_marked(name, version, op) or \ __event_status__.event_succeeded(name, version, op) != status + def init_event_logger(event_dir): __event_logger__.event_dir = event_dir + def init_event_status(status_dir): __event_status__.initialize(status_dir) + def dump_unhandled_err(name): if hasattr(sys, 'last_type') and hasattr(sys, 'last_value') and \ hasattr(sys, 'last_traceback'): diff --git a/azurelinuxagent/common/exception.py b/azurelinuxagent/common/exception.py index 17c6ce0..bb8ab10 100644 --- a/azurelinuxagent/common/exception.py +++ b/azurelinuxagent/common/exception.py @@ -26,8 +26,8 @@ class AgentError(Exception): Base class of agent error. """ - def __init__(self, errno, msg, inner=None): - msg = u"[{0}] {1}".format(errno, msg) + def __init__(self, msg, inner=None): + msg = u"[{0}] {1}".format(type(self).__name__, msg) if inner is not None: msg = u"{0}\nInner error: {1}".format(msg, inner) super(AgentError, self).__init__(msg) @@ -39,16 +39,16 @@ class AgentConfigError(AgentError): """ def __init__(self, msg=None, inner=None): - super(AgentConfigError, self).__init__('000001', msg, inner) + super(AgentConfigError, self).__init__(msg, inner) class AgentNetworkError(AgentError): """ - When network is not avaiable. + When network is not available\. """ def __init__(self, msg=None, inner=None): - super(AgentNetworkError, self).__init__('000002', msg, inner) + super(AgentNetworkError, self).__init__(msg, inner) class ExtensionError(AgentError): @@ -57,7 +57,7 @@ class ExtensionError(AgentError): """ def __init__(self, msg=None, inner=None): - super(ExtensionError, self).__init__('000003', msg, inner) + super(ExtensionError, self).__init__(msg, inner) class ProvisionError(AgentError): @@ -66,7 +66,7 @@ class ProvisionError(AgentError): """ def __init__(self, msg=None, inner=None): - super(ProvisionError, self).__init__('000004', msg, inner) + super(ProvisionError, self).__init__(msg, inner) class ResourceDiskError(AgentError): @@ -75,7 +75,7 @@ class ResourceDiskError(AgentError): """ def __init__(self, msg=None, inner=None): - super(ResourceDiskError, self).__init__('000005', msg, inner) + super(ResourceDiskError, self).__init__(msg, inner) class DhcpError(AgentError): @@ -84,7 +84,8 @@ class DhcpError(AgentError): """ def __init__(self, msg=None, inner=None): - super(DhcpError, self).__init__('000006', msg, inner) + super(DhcpError, self).__init__(msg, inner) + class OSUtilError(AgentError): """ @@ -92,7 +93,7 @@ class OSUtilError(AgentError): """ def __init__(self, msg=None, inner=None): - super(OSUtilError, self).__init__('000007', msg, inner) + super(OSUtilError, self).__init__(msg, inner) class ProtocolError(AgentError): @@ -101,7 +102,7 @@ class ProtocolError(AgentError): """ def __init__(self, msg=None, inner=None): - super(ProtocolError, self).__init__('000008', msg, inner) + super(ProtocolError, self).__init__(msg, inner) class ProtocolNotFoundError(ProtocolError): @@ -113,13 +114,22 @@ class ProtocolNotFoundError(ProtocolError): super(ProtocolNotFoundError, self).__init__(msg, inner) +class RestartError(ProtocolError): + """ + Variant of ProtocolError used to restart processing if the GoalState + becomes stale. + """ + + pass + + class HttpError(AgentError): """ Http request failure """ def __init__(self, msg=None, inner=None): - super(HttpError, self).__init__('000009', msg, inner) + super(HttpError, self).__init__(msg, inner) class EventError(AgentError): @@ -128,7 +138,7 @@ class EventError(AgentError): """ def __init__(self, msg=None, inner=None): - super(EventError, self).__init__('000010', msg, inner) + super(EventError, self).__init__(msg, inner) class CryptError(AgentError): @@ -137,7 +147,7 @@ class CryptError(AgentError): """ def __init__(self, msg=None, inner=None): - super(CryptError, self).__init__('000011', msg, inner) + super(CryptError, self).__init__(msg, inner) class UpdateError(AgentError): @@ -146,7 +156,7 @@ class UpdateError(AgentError): """ def __init__(self, msg=None, inner=None): - super(UpdateError, self).__init__('000012', msg, inner) + super(UpdateError, self).__init__(msg, inner) class ResourceGoneError(HttpError): @@ -155,4 +165,6 @@ class ResourceGoneError(HttpError): """ def __init__(self, msg=None, inner=None): + if msg is None: + msg = "Resource is gone" super(ResourceGoneError, self).__init__(msg, inner) diff --git a/azurelinuxagent/common/future.py b/azurelinuxagent/common/future.py index 8d5b70b..8389cd3 100644 --- a/azurelinuxagent/common/future.py +++ b/azurelinuxagent/common/future.py @@ -1,10 +1,10 @@ import sys """ -Add alies for python2 and python3 libs and fucntions. +Add alias for python2 and python3 libs and functions. """ -if sys.version_info[0]== 3: +if sys.version_info[0] == 3: import http.client as httpclient from urllib.parse import urlparse @@ -23,5 +23,4 @@ elif sys.version_info[0] == 2: bytebuffer = buffer else: - raise ImportError("Unknown python version:{0}".format(sys.version_info)) - + raise ImportError("Unknown python version: {0}".format(sys.version_info)) diff --git a/azurelinuxagent/common/logger.py b/azurelinuxagent/common/logger.py index bfdc73a..0a90718 100644 --- a/azurelinuxagent/common/logger.py +++ b/azurelinuxagent/common/logger.py @@ -17,8 +17,8 @@ """ Log utils """ -import os import sys + from azurelinuxagent.common.future import ustr from datetime import datetime, timedelta @@ -28,6 +28,7 @@ EVERY_HOUR = timedelta(hours=1) EVERY_HALF_HOUR = timedelta(minutes=30) EVERY_FIFTEEN_MINUTES = timedelta(minutes=15) + class Logger(object): """ Logger class @@ -92,6 +93,7 @@ class Logger(object): appender = _create_logger_appender(appender_type, level, path) self.appenders.append(appender) + class ConsoleAppender(object): def __init__(self, level, path): self.level = level @@ -105,6 +107,7 @@ class ConsoleAppender(object): except IOError: pass + class FileAppender(object): def __init__(self, level, path): self.level = level @@ -118,6 +121,7 @@ class FileAppender(object): except IOError: pass + class StdoutAppender(object): def __init__(self, level): self.level = level @@ -129,9 +133,24 @@ class StdoutAppender(object): except IOError: pass + +class TelemetryAppender(object): + def __init__(self, level, event_func): + self.level = level + self.event_func = event_func + + def write(self, level, msg): + if self.level <= level: + try: + self.event_func(level, msg) + except IOError: + pass + + #Initialize logger instance DEFAULT_LOGGER = Logger() + class LogLevel(object): VERBOSE = 0 INFO = 1 @@ -144,35 +163,46 @@ class LogLevel(object): "ERROR" ] + class AppenderType(object): FILE = 0 CONSOLE = 1 STDOUT = 2 + TELEMETRY = 3 + def add_logger_appender(appender_type, level=LogLevel.INFO, path=None): DEFAULT_LOGGER.add_appender(appender_type, level, path) + def reset_periodic(): DEFAULT_LOGGER.reset_periodic() + def periodic(delta, msg_format, *args): DEFAULT_LOGGER.periodic(delta, msg_format, *args) + def verbose(msg_format, *args): DEFAULT_LOGGER.verbose(msg_format, *args) + def info(msg_format, *args): DEFAULT_LOGGER.info(msg_format, *args) + def warn(msg_format, *args): DEFAULT_LOGGER.warn(msg_format, *args) + def error(msg_format, *args): DEFAULT_LOGGER.error(msg_format, *args) + def log(level, msg_format, *args): DEFAULT_LOGGER.log(level, msg_format, args) + def _create_logger_appender(appender_type, level=LogLevel.INFO, path=None): if appender_type == AppenderType.CONSOLE: return ConsoleAppender(level, path) @@ -180,6 +210,8 @@ def _create_logger_appender(appender_type, level=LogLevel.INFO, path=None): return FileAppender(level, path) elif appender_type == AppenderType.STDOUT: return StdoutAppender(level) + elif appender_type == AppenderType.TELEMETRY: + return TelemetryAppender(level, path) else: raise ValueError("Unknown appender type") diff --git a/azurelinuxagent/common/osutil/default.py b/azurelinuxagent/common/osutil/default.py index f0b1050..16b7444 100644 --- a/azurelinuxagent/common/osutil/default.py +++ b/azurelinuxagent/common/osutil/default.py @@ -57,10 +57,21 @@ IPTABLES_VERSION = "iptables --version" IPTABLES_LOCKING_VERSION = FlexibleVersion('1.4.21') FIREWALL_ACCEPT = "iptables {0} -t security -{1} OUTPUT -d {2} -p tcp -m owner --uid-owner {3} -j ACCEPT" -FIREWALL_DROP = "iptables {0} -t security -{1} OUTPUT -d {2} -p tcp -j DROP" -FIREWALL_LIST = "iptables {0} -t security -L" +# Note: +# -- Initially "flight" the change to ACCEPT packets and develop a metric baseline +# A subsequent release will convert the ACCEPT to DROP +FIREWALL_DROP = "iptables {0} -t security -{1} OUTPUT -d {2} -p tcp -m conntrack --ctstate INVALID,NEW -j ACCEPT" +# FIREWALL_DROP = "iptables {0} -t security -{1} OUTPUT -d {2} -p tcp -m conntrack --ctstate INVALID,NEW -j DROP" +FIREWALL_LIST = "iptables {0} -t security -L -nxv" +FIREWALL_PACKETS = "iptables {0} -t security -L OUTPUT --zero OUTPUT -nxv" FIREWALL_FLUSH = "iptables {0} -t security --flush" +# Precisely delete the rules created by the agent. +FIREWALL_DELETE_CONNTRACK = "iptables {0} -t security -D OUTPUT -d {1} -p tcp -m conntrack --ctstate INVALID,NEW -j ACCEPT" +FIREWALL_DELETE_OWNER = "iptables {0} -t security -D OUTPUT -d {1} -p tcp -m owner --uid-owner {2} -j ACCEPT" + +PACKET_PATTERN = "^\s*(\d+)\s+(\d+)\s+DROP\s+.*{0}[^\d]*$" + _enable_firewall = True DMIDECODE_CMD = 'dmidecode --string system-uuid' @@ -69,13 +80,45 @@ UUID_PATTERN = re.compile( r'^\s*[A-F0-9]{8}(?:\-[A-F0-9]{4}){3}\-[A-F0-9]{12}\s*$', re.IGNORECASE) -class DefaultOSUtil(object): +class DefaultOSUtil(object): def __init__(self): self.agent_conf_file_path = '/etc/waagent.conf' self.selinux = None self.disable_route_warning = False + def get_firewall_dropped_packets(self, dst_ip=None): + # If a previous attempt failed, do not retry + global _enable_firewall + if not _enable_firewall: + return 0 + + try: + wait = self.get_firewall_will_wait() + + rc, output = shellutil.run_get_output(FIREWALL_PACKETS.format(wait)) + if rc == 3: + # Transient error that we ignore. This code fires every loop + # of the daemon (60m), so we will get the value eventually. + return 0 + + if rc != 0: + return -1 + + pattern = re.compile(PACKET_PATTERN.format(dst_ip)) + for line in output.split('\n'): + m = pattern.match(line) + if m is not None: + return int(m.group(1)) + + return 0 + + except Exception as e: + _enable_firewall = False + logger.warn("Unable to retrieve firewall packets dropped" + "{0}".format(ustr(e))) + return -1 + def get_firewall_will_wait(self): # Determine if iptables will serialize access rc, output = shellutil.run_get_output(IPTABLES_VERSION) @@ -95,31 +138,46 @@ class DefaultOSUtil(object): else "" return wait - def remove_firewall(self): - # If a previous attempt threw an exception, do not retry + def _delete_rule(self, rule): + """ + Continually execute the delete operation until the return + code is non-zero or the limit has been reached. + """ + for i in range(1, 100): + rc = shellutil.run(rule, chk_err=False) + if rc == 1: + return + elif rc == 2: + raise Exception("invalid firewall deletion rule '{0}'".format(rule)) + + def remove_firewall(self, dst_ip=None, uid=None): + # If a previous attempt failed, do not retry global _enable_firewall if not _enable_firewall: return False try: + if dst_ip is None or uid is None: + msg = "Missing arguments to enable_firewall" + logger.warn(msg) + raise Exception(msg) + wait = self.get_firewall_will_wait() - flush_rule = FIREWALL_FLUSH.format(wait) - if shellutil.run(flush_rule, chk_err=False) != 0: - logger.warn("Failed to flush firewall") + self._delete_rule(FIREWALL_DELETE_CONNTRACK.format(wait, dst_ip)) + self._delete_rule(FIREWALL_DELETE_OWNER.format(wait, dst_ip, uid)) return True except Exception as e: _enable_firewall = False - logger.info("Unable to flush firewall -- " + logger.info("Unable to remove firewall -- " "no further attempts will be made: " "{0}".format(ustr(e))) return False def enable_firewall(self, dst_ip=None, uid=None): - - # If a previous attempt threw an exception, do not retry + # If a previous attempt failed, do not retry global _enable_firewall if not _enable_firewall: return False @@ -134,10 +192,15 @@ class DefaultOSUtil(object): # If the DROP rule exists, make no changes drop_rule = FIREWALL_DROP.format(wait, "C", dst_ip) - - if shellutil.run(drop_rule, chk_err=False) == 0: + rc = shellutil.run(drop_rule, chk_err=False) + if rc == 0: logger.verbose("Firewall appears established") return True + elif rc == 2: + self.remove_firewall(dst_ip, uid) + msg = "please upgrade iptables to a version that supports the -C option" + logger.warn(msg) + raise Exception(msg) # Otherwise, append both rules accept_rule = FIREWALL_ACCEPT.format(wait, "A", dst_ip, uid) @@ -309,11 +372,11 @@ class DefaultOSUtil(object): else: sudoer = "{0} ALL=(ALL) ALL".format(username) if not os.path.isfile(sudoers_wagent) or \ - fileutil.findstr_in_file(sudoers_wagent, sudoer) is None: + fileutil.findstr_in_file(sudoers_wagent, sudoer) is False: fileutil.append_file(sudoers_wagent, "{0}\n".format(sudoer)) fileutil.chmod(sudoers_wagent, 0o440) else: - #Remove user from sudoers + # remove user from sudoers if os.path.isfile(sudoers_wagent): try: content = fileutil.read_file(sudoers_wagent) @@ -440,7 +503,7 @@ class DefaultOSUtil(object): conf_file = fileutil.read_file(conf_file_path).split("\n") textutil.set_ssh_config(conf_file, "PasswordAuthentication", option) textutil.set_ssh_config(conf_file, "ChallengeResponseAuthentication", option) - textutil.set_ssh_config(conf_file, "ClientAliveInterval", "180") + textutil.set_ssh_config(conf_file, "ClientAliveInterval", str(conf.get_ssh_client_alive_interval())) fileutil.write_file(conf_file_path, "\n".join(conf_file)) logger.info("{0} SSH password-based authentication methods." .format("Disabled" if disable_password else "Enabled")) @@ -965,7 +1028,7 @@ class DefaultOSUtil(object): if not os.path.exists(hostname_record): # this file is created at provisioning time with agents >= 2.2.3 hostname = socket.gethostname() - logger.warn('Hostname record does not exist, ' + logger.info('Hostname record does not exist, ' 'creating [{0}] with hostname [{1}]', hostname_record, hostname) diff --git a/azurelinuxagent/common/osutil/factory.py b/azurelinuxagent/common/osutil/factory.py index 43aa6a7..1b4e2cb 100644 --- a/azurelinuxagent/common/osutil/factory.py +++ b/azurelinuxagent/common/osutil/factory.py @@ -27,7 +27,8 @@ from .freebsd import FreeBSDOSUtil from .openbsd import OpenBSDOSUtil from .redhat import RedhatOSUtil, Redhat6xOSUtil from .suse import SUSEOSUtil, SUSE11OSUtil -from .ubuntu import UbuntuOSUtil, Ubuntu12OSUtil, Ubuntu14OSUtil, UbuntuSnappyOSUtil +from .ubuntu import UbuntuOSUtil, Ubuntu12OSUtil, Ubuntu14OSUtil, \ + UbuntuSnappyOSUtil, Ubuntu16OSUtil from .alpine import AlpineOSUtil from .bigip import BigIpOSUtil from .gaia import GaiaOSUtil @@ -46,10 +47,12 @@ def get_osutil(distro_name=DISTRO_NAME, return ClearLinuxUtil() if distro_name == "ubuntu": - if Version(distro_version) == Version("12.04") or Version(distro_version) == Version("12.10"): + if Version(distro_version) in [Version("12.04"), Version("12.10")]: return Ubuntu12OSUtil() - elif Version(distro_version) == Version("14.04") or Version(distro_version) == Version("14.10"): + elif Version(distro_version) in [Version("14.04"), Version("14.10")]: return Ubuntu14OSUtil() + elif Version(distro_version) in [Version('16.04'), Version('16.10'), Version('17.04')]: + return Ubuntu16OSUtil() elif distro_full_name == "Snappy Ubuntu Core": return UbuntuSnappyOSUtil() else: diff --git a/azurelinuxagent/common/osutil/ubuntu.py b/azurelinuxagent/common/osutil/ubuntu.py index 3c353cf..8dacc75 100644 --- a/azurelinuxagent/common/osutil/ubuntu.py +++ b/azurelinuxagent/common/osutil/ubuntu.py @@ -16,9 +16,14 @@ # Requires Python 2.4+ and Openssl 1.0+ # +import time + +import azurelinuxagent.common.logger as logger import azurelinuxagent.common.utils.shellutil as shellutil + from azurelinuxagent.common.osutil.default import DefaultOSUtil + class Ubuntu14OSUtil(DefaultOSUtil): def __init__(self): super(Ubuntu14OSUtil, self).__init__() @@ -41,6 +46,7 @@ class Ubuntu14OSUtil(DefaultOSUtil): def get_dhcp_lease_endpoint(self): return self.get_endpoint_from_leases_path('/var/lib/dhcp/dhclient.*.leases') + class Ubuntu12OSUtil(Ubuntu14OSUtil): def __init__(self): super(Ubuntu12OSUtil, self).__init__() @@ -50,9 +56,13 @@ class Ubuntu12OSUtil(Ubuntu14OSUtil): ret = shellutil.run_get_output("pidof dhclient3", chk_err=False) return ret[1] if ret[0] == 0 else None -class UbuntuOSUtil(Ubuntu14OSUtil): + +class Ubuntu16OSUtil(Ubuntu14OSUtil): + """ + Ubuntu 16.04, 16.10, and 17.04. + """ def __init__(self): - super(UbuntuOSUtil, self).__init__() + super(Ubuntu16OSUtil, self).__init__() def register_agent_service(self): return shellutil.run("systemctl unmask walinuxagent", chk_err=False) @@ -60,6 +70,29 @@ class UbuntuOSUtil(Ubuntu14OSUtil): def unregister_agent_service(self): return shellutil.run("systemctl mask walinuxagent", chk_err=False) + +class UbuntuOSUtil(Ubuntu16OSUtil): + def __init__(self): + super(UbuntuOSUtil, self).__init__() + + def restart_if(self, ifname, retries=3, wait=5): + """ + Restart an interface by bouncing the link. systemd-networkd observes + this event, and forces a renew of DHCP. + """ + retry_limit=retries+1 + for attempt in range(1, retry_limit): + return_code=shellutil.run("ip link set {0} down && ip link set {0} up".format(ifname)) + if return_code == 0: + return + logger.warn("failed to restart {0}: return code {1}".format(ifname, return_code)) + if attempt < retry_limit: + logger.info("retrying in {0} seconds".format(wait)) + time.sleep(wait) + else: + logger.warn("exceeded restart retries") + + class UbuntuSnappyOSUtil(Ubuntu14OSUtil): def __init__(self): super(UbuntuSnappyOSUtil, self).__init__() diff --git a/azurelinuxagent/common/protocol/metadata.py b/azurelinuxagent/common/protocol/metadata.py index 4de7ecf..0cfa5ed 100644 --- a/azurelinuxagent/common/protocol/metadata.py +++ b/azurelinuxagent/common/protocol/metadata.py @@ -173,6 +173,11 @@ class MetadataProtocol(Protocol): return None return self.certs + def get_incarnation(self): + # Always return 0 since Azure Stack does not maintain goal state + # incarnation identifiers + return 0 + def get_vmagent_manifests(self): self.update_goal_state() @@ -235,7 +240,7 @@ class MetadataProtocol(Protocol): set_properties("extensionHandlers", ext_list.extHandlers, data) return ext_list, etag - def get_ext_handler_pkgs(self, ext_handler): + def get_ext_handler_pkgs(self, ext_handler, etag): logger.verbose("Get extension handler packages") pkg_list = ExtHandlerPackageList() diff --git a/azurelinuxagent/common/protocol/restapi.py b/azurelinuxagent/common/protocol/restapi.py index 1ec3e21..5a678d7 100644 --- a/azurelinuxagent/common/protocol/restapi.py +++ b/azurelinuxagent/common/protocol/restapi.py @@ -162,6 +162,7 @@ class ExtHandlerProperties(DataContract): def __init__(self): self.version = None self.upgradePolicy = None + self.upgradeGuid = None self.state = None self.extensions = DataContractList(Extension) @@ -245,11 +246,13 @@ class ExtHandlerStatus(DataContract): def __init__(self, name=None, version=None, + upgradeGuid=None, status=None, code=0, message=None): self.name = name self.version = version + self.upgradeGuid = upgradeGuid self.status = status self.code = code self.message = message @@ -300,6 +303,9 @@ class Protocol(DataContract): def get_certs(self): raise NotImplementedError() + def get_incarnation(self): + raise NotImplementedError() + def get_vmagent_manifests(self): raise NotImplementedError() @@ -309,15 +315,15 @@ class Protocol(DataContract): def get_ext_handlers(self): raise NotImplementedError() - def get_ext_handler_pkgs(self, extension): + def get_ext_handler_pkgs(self, extension, etag): raise NotImplementedError() def get_artifacts_profile(self): raise NotImplementedError() - def download_ext_handler_pkg(self, uri, headers=None): + def download_ext_handler_pkg(self, uri, headers=None, use_proxy=True): try: - resp = restutil.http_get(uri, use_proxy=True, headers=headers) + resp = restutil.http_get(uri, headers=headers, use_proxy=use_proxy) if restutil.request_succeeded(resp): return resp.read() except Exception as e: diff --git a/azurelinuxagent/common/protocol/util.py b/azurelinuxagent/common/protocol/util.py index 3071d7a..b3ec373 100644 --- a/azurelinuxagent/common/protocol/util.py +++ b/azurelinuxagent/common/protocol/util.py @@ -26,16 +26,18 @@ import threading import azurelinuxagent.common.conf as conf import azurelinuxagent.common.logger as logger +import azurelinuxagent.common.utils.fileutil as fileutil + from azurelinuxagent.common.exception import ProtocolError, OSUtilError, \ ProtocolNotFoundError, DhcpError from azurelinuxagent.common.future import ustr -import azurelinuxagent.common.utils.fileutil as fileutil from azurelinuxagent.common.osutil import get_osutil from azurelinuxagent.common.dhcp import get_dhcp_handler from azurelinuxagent.common.protocol.ovfenv import OvfEnv from azurelinuxagent.common.protocol.wire import WireProtocol from azurelinuxagent.common.protocol.metadata import MetadataProtocol, \ METADATA_ENDPOINT +from azurelinuxagent.common.utils.restutil import IOErrorCounter OVF_FILE_NAME = "ovf-env.xml" TAG_FILE_NAME = "useMetadataEndpoint.tag" @@ -175,17 +177,20 @@ class ProtocolUtil(object): self.clear_protocol() for retry in range(0, MAX_RETRY): - for protocol in protocols: + for protocol_name in protocols: try: - if protocol == "WireProtocol": - return self._detect_wire_protocol() - - if protocol == "MetadataProtocol": - return self._detect_metadata_protocol() + protocol = self._detect_wire_protocol() \ + if protocol_name == "WireProtocol" \ + else self._detect_metadata_protocol() + + IOErrorCounter.set_protocol_endpoint( + endpoint=protocol.endpoint) + + return protocol except ProtocolError as e: logger.info("Protocol endpoint not found: {0}, {1}", - protocol, e) + protocol_name, e) if retry < MAX_RETRY -1: logger.info("Retry detect protocols: retry={0}", retry) diff --git a/azurelinuxagent/common/protocol/wire.py b/azurelinuxagent/common/protocol/wire.py index 4f3b7e0..963d33c 100644 --- a/azurelinuxagent/common/protocol/wire.py +++ b/azurelinuxagent/common/protocol/wire.py @@ -18,6 +18,7 @@ import json import os +import random import re import time import xml.sax.saxutils as saxutils @@ -27,7 +28,7 @@ import azurelinuxagent.common.utils.fileutil as fileutil import azurelinuxagent.common.utils.textutil as textutil from azurelinuxagent.common.exception import ProtocolNotFoundError, \ - ResourceGoneError + ResourceGoneError, RestartError from azurelinuxagent.common.future import httpclient, bytebuffer from azurelinuxagent.common.protocol.hostplugin import HostPluginProtocol from azurelinuxagent.common.protocol.restapi import * @@ -51,6 +52,7 @@ P7M_FILE_NAME = "Certificates.p7m" PEM_FILE_NAME = "Certificates.pem" EXT_CONF_FILE_NAME = "ExtensionsConfig.{0}.xml" MANIFEST_FILE_NAME = "{0}.{1}.manifest.xml" +AGENTS_MANIFEST_FILE_NAME = "{0}.{1}.agentsManifest" TRANSPORT_CERT_FILE_NAME = "TransportCert.pem" TRANSPORT_PRV_FILE_NAME = "TransportPrivate.pem" @@ -58,17 +60,12 @@ PROTOCOL_VERSION = "2012-11-30" ENDPOINT_FINE_NAME = "WireServer" SHORT_WAITING_INTERVAL = 1 # 1 second -LONG_WAITING_INTERVAL = 15 # 15 seconds class UploadError(HttpError): pass -class WireProtocolResourceGone(ProtocolError): - pass - - class WireProtocol(Protocol): """Slim layer to adapt wire protocol data to metadata protocol interface""" @@ -119,6 +116,13 @@ class WireProtocol(Protocol): certificates = self.client.get_certs() return certificates.cert_list + def get_incarnation(self): + path = os.path.join(conf.get_lib_dir(), INCARNATION_FILE_NAME) + if os.path.exists(path): + return fileutil.read_file(path) + else: + return 0 + def get_vmagent_manifests(self): # Update goal state to get latest extensions config self.update_goal_state() @@ -128,8 +132,9 @@ class WireProtocol(Protocol): def get_vmagent_pkgs(self, vmagent_manifest): goal_state = self.client.get_goal_state() - man = self.client.get_gafamily_manifest(vmagent_manifest, goal_state) - return man.pkg_list + ga_manifest = self.client.get_gafamily_manifest(vmagent_manifest, goal_state) + valid_pkg_list = self.client.filter_package_list(vmagent_manifest.family, ga_manifest, goal_state) + return valid_pkg_list def get_ext_handlers(self): logger.verbose("Get extension handler config") @@ -140,10 +145,9 @@ class WireProtocol(Protocol): # In wire protocol, incarnation is equivalent to ETag return ext_conf.ext_handlers, goal_state.incarnation - def get_ext_handler_pkgs(self, ext_handler): + def get_ext_handler_pkgs(self, ext_handler, etag): logger.verbose("Get extension handler package") - goal_state = self.client.get_goal_state() - man = self.client.get_ext_manifest(ext_handler, goal_state) + man = self.client.get_ext_manifest(ext_handler, etag) return man.pkg_list def get_artifacts_profile(self): @@ -156,10 +160,10 @@ class WireProtocol(Protocol): if package is not None: return package else: - logger.warn("Download did not succeed, falling back to host plugin") + logger.verbose("Download did not succeed, falling back to host plugin") host = self.client.get_host_plugin() uri, headers = host.get_artifact_request(uri, host.manifest_uri) - package = super(WireProtocol, self).download_ext_handler_pkg(uri, headers=headers) + package = super(WireProtocol, self).download_ext_handler_pkg(uri, headers=headers, use_proxy=False) return package def report_provision_status(self, provision_status): @@ -248,22 +252,28 @@ Convert VMStatus object to status blob format """ +def ga_status_to_guest_info(ga_status): + v1_ga_guest_info = { + "computerName" : ga_status.hostname, + "osName" : ga_status.osname, + "osVersion" : ga_status.osversion, + "version" : ga_status.version, + } + return v1_ga_guest_info + + def ga_status_to_v1(ga_status): formatted_msg = { 'lang': 'en-US', 'message': ga_status.message } v1_ga_status = { - 'version': ga_status.version, - 'status': ga_status.status, - 'osversion': ga_status.osversion, - 'osname': ga_status.osname, - 'hostname': ga_status.hostname, - 'formattedMessage': formatted_msg + "version" : ga_status.version, + "status" : ga_status.status, + "formattedMessage" : formatted_msg } return v1_ga_status - def ext_substatus_to_v1(sub_status_list): status_list = [] for substatus in sub_status_list: @@ -318,6 +328,9 @@ def ext_handler_status_to_v1(handler_status, ext_statuses, timestamp): "message": handler_status.message } + if handler_status.upgradeGuid is not None: + v1_handler_status["upgradeGuid"] = handler_status.upgradeGuid + if len(handler_status.extensions) > 0: # Currently, no more than one extension per handler ext_name = handler_status.extensions[0] @@ -334,6 +347,7 @@ def ext_handler_status_to_v1(handler_status, ext_statuses, timestamp): def vm_status_to_v1(vm_status, ext_statuses): timestamp = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) + v1_ga_guest_info = ga_status_to_guest_info(vm_status.vmAgent) v1_ga_status = ga_status_to_v1(vm_status.vmAgent) v1_handler_status_list = [] for handler_status in vm_status.vmAgent.extensionHandlers: @@ -349,7 +363,8 @@ def vm_status_to_v1(vm_status, ext_statuses): v1_vm_status = { 'version': '1.1', 'timestampUTC': timestamp, - 'aggregateStatus': v1_agg_status + 'aggregateStatus': v1_agg_status, + 'guestOSInfo' : v1_ga_guest_info } return v1_vm_status @@ -512,51 +527,29 @@ class WireClient(object): self.shared_conf = None self.certs = None self.ext_conf = None - self.last_request = 0 - self.req_count = 0 self.host_plugin = None self.status_blob = StatusBlob(self) - def prevent_throttling(self): - """ - Try to avoid throttling of wire server - """ - now = time.time() - if now - self.last_request < 1: - logger.verbose("Last request issued less than 1 second ago") - logger.verbose("Sleep {0} second to avoid throttling.", - SHORT_WAITING_INTERVAL) - time.sleep(SHORT_WAITING_INTERVAL) - self.last_request = now - - self.req_count += 1 - if self.req_count % 3 == 0: - logger.verbose("Sleep {0} second to avoid throttling.", - SHORT_WAITING_INTERVAL) - time.sleep(SHORT_WAITING_INTERVAL) - self.req_count = 0 - def call_wireserver(self, http_req, *args, **kwargs): - self.prevent_throttling() - try: # Never use the HTTP proxy for wireserver kwargs['use_proxy'] = False resp = http_req(*args, **kwargs) + + if restutil.request_failed(resp): + msg = "[Wireserver Failed] URI {0} ".format(args[0]) + if resp is not None: + msg += " [HTTP Failed] Status Code {0}".format(resp.status) + raise ProtocolError(msg) + + # If the GoalState is stale, pass along the exception to the caller + except ResourceGoneError: + raise + except Exception as e: raise ProtocolError("[Wireserver Exception] {0}".format( ustr(e))) - if resp is not None and resp.status == httpclient.GONE: - msg = args[0] if len(args) > 0 else "" - raise WireProtocolResourceGone(msg) - - elif restutil.request_failed(resp): - msg = "[Wireserver Failed] URI {0} ".format(args[0]) - if resp is not None: - msg += " [HTTP Failed] Status Code {0}".format(resp.status) - raise ProtocolError(msg) - return resp def decode_config(self, data): @@ -598,7 +591,10 @@ class WireClient(object): def fetch_manifest(self, version_uris): logger.verbose("Fetch manifest") - for version in version_uris: + version_uris_shuffled = version_uris + random.shuffle(version_uris_shuffled) + + for version in version_uris_shuffled: response = None if not HostPluginProtocol.is_default_channel(): response = self.fetch(version.uri) @@ -698,26 +694,28 @@ class WireClient(object): self.ext_conf = ExtensionsConfig(xml_text) def update_goal_state(self, forced=False, max_retry=3): - uri = GOAL_STATE_URI.format(self.endpoint) - xml_text = self.fetch_config(uri, self.get_header()) - goal_state = GoalState(xml_text) - incarnation_file = os.path.join(conf.get_lib_dir(), INCARNATION_FILE_NAME) + uri = GOAL_STATE_URI.format(self.endpoint) - if not forced: - last_incarnation = None - if os.path.isfile(incarnation_file): - last_incarnation = fileutil.read_file(incarnation_file) - new_incarnation = goal_state.incarnation - if last_incarnation is not None and \ - last_incarnation == new_incarnation: - # Goalstate is not updated. - return - - # Start updating goalstate, retry on 410 + goal_state = None for retry in range(0, max_retry): try: + if goal_state is None: + xml_text = self.fetch_config(uri, self.get_header()) + goal_state = GoalState(xml_text) + + if not forced: + last_incarnation = None + if os.path.isfile(incarnation_file): + last_incarnation = fileutil.read_file( + incarnation_file) + new_incarnation = goal_state.incarnation + if last_incarnation is not None and \ + last_incarnation == new_incarnation: + # Goalstate is not updated. + return + self.goal_state = goal_state file_name = GOAL_STATE_FILE_NAME.format(goal_state.incarnation) goal_state_file = os.path.join(conf.get_lib_dir(), file_name) @@ -727,21 +725,29 @@ class WireClient(object): self.update_certs(goal_state) self.update_ext_conf(goal_state) self.save_cache(incarnation_file, goal_state.incarnation) + if self.host_plugin is not None: self.host_plugin.container_id = goal_state.container_id self.host_plugin.role_config_name = goal_state.role_config_name + return - except ProtocolError: + except ResourceGoneError: + logger.info("GoalState is stale -- re-fetching") + goal_state = None + + except Exception as e: + log_method = logger.info \ + if type(e) is ProtocolError \ + else logger.warn + log_method( + "Exception processing GoalState-related files: {0}".format( + ustr(e))) + if retry < max_retry-1: continue raise - except WireProtocolResourceGone: - logger.info("Incarnation is out of date. Update goalstate.") - xml_text = self.fetch_config(uri, self.get_header()) - goal_state = GoalState(xml_text) - raise ProtocolError("Exceeded max retry updating goal state") def get_goal_state(self): @@ -793,25 +799,70 @@ class WireClient(object): self.ext_conf = ExtensionsConfig(xml_text) return self.ext_conf - def get_ext_manifest(self, ext_handler, goal_state): + def get_ext_manifest(self, ext_handler, incarnation): + for update_goal_state in [False, True]: try: if update_goal_state: self.update_goal_state(forced=True) - goal_state = self.get_goal_state() + incarnation = self.get_goal_state().incarnation local_file = MANIFEST_FILE_NAME.format( ext_handler.name, - goal_state.incarnation) + incarnation) local_file = os.path.join(conf.get_lib_dir(), local_file) - xml_text = self.fetch_manifest(ext_handler.versionUris) - self.save_cache(local_file, xml_text) + + xml_text = None + if not update_goal_state: + try: + xml_text = self.fetch_cache(local_file) + except ProtocolError: + pass + + if xml_text is None: + xml_text = self.fetch_manifest(ext_handler.versionUris) + self.save_cache(local_file, xml_text) return ExtensionManifest(xml_text) except ResourceGoneError: continue - raise ProtocolError("Failed to retrieve extension manifest") + raise RestartError("Failed to retrieve extension manifest") + + def filter_package_list(self, family, ga_manifest, goal_state): + complete_list = ga_manifest.pkg_list + agent_manifest = os.path.join(conf.get_lib_dir(), + AGENTS_MANIFEST_FILE_NAME.format( + family, + goal_state.incarnation)) + + if not os.path.exists(agent_manifest): + # clear memory cache + ga_manifest.allowed_versions = None + + # create disk cache + with open(agent_manifest, mode='w') as manifest_fh: + for version in complete_list.versions: + manifest_fh.write('{0}\n'.format(version.version)) + fileutil.chmod(agent_manifest, 0o644) + + return complete_list + + else: + # use allowed versions from cache, otherwise from disk + if ga_manifest.allowed_versions is None: + with open(agent_manifest, mode='r') as manifest_fh: + ga_manifest.allowed_versions = [v.strip('\n') for v + in manifest_fh.readlines()] + + # use the updated manifest urls for allowed versions + allowed_list = ExtHandlerPackageList() + allowed_list.versions = [version for version + in complete_list.versions + if version.version + in ga_manifest.allowed_versions] + + return allowed_list def get_gafamily_manifest(self, vmagent_manifest, goal_state): for update_goal_state in [False, True]: @@ -844,7 +895,7 @@ class WireClient(object): logger.info("Wire protocol version:{0}", PROTOCOL_VERSION) elif PROTOCOL_VERSION in version_info.get_supported(): logger.info("Wire protocol version:{0}", PROTOCOL_VERSION) - logger.warn("Server preferred version:{0}", preferred) + logger.info("Server preferred version:{0}", preferred) else: error = ("Agent supported wire protocol version: {0} was not " "advised by Fabric.").format(PROTOCOL_VERSION) @@ -1360,6 +1411,9 @@ class ExtensionsConfig(object): ext_handler.properties.version = getattrib(plugin, "version") ext_handler.properties.state = getattrib(plugin, "state") + ext_handler.properties.upgradeGuid = getattrib(plugin, "upgradeGuid") + if not ext_handler.properties.upgradeGuid: + ext_handler.properties.upgradeGuid = None auto_upgrade = getattrib(plugin, "autoUpgrade") if auto_upgrade is not None and auto_upgrade.lower() == "true": ext_handler.properties.upgradePolicy = "auto" @@ -1418,6 +1472,7 @@ class ExtensionManifest(object): raise ValueError("ExtensionManifest is None") logger.verbose("Load ExtensionManifest.xml") self.pkg_list = ExtHandlerPackageList() + self.allowed_versions = None self.parse(xml_text) def parse(self, xml_text): diff --git a/azurelinuxagent/common/utils/cryptutil.py b/azurelinuxagent/common/utils/cryptutil.py index 6339eb3..b34c1a5 100644 --- a/azurelinuxagent/common/utils/cryptutil.py +++ b/azurelinuxagent/common/utils/cryptutil.py @@ -19,8 +19,11 @@ import base64 import struct + from azurelinuxagent.common.future import ustr, bytebuffer from azurelinuxagent.common.exception import CryptError + +import azurelinuxagent.common.logger as logger import azurelinuxagent.common.utils.shellutil as shellutil class CryptUtil(object): @@ -34,7 +37,10 @@ class CryptUtil(object): cmd = ("{0} req -x509 -nodes -subj /CN=LinuxTransport -days 730 " "-newkey rsa:2048 -keyout {1} " "-out {2}").format(self.openssl_cmd, prv_file, crt_file) - shellutil.run(cmd) + rc = shellutil.run(cmd) + if rc != 0: + logger.error("Failed to create {0} and {1} certificates".format( + prv_file, crt_file)) def get_pubkey_from_prv(self, file_name): cmd = "{0} rsa -in {1} -pubout 2>/dev/null".format(self.openssl_cmd, @@ -61,6 +67,9 @@ class CryptUtil(object): "").format(self.openssl_cmd, p7m_file, trans_prv_file, trans_cert_file, self.openssl_cmd, pem_file) shellutil.run(cmd) + rc = shellutil.run(cmd) + if rc != 0: + logger.error("Failed to decrypt {0}".format(p7m_file)) def crt_to_ssh(self, input_file, output_file): shellutil.run("ssh-keygen -i -m PKCS8 -f {0} >> {1}".format(input_file, diff --git a/azurelinuxagent/common/utils/fileutil.py b/azurelinuxagent/common/utils/fileutil.py index 96b5b82..1f0c7ac 100644 --- a/azurelinuxagent/common/utils/fileutil.py +++ b/azurelinuxagent/common/utils/fileutil.py @@ -27,7 +27,6 @@ import os import pwd import re import shutil -import string import azurelinuxagent.common.logger as logger import azurelinuxagent.common.utils.textutil as textutil @@ -42,9 +41,10 @@ KNOWN_IOERRORS = [ errno.ENOSPC, # Out of space errno.ENAMETOOLONG, # Name too long errno.ELOOP, # Too many symbolic links encountered - errno.EREMOTEIO # Remote I/O error + 121 # Remote I/O error (errno.EREMOTEIO -- not present in all Python 2.7+) ] + def copy_file(from_path, to_path=None, to_dir=None): if to_path is None: to_path = os.path.join(to_dir, os.path.basename(from_path)) @@ -66,11 +66,12 @@ def read_file(filepath, asbin=False, remove_bom=False, encoding='utf-8'): return data if remove_bom: - #Remove bom on bytes data before it is converted into string. + # remove bom on bytes data before it is converted into string. data = textutil.remove_bom(data) data = ustr(data, encoding=encoding) return data + def write_file(filepath, contents, asbin=False, encoding='utf-8', append=False): """ Write 'contents' to 'filepath'. @@ -82,6 +83,7 @@ def write_file(filepath, contents, asbin=False, encoding='utf-8', append=False): with open(filepath, mode) as out_file: out_file.write(data) + def append_file(filepath, contents, asbin=False, encoding='utf-8'): """ Append 'contents' to 'filepath'. @@ -93,6 +95,7 @@ def base_name(path): head, tail = os.path.split(path) return tail + def get_line_startingwith(prefix, filepath): """ Return line from 'filepath' if the line startswith 'prefix' @@ -102,7 +105,6 @@ def get_line_startingwith(prefix, filepath): return line return None -#End File operation util functions def mkdir(dirpath, mode=None, owner=None): if not os.path.isdir(dirpath): @@ -112,6 +114,7 @@ def mkdir(dirpath, mode=None, owner=None): if owner is not None: chowner(dirpath, owner) + def chowner(path, owner): if not os.path.exists(path): logger.error("Path does not exist: {0}".format(path)) @@ -119,19 +122,22 @@ def chowner(path, owner): owner_info = pwd.getpwnam(owner) os.chown(path, owner_info[2], owner_info[3]) + def chmod(path, mode): if not os.path.exists(path): logger.error("Path does not exist: {0}".format(path)) else: os.chmod(path, mode) + def rm_files(*args): for paths in args: - #Find all possible file paths + # find all possible file paths for path in glob.glob(paths): if os.path.isfile(path): os.remove(path) + def rm_dirs(*args): """ Remove the contents of each directry @@ -149,20 +155,24 @@ def rm_dirs(*args): elif os.path.isdir(path): shutil.rmtree(path) + def trim_ext(path, ext): if not ext.startswith("."): ext = "." + ext return path.split(ext)[0] if path.endswith(ext) else path + def update_conf_file(path, line_start, val, chk_err=False): conf = [] if not os.path.isfile(path) and chk_err: raise IOError("Can't find config file:{0}".format(path)) conf = read_file(path).split('\n') - conf = [x for x in conf if x is not None and len(x) > 0 and not x.startswith(line_start)] + conf = [x for x in conf + if x is not None and len(x) > 0 and not x.startswith(line_start)] conf.append(val) write_file(path, '\n'.join(conf) + '\n') + def search_file(target_dir_name, target_file_name): for root, dirs, files in os.walk(target_dir_name): for file_name in files: @@ -170,24 +180,28 @@ def search_file(target_dir_name, target_file_name): return os.path.join(root, file_name) return None + def chmod_tree(path, mode): for root, dirs, files in os.walk(path): for file_name in files: os.chmod(os.path.join(root, file_name), mode) + def findstr_in_file(file_path, line_str): """ Return True if the line is in the file; False otherwise. - (Trailing whitespace is ignore.) + (Trailing whitespace is ignored.) """ try: for line in (open(file_path, 'r')).readlines(): if line_str == line.rstrip(): return True - except Exception as e: + except Exception: + # swallow exception pass return False + def findre_in_file(file_path, line_re): """ Return match object if found in file. @@ -203,6 +217,7 @@ def findre_in_file(file_path, line_re): return None + def get_all_files(root_path): """ Find all files under the given root path @@ -213,6 +228,7 @@ def get_all_files(root_path): return result + def clean_ioerror(e, paths=[]): """ Clean-up possibly bad files and directories after an IO error. @@ -228,5 +244,6 @@ def clean_ioerror(e, paths=[]): shutil.rmtree(path, ignore_errors=True) else: os.remove(path) - except Exception as e: + except Exception: + # swallow exception pass diff --git a/azurelinuxagent/common/utils/flexible_version.py b/azurelinuxagent/common/utils/flexible_version.py index 2fce88d..14c2a73 100644 --- a/azurelinuxagent/common/utils/flexible_version.py +++ b/azurelinuxagent/common/utils/flexible_version.py @@ -37,7 +37,7 @@ class FlexibleVersion(version.Version): self.prerelease = None self.version = () if vstring: - self._parse(vstring) + self._parse(str(vstring)) return _nn_version = 'version' diff --git a/azurelinuxagent/common/utils/restutil.py b/azurelinuxagent/common/utils/restutil.py index ddd930b..807be29 100644 --- a/azurelinuxagent/common/utils/restutil.py +++ b/azurelinuxagent/common/utils/restutil.py @@ -18,6 +18,7 @@ # import os +import threading import time import traceback @@ -32,10 +33,11 @@ from azurelinuxagent.common.version import PY_VERSION_MAJOR SECURE_WARNING_EMITTED = False -DEFAULT_RETRIES = 3 +DEFAULT_RETRIES = 6 +DELAY_IN_SECONDS = 1 -SHORT_DELAY_IN_SECONDS = 5 -LONG_DELAY_IN_SECONDS = 15 +THROTTLE_RETRIES = 25 +THROTTLE_DELAY_IN_SECONDS = 1 RETRY_CODES = [ httpclient.RESET_CONTENT, @@ -63,7 +65,8 @@ OK_CODES = [ THROTTLE_CODES = [ httpclient.FORBIDDEN, - httpclient.SERVICE_UNAVAILABLE + httpclient.SERVICE_UNAVAILABLE, + 429, # Request Rate Limit Exceeded ] RETRY_EXCEPTIONS = [ @@ -76,6 +79,48 @@ RETRY_EXCEPTIONS = [ HTTP_PROXY_ENV = "http_proxy" HTTPS_PROXY_ENV = "https_proxy" +DEFAULT_PROTOCOL_ENDPOINT='168.63.129.16' +HOST_PLUGIN_PORT = 32526 + + +class IOErrorCounter(object): + _lock = threading.RLock() + _protocol_endpoint = DEFAULT_PROTOCOL_ENDPOINT + _counts = {"hostplugin":0, "protocol":0, "other":0} + + @staticmethod + def increment(host=None, port=None): + with IOErrorCounter._lock: + if host == IOErrorCounter._protocol_endpoint: + if port == HOST_PLUGIN_PORT: + IOErrorCounter._counts["hostplugin"] += 1 + else: + IOErrorCounter._counts["protocol"] += 1 + else: + IOErrorCounter._counts["other"] += 1 + + @staticmethod + def get_and_reset(): + with IOErrorCounter._lock: + counts = IOErrorCounter._counts.copy() + IOErrorCounter.reset() + return counts + + @staticmethod + def reset(): + with IOErrorCounter._lock: + IOErrorCounter._counts = {"hostplugin":0, "protocol":0, "other":0} + + @staticmethod + def set_protocol_endpoint(endpoint=DEFAULT_PROTOCOL_ENDPOINT): + IOErrorCounter._protocol_endpoint = endpoint + + +def _compute_delay(retry_attempt=1, delay=DELAY_IN_SECONDS): + fib = (1, 1) + for n in range(retry_attempt): + fib = (fib[1], fib[0]+fib[1]) + return delay*fib[1] def _is_retry_status(status, retry_codes=RETRY_CODES): return status in retry_codes @@ -166,7 +211,7 @@ def http_request(method, use_proxy=False, max_retry=DEFAULT_RETRIES, retry_codes=RETRY_CODES, - retry_delay=SHORT_DELAY_IN_SECONDS): + retry_delay=DELAY_IN_SECONDS): global SECURE_WARNING_EMITTED @@ -208,18 +253,31 @@ def http_request(method, msg = '' attempt = 0 - delay = retry_delay + delay = 0 + was_throttled = False while attempt < max_retry: if attempt > 0: - logger.info("[HTTP Retry] Attempt {0} of {1}: {2}", + # Compute the request delay + # -- Use a fixed delay if the server ever rate-throttles the request + # (with a safe, minimum number of retry attempts) + # -- Otherwise, compute a delay that is the product of the next + # item in the Fibonacci series and the initial delay value + delay = THROTTLE_DELAY_IN_SECONDS \ + if was_throttled \ + else _compute_delay(retry_attempt=attempt, + delay=retry_delay) + + logger.verbose("[HTTP Retry] " + "Attempt {0} of {1} will delay {2} seconds: {3}", attempt+1, max_retry, + delay, msg) + time.sleep(delay) attempt += 1 - delay = retry_delay try: resp = _http_request(method, @@ -235,13 +293,13 @@ def http_request(method, if request_failed(resp): if _is_retry_status(resp.status, retry_codes=retry_codes): - msg = '[HTTP Retry] HTTP {0} Status Code {1}'.format( - method, resp.status) + msg = '[HTTP Retry] {0} {1} -- Status Code {2}'.format( + method, url, resp.status) + # Note if throttled and ensure a safe, minimum number of + # retry attempts if _is_throttle_status(resp.status): - delay = LONG_DELAY_IN_SECONDS - logger.info("[HTTP Delay] Delay {0} seconds for " \ - "Status Code {1}".format( - delay, resp.status)) + was_throttled = True + max_retry = max(max_retry, THROTTLE_RETRIES) continue if resp.status in RESOURCE_GONE_CODES: @@ -250,22 +308,25 @@ def http_request(method, return resp except httpclient.HTTPException as e: - msg = '[HTTP Failed] HTTP {0} HttpException {1}'.format(method, e) + msg = '[HTTP Failed] {0} {1} -- HttpException {2}'.format( + method, url, e) if _is_retry_exception(e): continue break except IOError as e: - msg = '[HTTP Failed] HTTP {0} IOError {1}'.format(method, e) + IOErrorCounter.increment(host=host, port=port) + msg = '[HTTP Failed] {0} {1} -- IOError {2}'.format( + method, url, e) continue - raise HttpError(msg) + raise HttpError("{0} -- {1} attempts made".format(msg,attempt)) def http_get(url, headers=None, use_proxy=False, max_retry=DEFAULT_RETRIES, retry_codes=RETRY_CODES, - retry_delay=SHORT_DELAY_IN_SECONDS): + retry_delay=DELAY_IN_SECONDS): return http_request("GET", url, None, headers=headers, use_proxy=use_proxy, @@ -277,7 +338,7 @@ def http_get(url, headers=None, use_proxy=False, def http_head(url, headers=None, use_proxy=False, max_retry=DEFAULT_RETRIES, retry_codes=RETRY_CODES, - retry_delay=SHORT_DELAY_IN_SECONDS): + retry_delay=DELAY_IN_SECONDS): return http_request("HEAD", url, None, headers=headers, use_proxy=use_proxy, @@ -289,7 +350,7 @@ def http_head(url, headers=None, use_proxy=False, def http_post(url, data, headers=None, use_proxy=False, max_retry=DEFAULT_RETRIES, retry_codes=RETRY_CODES, - retry_delay=SHORT_DELAY_IN_SECONDS): + retry_delay=DELAY_IN_SECONDS): return http_request("POST", url, data, headers=headers, use_proxy=use_proxy, @@ -301,7 +362,7 @@ def http_post(url, data, headers=None, use_proxy=False, def http_put(url, data, headers=None, use_proxy=False, max_retry=DEFAULT_RETRIES, retry_codes=RETRY_CODES, - retry_delay=SHORT_DELAY_IN_SECONDS): + retry_delay=DELAY_IN_SECONDS): return http_request("PUT", url, data, headers=headers, use_proxy=use_proxy, @@ -313,7 +374,7 @@ def http_put(url, data, headers=None, use_proxy=False, def http_delete(url, headers=None, use_proxy=False, max_retry=DEFAULT_RETRIES, retry_codes=RETRY_CODES, - retry_delay=SHORT_DELAY_IN_SECONDS): + retry_delay=DELAY_IN_SECONDS): return http_request("DELETE", url, None, headers=headers, use_proxy=use_proxy, diff --git a/azurelinuxagent/common/utils/shellutil.py b/azurelinuxagent/common/utils/shellutil.py index fff6aa8..7b8e0c9 100644 --- a/azurelinuxagent/common/utils/shellutil.py +++ b/azurelinuxagent/common/utils/shellutil.py @@ -84,16 +84,22 @@ def run_get_output(cmd, chk_err=True, log_cmd=True): output = ustr(output, encoding='utf-8', errors="backslashreplace") - except subprocess.CalledProcessError as e: - output = ustr(e.output, - encoding='utf-8', - errors="backslashreplace") - if chk_err: - if log_cmd: - logger.error(u"Command: '{0}'", e.cmd) - logger.error(u"Return code: {0}", e.returncode) - logger.error(u"Result: {0}", output) - return e.returncode, output + except Exception as e: + if type(e) is subprocess.CalledProcessError: + output = ustr(e.output, + encoding='utf-8', + errors="backslashreplace") + if chk_err: + if log_cmd: + logger.error(u"Command: '{0}'", e.cmd) + logger.error(u"Return code: {0}", e.returncode) + logger.error(u"Result: {0}", output) + return e.returncode, output + else: + logger.error( + u"'{0}' raised unexpected exception: '{1}'".format( + cmd, ustr(e))) + return -1, ustr(e) return 0, output diff --git a/azurelinuxagent/common/version.py b/azurelinuxagent/common/version.py index e1bb828..f743f11 100644 --- a/azurelinuxagent/common/version.py +++ b/azurelinuxagent/common/version.py @@ -113,7 +113,7 @@ def get_distro(): AGENT_NAME = "WALinuxAgent" AGENT_LONG_NAME = "Azure Linux Agent" -AGENT_VERSION = '2.2.17' +AGENT_VERSION = '2.2.21' AGENT_LONG_VERSION = "{0}-{1}".format(AGENT_NAME, AGENT_VERSION) AGENT_DESCRIPTION = """ The Azure Linux Agent supports the provisioning and running of Linux @@ -161,14 +161,17 @@ def set_current_agent(): version = AGENT_VERSION return agent, FlexibleVersion(version) + def is_agent_package(path): path = os.path.basename(path) return not re.match(AGENT_PKG_PATTERN, path) is None + def is_agent_path(path): path = os.path.basename(path) return not re.match(AGENT_NAME_PATTERN, path) is None + CURRENT_AGENT, CURRENT_VERSION = set_current_agent() |