summaryrefslogtreecommitdiff
path: root/azurelinuxagent
diff options
context:
space:
mode:
Diffstat (limited to 'azurelinuxagent')
-rw-r--r--azurelinuxagent/agent.py18
-rw-r--r--azurelinuxagent/common/conf.py9
-rw-r--r--azurelinuxagent/common/errorstate.py31
-rw-r--r--azurelinuxagent/common/event.py92
-rw-r--r--azurelinuxagent/common/exception.py42
-rw-r--r--azurelinuxagent/common/future.py7
-rw-r--r--azurelinuxagent/common/logger.py34
-rw-r--r--azurelinuxagent/common/osutil/default.py97
-rw-r--r--azurelinuxagent/common/osutil/factory.py9
-rw-r--r--azurelinuxagent/common/osutil/ubuntu.py37
-rw-r--r--azurelinuxagent/common/protocol/metadata.py7
-rw-r--r--azurelinuxagent/common/protocol/restapi.py12
-rw-r--r--azurelinuxagent/common/protocol/util.py21
-rw-r--r--azurelinuxagent/common/protocol/wire.py221
-rw-r--r--azurelinuxagent/common/utils/cryptutil.py11
-rw-r--r--azurelinuxagent/common/utils/fileutil.py35
-rw-r--r--azurelinuxagent/common/utils/flexible_version.py2
-rw-r--r--azurelinuxagent/common/utils/restutil.py105
-rw-r--r--azurelinuxagent/common/utils/shellutil.py26
-rw-r--r--azurelinuxagent/common/version.py5
-rw-r--r--azurelinuxagent/ga/env.py147
-rw-r--r--azurelinuxagent/ga/exthandlers.py236
-rw-r--r--azurelinuxagent/ga/monitor.py43
-rw-r--r--azurelinuxagent/ga/update.py280
-rw-r--r--azurelinuxagent/pa/deprovision/default.py1
-rw-r--r--azurelinuxagent/pa/provision/cloudinit.py11
-rw-r--r--azurelinuxagent/pa/provision/default.py58
-rw-r--r--azurelinuxagent/pa/provision/factory.py2
-rw-r--r--azurelinuxagent/pa/rdma/suse.py29
29 files changed, 1159 insertions, 469 deletions
diff --git a/azurelinuxagent/agent.py b/azurelinuxagent/agent.py
index e99f7be..4ab7aa0 100644
--- a/azurelinuxagent/agent.py
+++ b/azurelinuxagent/agent.py
@@ -63,6 +63,9 @@ class Agent(object):
path="/var/log/waagent.log")
logger.add_logger_appender(logger.AppenderType.CONSOLE, level,
path="/dev/console")
+ logger.add_logger_appender(logger.AppenderType.TELEMETRY,
+ logger.LogLevel.WARNING,
+ path=event.add_log_event)
ext_log_dir = conf.get_ext_log_dir()
try:
@@ -144,7 +147,7 @@ def main(args=[]):
if command == "version":
version()
elif command == "help":
- usage()
+ print(usage())
elif command == "start":
start(conf_file_path=conf_file_path)
else:
@@ -228,15 +231,16 @@ def version():
def usage():
"""
- Show agent usage
+ Return agent usage message
"""
- print("")
- print((("usage: {0} [-verbose] [-force] [-help] "
+ s = "\n"
+ s += ("usage: {0} [-verbose] [-force] [-help] "
"-configuration-path:<path to configuration file>"
"-deprovision[+user]|-register-service|-version|-daemon|-start|"
- "-run-exthandlers]"
- "").format(sys.argv[0])))
- print("")
+ "-run-exthandlers|-show-configuration]"
+ "").format(sys.argv[0])
+ s += "\n"
+ return s
def start(conf_file_path=None):
"""
diff --git a/azurelinuxagent/common/conf.py b/azurelinuxagent/common/conf.py
index 75a0248..33f953a 100644
--- a/azurelinuxagent/common/conf.py
+++ b/azurelinuxagent/common/conf.py
@@ -40,11 +40,11 @@ class ConfigurationProvider(object):
raise AgentConfigError("Can't not parse empty configuration")
for line in content.split('\n'):
if not line.startswith("#") and "=" in line:
- parts = line.split('=')
+ parts = line.split('=', 1)
if len(parts) < 2:
continue
key = parts[0].strip()
- value = parts[1].strip("\" ")
+ value = parts[1].split('#')[0].strip("\" ").strip()
self.values[key] = value if value != "None" else None
def get(self, key, default_val):
@@ -131,12 +131,14 @@ __STRING_OPTIONS__ = {
}
__INTEGER_OPTIONS__ = {
+ "OS.SshClientAliveInterval" : 180,
"Provisioning.PasswordCryptSaltLength" : 10,
"HttpProxy.Port" : None,
"ResourceDisk.SwapSizeMB" : 0,
"Autoupdate.Frequency" : 3600
}
+
def get_configuration(conf=__conf__):
options = {}
for option in __SWITCH_OPTIONS__:
@@ -190,6 +192,9 @@ def get_fips_enabled(conf=__conf__):
def get_openssl_cmd(conf=__conf__):
return conf.get("OS.OpensslPath", "/usr/bin/openssl")
+def get_ssh_client_alive_interval(conf=__conf__):
+ return conf.get("OS.SshClientAliveInterval", 180)
+
def get_ssh_dir(conf=__conf__):
return conf.get("OS.SshDir", "/etc/ssh")
diff --git a/azurelinuxagent/common/errorstate.py b/azurelinuxagent/common/errorstate.py
new file mode 100644
index 0000000..750aa77
--- /dev/null
+++ b/azurelinuxagent/common/errorstate.py
@@ -0,0 +1,31 @@
+from datetime import datetime, timedelta
+
+ERROR_STATE_DELTA = timedelta(minutes=15)
+
+
+class ErrorState(object):
+ def __init__(self, min_timedelta = ERROR_STATE_DELTA):
+ self.min_timedelta = min_timedelta
+
+ self.count = 0
+ self.timestamp = None
+
+ def incr(self):
+ if self.count == 0:
+ self.timestamp = datetime.utcnow()
+
+ self.count += 1
+
+ def reset(self):
+ self.count = 0
+ self.timestamp = None
+
+ def is_triggered(self):
+ if self.timestamp is None:
+ return False
+
+ delta = datetime.utcnow() - self.timestamp
+ if delta >= self.min_timedelta:
+ return True
+
+ return False
diff --git a/azurelinuxagent/common/event.py b/azurelinuxagent/common/event.py
index e62a925..84a439f 100644
--- a/azurelinuxagent/common/event.py
+++ b/azurelinuxagent/common/event.py
@@ -15,51 +15,58 @@
# Requires Python 2.4+ and Openssl 1.0+
#
-import os
-import sys
-import traceback
import atexit
+import datetime
import json
+import os
+import sys
import time
-import datetime
-import threading
-import platform
+import traceback
-from datetime import datetime, timedelta
+from datetime import datetime
import azurelinuxagent.common.conf as conf
import azurelinuxagent.common.logger as logger
-from azurelinuxagent.common.exception import EventError, ProtocolError
+from azurelinuxagent.common.exception import EventError
from azurelinuxagent.common.future import ustr
from azurelinuxagent.common.protocol.restapi import TelemetryEventParam, \
- TelemetryEventList, \
TelemetryEvent, \
- set_properties, get_properties
-from azurelinuxagent.common.version import DISTRO_NAME, DISTRO_VERSION, \
- DISTRO_CODE_NAME, AGENT_VERSION, \
- CURRENT_AGENT, CURRENT_VERSION
+ get_properties
+from azurelinuxagent.common.version import CURRENT_VERSION
_EVENT_MSG = "Event: name={0}, op={1}, message={2}, duration={3}"
+
class WALAEventOperation:
ActivateResourceDisk = "ActivateResourceDisk"
+ AgentBlacklisted = "AgentBlacklisted"
+ AgentEnabled = "AgentEnabled"
AutoUpdate = "AutoUpdate"
+ CustomData = "CustomData"
+ Deploy = "Deploy"
Disable = "Disable"
Download = "Download"
Enable = "Enable"
+ ExtensionProcessing = "ExtensionProcessing"
Firewall = "Firewall"
HealthCheck = "HealthCheck"
HeartBeat = "HeartBeat"
HostPlugin = "HostPlugin"
+ HttpErrors = "HttpErrors"
Install = "Install"
InitializeHostPlugin = "InitializeHostPlugin"
+ Log = "Log"
+ Partition = "Partition"
ProcessGoalState = "ProcessGoalState"
Provision = "Provision"
+ GuestState = "GuestState"
ReportStatus = "ReportStatus"
Restart = "Restart"
+ SkipUpdate = "SkipUpdate"
UnhandledError = "UnhandledError"
UnInstall = "UnInstall"
+ Unknown = "Unknown"
Upgrade = "Upgrade"
Update = "Update"
@@ -67,7 +74,7 @@ class WALAEventOperation:
class EventStatus(object):
EVENT_STATUS_FILE = "event_status.json"
- def __init__(self, status_dir=conf.get_lib_dir()):
+ def __init__(self):
self._path = None
self._status = {}
@@ -82,7 +89,7 @@ class EventStatus(object):
event = self._event_name(name, version, op)
if event not in self._status:
return True
- return self._status[event] == True
+ return self._status[event] is True
def initialize(self, status_dir=conf.get_lib_dir()):
self._path = os.path.join(status_dir, EventStatus.EVENT_STATUS_FILE)
@@ -90,7 +97,7 @@ class EventStatus(object):
def mark_event_status(self, name, version, op, status):
event = self._event_name(name, version, op)
- self._status[event] = (status == True)
+ self._status[event] = (status is True)
self._save()
def _event_name(self, name, version, op):
@@ -113,6 +120,7 @@ class EventStatus(object):
except Exception as e:
logger.warn("Exception occurred saving event status: {0}".format(e))
+
__event_status__ = EventStatus()
__event_status_operations__ = [
WALAEventOperation.AutoUpdate,
@@ -171,7 +179,7 @@ class EventLogger(object):
(self.periodic_events[h] + delta) <= datetime.now()
def add_periodic(self,
- delta, name, op="", is_success=True, duration=0,
+ delta, name, op=WALAEventOperation.Unknown, is_success=True, duration=0,
version=CURRENT_VERSION, message="", evt_type="",
is_internal=False, log_event=True, force=False):
@@ -186,7 +194,7 @@ class EventLogger(object):
def add_event(self,
name,
- op="",
+ op=WALAEventOperation.Unknown,
is_success=True,
duration=0,
version=CURRENT_VERSION,
@@ -215,6 +223,32 @@ class EventLogger(object):
except EventError as e:
logger.error("{0}", e)
+ def add_log_event(self, level, message):
+ # By the time the message has gotten to this point it is formatted as
+ #
+ # YYYY/MM/DD HH:mm:ss.fffffff LEVEL <text>.
+ #
+ # The timestamp and the level are redundant, and should be stripped.
+ # The logging library does not schematize this data, so I am forced
+ # to parse the message. The format is regular, so the burden is low.
+
+ parts = message.split(' ', 3)
+ msg = parts[3] if len(parts) == 4 \
+ else message
+
+ event = TelemetryEvent(7, "FFF0196F-EE4C-4EAF-9AA5-776F622DEB4F")
+ event.parameters.append(TelemetryEventParam('EventName', WALAEventOperation.Log))
+ event.parameters.append(TelemetryEventParam('CapabilityUsed', logger.LogLevel.STRINGS[level]))
+ event.parameters.append(TelemetryEventParam('Context1', msg))
+ event.parameters.append(TelemetryEventParam('Context2', ''))
+ event.parameters.append(TelemetryEventParam('Context3', ''))
+
+ data = get_properties(event)
+ try:
+ self.save_event(json.dumps(data))
+ except EventError:
+ pass
+
__event_logger__ = EventLogger()
@@ -224,6 +258,7 @@ def elapsed_milliseconds(utc_start):
return int(((d.days * 24 * 60 * 60 + d.seconds) * 1000) + \
(d.microseconds / 1000.0))
+
def report_event(op, is_success=True, message=''):
from azurelinuxagent.common.version import AGENT_NAME, CURRENT_VERSION
add_event(AGENT_NAME,
@@ -232,6 +267,7 @@ def report_event(op, is_success=True, message=''):
message=message,
op=op)
+
def report_periodic(delta, op, is_success=True, message=''):
from azurelinuxagent.common.version import AGENT_NAME, CURRENT_VERSION
add_periodic(delta, AGENT_NAME,
@@ -240,7 +276,9 @@ def report_periodic(delta, op, is_success=True, message=''):
message=message,
op=op)
-def add_event(name, op="", is_success=True, duration=0, version=CURRENT_VERSION,
+
+def add_event(name, op=WALAEventOperation.Unknown, is_success=True, duration=0,
+ version=CURRENT_VERSION,
message="", evt_type="", is_internal=False, log_event=True,
reporter=__event_logger__):
if reporter.event_dir is None:
@@ -255,8 +293,17 @@ def add_event(name, op="", is_success=True, duration=0, version=CURRENT_VERSION,
version=str(version), message=message, evt_type=evt_type,
is_internal=is_internal, log_event=log_event)
+
+def add_log_event(level, message, reporter=__event_logger__):
+ if reporter.event_dir is None:
+ return
+
+ reporter.add_log_event(level, message)
+
+
def add_periodic(
- delta, name, op="", is_success=True, duration=0, version=CURRENT_VERSION,
+ delta, name, op=WALAEventOperation.Unknown, is_success=True, duration=0,
+ version=CURRENT_VERSION,
message="", evt_type="", is_internal=False, log_event=True, force=False,
reporter=__event_logger__):
if reporter.event_dir is None:
@@ -269,10 +316,12 @@ def add_periodic(
version=str(version), message=message, evt_type=evt_type,
is_internal=is_internal, log_event=log_event, force=force)
+
def mark_event_status(name, version, op, status):
if op in __event_status_operations__:
__event_status__.mark_event_status(name, version, op, status)
+
def should_emit_event(name, version, op, status):
return \
op not in __event_status_operations__ or \
@@ -280,12 +329,15 @@ def should_emit_event(name, version, op, status):
not __event_status__.event_marked(name, version, op) or \
__event_status__.event_succeeded(name, version, op) != status
+
def init_event_logger(event_dir):
__event_logger__.event_dir = event_dir
+
def init_event_status(status_dir):
__event_status__.initialize(status_dir)
+
def dump_unhandled_err(name):
if hasattr(sys, 'last_type') and hasattr(sys, 'last_value') and \
hasattr(sys, 'last_traceback'):
diff --git a/azurelinuxagent/common/exception.py b/azurelinuxagent/common/exception.py
index 17c6ce0..bb8ab10 100644
--- a/azurelinuxagent/common/exception.py
+++ b/azurelinuxagent/common/exception.py
@@ -26,8 +26,8 @@ class AgentError(Exception):
Base class of agent error.
"""
- def __init__(self, errno, msg, inner=None):
- msg = u"[{0}] {1}".format(errno, msg)
+ def __init__(self, msg, inner=None):
+ msg = u"[{0}] {1}".format(type(self).__name__, msg)
if inner is not None:
msg = u"{0}\nInner error: {1}".format(msg, inner)
super(AgentError, self).__init__(msg)
@@ -39,16 +39,16 @@ class AgentConfigError(AgentError):
"""
def __init__(self, msg=None, inner=None):
- super(AgentConfigError, self).__init__('000001', msg, inner)
+ super(AgentConfigError, self).__init__(msg, inner)
class AgentNetworkError(AgentError):
"""
- When network is not avaiable.
+ When network is not available\.
"""
def __init__(self, msg=None, inner=None):
- super(AgentNetworkError, self).__init__('000002', msg, inner)
+ super(AgentNetworkError, self).__init__(msg, inner)
class ExtensionError(AgentError):
@@ -57,7 +57,7 @@ class ExtensionError(AgentError):
"""
def __init__(self, msg=None, inner=None):
- super(ExtensionError, self).__init__('000003', msg, inner)
+ super(ExtensionError, self).__init__(msg, inner)
class ProvisionError(AgentError):
@@ -66,7 +66,7 @@ class ProvisionError(AgentError):
"""
def __init__(self, msg=None, inner=None):
- super(ProvisionError, self).__init__('000004', msg, inner)
+ super(ProvisionError, self).__init__(msg, inner)
class ResourceDiskError(AgentError):
@@ -75,7 +75,7 @@ class ResourceDiskError(AgentError):
"""
def __init__(self, msg=None, inner=None):
- super(ResourceDiskError, self).__init__('000005', msg, inner)
+ super(ResourceDiskError, self).__init__(msg, inner)
class DhcpError(AgentError):
@@ -84,7 +84,8 @@ class DhcpError(AgentError):
"""
def __init__(self, msg=None, inner=None):
- super(DhcpError, self).__init__('000006', msg, inner)
+ super(DhcpError, self).__init__(msg, inner)
+
class OSUtilError(AgentError):
"""
@@ -92,7 +93,7 @@ class OSUtilError(AgentError):
"""
def __init__(self, msg=None, inner=None):
- super(OSUtilError, self).__init__('000007', msg, inner)
+ super(OSUtilError, self).__init__(msg, inner)
class ProtocolError(AgentError):
@@ -101,7 +102,7 @@ class ProtocolError(AgentError):
"""
def __init__(self, msg=None, inner=None):
- super(ProtocolError, self).__init__('000008', msg, inner)
+ super(ProtocolError, self).__init__(msg, inner)
class ProtocolNotFoundError(ProtocolError):
@@ -113,13 +114,22 @@ class ProtocolNotFoundError(ProtocolError):
super(ProtocolNotFoundError, self).__init__(msg, inner)
+class RestartError(ProtocolError):
+ """
+ Variant of ProtocolError used to restart processing if the GoalState
+ becomes stale.
+ """
+
+ pass
+
+
class HttpError(AgentError):
"""
Http request failure
"""
def __init__(self, msg=None, inner=None):
- super(HttpError, self).__init__('000009', msg, inner)
+ super(HttpError, self).__init__(msg, inner)
class EventError(AgentError):
@@ -128,7 +138,7 @@ class EventError(AgentError):
"""
def __init__(self, msg=None, inner=None):
- super(EventError, self).__init__('000010', msg, inner)
+ super(EventError, self).__init__(msg, inner)
class CryptError(AgentError):
@@ -137,7 +147,7 @@ class CryptError(AgentError):
"""
def __init__(self, msg=None, inner=None):
- super(CryptError, self).__init__('000011', msg, inner)
+ super(CryptError, self).__init__(msg, inner)
class UpdateError(AgentError):
@@ -146,7 +156,7 @@ class UpdateError(AgentError):
"""
def __init__(self, msg=None, inner=None):
- super(UpdateError, self).__init__('000012', msg, inner)
+ super(UpdateError, self).__init__(msg, inner)
class ResourceGoneError(HttpError):
@@ -155,4 +165,6 @@ class ResourceGoneError(HttpError):
"""
def __init__(self, msg=None, inner=None):
+ if msg is None:
+ msg = "Resource is gone"
super(ResourceGoneError, self).__init__(msg, inner)
diff --git a/azurelinuxagent/common/future.py b/azurelinuxagent/common/future.py
index 8d5b70b..8389cd3 100644
--- a/azurelinuxagent/common/future.py
+++ b/azurelinuxagent/common/future.py
@@ -1,10 +1,10 @@
import sys
"""
-Add alies for python2 and python3 libs and fucntions.
+Add alias for python2 and python3 libs and functions.
"""
-if sys.version_info[0]== 3:
+if sys.version_info[0] == 3:
import http.client as httpclient
from urllib.parse import urlparse
@@ -23,5 +23,4 @@ elif sys.version_info[0] == 2:
bytebuffer = buffer
else:
- raise ImportError("Unknown python version:{0}".format(sys.version_info))
-
+ raise ImportError("Unknown python version: {0}".format(sys.version_info))
diff --git a/azurelinuxagent/common/logger.py b/azurelinuxagent/common/logger.py
index bfdc73a..0a90718 100644
--- a/azurelinuxagent/common/logger.py
+++ b/azurelinuxagent/common/logger.py
@@ -17,8 +17,8 @@
"""
Log utils
"""
-import os
import sys
+
from azurelinuxagent.common.future import ustr
from datetime import datetime, timedelta
@@ -28,6 +28,7 @@ EVERY_HOUR = timedelta(hours=1)
EVERY_HALF_HOUR = timedelta(minutes=30)
EVERY_FIFTEEN_MINUTES = timedelta(minutes=15)
+
class Logger(object):
"""
Logger class
@@ -92,6 +93,7 @@ class Logger(object):
appender = _create_logger_appender(appender_type, level, path)
self.appenders.append(appender)
+
class ConsoleAppender(object):
def __init__(self, level, path):
self.level = level
@@ -105,6 +107,7 @@ class ConsoleAppender(object):
except IOError:
pass
+
class FileAppender(object):
def __init__(self, level, path):
self.level = level
@@ -118,6 +121,7 @@ class FileAppender(object):
except IOError:
pass
+
class StdoutAppender(object):
def __init__(self, level):
self.level = level
@@ -129,9 +133,24 @@ class StdoutAppender(object):
except IOError:
pass
+
+class TelemetryAppender(object):
+ def __init__(self, level, event_func):
+ self.level = level
+ self.event_func = event_func
+
+ def write(self, level, msg):
+ if self.level <= level:
+ try:
+ self.event_func(level, msg)
+ except IOError:
+ pass
+
+
#Initialize logger instance
DEFAULT_LOGGER = Logger()
+
class LogLevel(object):
VERBOSE = 0
INFO = 1
@@ -144,35 +163,46 @@ class LogLevel(object):
"ERROR"
]
+
class AppenderType(object):
FILE = 0
CONSOLE = 1
STDOUT = 2
+ TELEMETRY = 3
+
def add_logger_appender(appender_type, level=LogLevel.INFO, path=None):
DEFAULT_LOGGER.add_appender(appender_type, level, path)
+
def reset_periodic():
DEFAULT_LOGGER.reset_periodic()
+
def periodic(delta, msg_format, *args):
DEFAULT_LOGGER.periodic(delta, msg_format, *args)
+
def verbose(msg_format, *args):
DEFAULT_LOGGER.verbose(msg_format, *args)
+
def info(msg_format, *args):
DEFAULT_LOGGER.info(msg_format, *args)
+
def warn(msg_format, *args):
DEFAULT_LOGGER.warn(msg_format, *args)
+
def error(msg_format, *args):
DEFAULT_LOGGER.error(msg_format, *args)
+
def log(level, msg_format, *args):
DEFAULT_LOGGER.log(level, msg_format, args)
+
def _create_logger_appender(appender_type, level=LogLevel.INFO, path=None):
if appender_type == AppenderType.CONSOLE:
return ConsoleAppender(level, path)
@@ -180,6 +210,8 @@ def _create_logger_appender(appender_type, level=LogLevel.INFO, path=None):
return FileAppender(level, path)
elif appender_type == AppenderType.STDOUT:
return StdoutAppender(level)
+ elif appender_type == AppenderType.TELEMETRY:
+ return TelemetryAppender(level, path)
else:
raise ValueError("Unknown appender type")
diff --git a/azurelinuxagent/common/osutil/default.py b/azurelinuxagent/common/osutil/default.py
index f0b1050..16b7444 100644
--- a/azurelinuxagent/common/osutil/default.py
+++ b/azurelinuxagent/common/osutil/default.py
@@ -57,10 +57,21 @@ IPTABLES_VERSION = "iptables --version"
IPTABLES_LOCKING_VERSION = FlexibleVersion('1.4.21')
FIREWALL_ACCEPT = "iptables {0} -t security -{1} OUTPUT -d {2} -p tcp -m owner --uid-owner {3} -j ACCEPT"
-FIREWALL_DROP = "iptables {0} -t security -{1} OUTPUT -d {2} -p tcp -j DROP"
-FIREWALL_LIST = "iptables {0} -t security -L"
+# Note:
+# -- Initially "flight" the change to ACCEPT packets and develop a metric baseline
+# A subsequent release will convert the ACCEPT to DROP
+FIREWALL_DROP = "iptables {0} -t security -{1} OUTPUT -d {2} -p tcp -m conntrack --ctstate INVALID,NEW -j ACCEPT"
+# FIREWALL_DROP = "iptables {0} -t security -{1} OUTPUT -d {2} -p tcp -m conntrack --ctstate INVALID,NEW -j DROP"
+FIREWALL_LIST = "iptables {0} -t security -L -nxv"
+FIREWALL_PACKETS = "iptables {0} -t security -L OUTPUT --zero OUTPUT -nxv"
FIREWALL_FLUSH = "iptables {0} -t security --flush"
+# Precisely delete the rules created by the agent.
+FIREWALL_DELETE_CONNTRACK = "iptables {0} -t security -D OUTPUT -d {1} -p tcp -m conntrack --ctstate INVALID,NEW -j ACCEPT"
+FIREWALL_DELETE_OWNER = "iptables {0} -t security -D OUTPUT -d {1} -p tcp -m owner --uid-owner {2} -j ACCEPT"
+
+PACKET_PATTERN = "^\s*(\d+)\s+(\d+)\s+DROP\s+.*{0}[^\d]*$"
+
_enable_firewall = True
DMIDECODE_CMD = 'dmidecode --string system-uuid'
@@ -69,13 +80,45 @@ UUID_PATTERN = re.compile(
r'^\s*[A-F0-9]{8}(?:\-[A-F0-9]{4}){3}\-[A-F0-9]{12}\s*$',
re.IGNORECASE)
-class DefaultOSUtil(object):
+class DefaultOSUtil(object):
def __init__(self):
self.agent_conf_file_path = '/etc/waagent.conf'
self.selinux = None
self.disable_route_warning = False
+ def get_firewall_dropped_packets(self, dst_ip=None):
+ # If a previous attempt failed, do not retry
+ global _enable_firewall
+ if not _enable_firewall:
+ return 0
+
+ try:
+ wait = self.get_firewall_will_wait()
+
+ rc, output = shellutil.run_get_output(FIREWALL_PACKETS.format(wait))
+ if rc == 3:
+ # Transient error that we ignore. This code fires every loop
+ # of the daemon (60m), so we will get the value eventually.
+ return 0
+
+ if rc != 0:
+ return -1
+
+ pattern = re.compile(PACKET_PATTERN.format(dst_ip))
+ for line in output.split('\n'):
+ m = pattern.match(line)
+ if m is not None:
+ return int(m.group(1))
+
+ return 0
+
+ except Exception as e:
+ _enable_firewall = False
+ logger.warn("Unable to retrieve firewall packets dropped"
+ "{0}".format(ustr(e)))
+ return -1
+
def get_firewall_will_wait(self):
# Determine if iptables will serialize access
rc, output = shellutil.run_get_output(IPTABLES_VERSION)
@@ -95,31 +138,46 @@ class DefaultOSUtil(object):
else ""
return wait
- def remove_firewall(self):
- # If a previous attempt threw an exception, do not retry
+ def _delete_rule(self, rule):
+ """
+ Continually execute the delete operation until the return
+ code is non-zero or the limit has been reached.
+ """
+ for i in range(1, 100):
+ rc = shellutil.run(rule, chk_err=False)
+ if rc == 1:
+ return
+ elif rc == 2:
+ raise Exception("invalid firewall deletion rule '{0}'".format(rule))
+
+ def remove_firewall(self, dst_ip=None, uid=None):
+ # If a previous attempt failed, do not retry
global _enable_firewall
if not _enable_firewall:
return False
try:
+ if dst_ip is None or uid is None:
+ msg = "Missing arguments to enable_firewall"
+ logger.warn(msg)
+ raise Exception(msg)
+
wait = self.get_firewall_will_wait()
- flush_rule = FIREWALL_FLUSH.format(wait)
- if shellutil.run(flush_rule, chk_err=False) != 0:
- logger.warn("Failed to flush firewall")
+ self._delete_rule(FIREWALL_DELETE_CONNTRACK.format(wait, dst_ip))
+ self._delete_rule(FIREWALL_DELETE_OWNER.format(wait, dst_ip, uid))
return True
except Exception as e:
_enable_firewall = False
- logger.info("Unable to flush firewall -- "
+ logger.info("Unable to remove firewall -- "
"no further attempts will be made: "
"{0}".format(ustr(e)))
return False
def enable_firewall(self, dst_ip=None, uid=None):
-
- # If a previous attempt threw an exception, do not retry
+ # If a previous attempt failed, do not retry
global _enable_firewall
if not _enable_firewall:
return False
@@ -134,10 +192,15 @@ class DefaultOSUtil(object):
# If the DROP rule exists, make no changes
drop_rule = FIREWALL_DROP.format(wait, "C", dst_ip)
-
- if shellutil.run(drop_rule, chk_err=False) == 0:
+ rc = shellutil.run(drop_rule, chk_err=False)
+ if rc == 0:
logger.verbose("Firewall appears established")
return True
+ elif rc == 2:
+ self.remove_firewall(dst_ip, uid)
+ msg = "please upgrade iptables to a version that supports the -C option"
+ logger.warn(msg)
+ raise Exception(msg)
# Otherwise, append both rules
accept_rule = FIREWALL_ACCEPT.format(wait, "A", dst_ip, uid)
@@ -309,11 +372,11 @@ class DefaultOSUtil(object):
else:
sudoer = "{0} ALL=(ALL) ALL".format(username)
if not os.path.isfile(sudoers_wagent) or \
- fileutil.findstr_in_file(sudoers_wagent, sudoer) is None:
+ fileutil.findstr_in_file(sudoers_wagent, sudoer) is False:
fileutil.append_file(sudoers_wagent, "{0}\n".format(sudoer))
fileutil.chmod(sudoers_wagent, 0o440)
else:
- #Remove user from sudoers
+ # remove user from sudoers
if os.path.isfile(sudoers_wagent):
try:
content = fileutil.read_file(sudoers_wagent)
@@ -440,7 +503,7 @@ class DefaultOSUtil(object):
conf_file = fileutil.read_file(conf_file_path).split("\n")
textutil.set_ssh_config(conf_file, "PasswordAuthentication", option)
textutil.set_ssh_config(conf_file, "ChallengeResponseAuthentication", option)
- textutil.set_ssh_config(conf_file, "ClientAliveInterval", "180")
+ textutil.set_ssh_config(conf_file, "ClientAliveInterval", str(conf.get_ssh_client_alive_interval()))
fileutil.write_file(conf_file_path, "\n".join(conf_file))
logger.info("{0} SSH password-based authentication methods."
.format("Disabled" if disable_password else "Enabled"))
@@ -965,7 +1028,7 @@ class DefaultOSUtil(object):
if not os.path.exists(hostname_record):
# this file is created at provisioning time with agents >= 2.2.3
hostname = socket.gethostname()
- logger.warn('Hostname record does not exist, '
+ logger.info('Hostname record does not exist, '
'creating [{0}] with hostname [{1}]',
hostname_record,
hostname)
diff --git a/azurelinuxagent/common/osutil/factory.py b/azurelinuxagent/common/osutil/factory.py
index 43aa6a7..1b4e2cb 100644
--- a/azurelinuxagent/common/osutil/factory.py
+++ b/azurelinuxagent/common/osutil/factory.py
@@ -27,7 +27,8 @@ from .freebsd import FreeBSDOSUtil
from .openbsd import OpenBSDOSUtil
from .redhat import RedhatOSUtil, Redhat6xOSUtil
from .suse import SUSEOSUtil, SUSE11OSUtil
-from .ubuntu import UbuntuOSUtil, Ubuntu12OSUtil, Ubuntu14OSUtil, UbuntuSnappyOSUtil
+from .ubuntu import UbuntuOSUtil, Ubuntu12OSUtil, Ubuntu14OSUtil, \
+ UbuntuSnappyOSUtil, Ubuntu16OSUtil
from .alpine import AlpineOSUtil
from .bigip import BigIpOSUtil
from .gaia import GaiaOSUtil
@@ -46,10 +47,12 @@ def get_osutil(distro_name=DISTRO_NAME,
return ClearLinuxUtil()
if distro_name == "ubuntu":
- if Version(distro_version) == Version("12.04") or Version(distro_version) == Version("12.10"):
+ if Version(distro_version) in [Version("12.04"), Version("12.10")]:
return Ubuntu12OSUtil()
- elif Version(distro_version) == Version("14.04") or Version(distro_version) == Version("14.10"):
+ elif Version(distro_version) in [Version("14.04"), Version("14.10")]:
return Ubuntu14OSUtil()
+ elif Version(distro_version) in [Version('16.04'), Version('16.10'), Version('17.04')]:
+ return Ubuntu16OSUtil()
elif distro_full_name == "Snappy Ubuntu Core":
return UbuntuSnappyOSUtil()
else:
diff --git a/azurelinuxagent/common/osutil/ubuntu.py b/azurelinuxagent/common/osutil/ubuntu.py
index 3c353cf..8dacc75 100644
--- a/azurelinuxagent/common/osutil/ubuntu.py
+++ b/azurelinuxagent/common/osutil/ubuntu.py
@@ -16,9 +16,14 @@
# Requires Python 2.4+ and Openssl 1.0+
#
+import time
+
+import azurelinuxagent.common.logger as logger
import azurelinuxagent.common.utils.shellutil as shellutil
+
from azurelinuxagent.common.osutil.default import DefaultOSUtil
+
class Ubuntu14OSUtil(DefaultOSUtil):
def __init__(self):
super(Ubuntu14OSUtil, self).__init__()
@@ -41,6 +46,7 @@ class Ubuntu14OSUtil(DefaultOSUtil):
def get_dhcp_lease_endpoint(self):
return self.get_endpoint_from_leases_path('/var/lib/dhcp/dhclient.*.leases')
+
class Ubuntu12OSUtil(Ubuntu14OSUtil):
def __init__(self):
super(Ubuntu12OSUtil, self).__init__()
@@ -50,9 +56,13 @@ class Ubuntu12OSUtil(Ubuntu14OSUtil):
ret = shellutil.run_get_output("pidof dhclient3", chk_err=False)
return ret[1] if ret[0] == 0 else None
-class UbuntuOSUtil(Ubuntu14OSUtil):
+
+class Ubuntu16OSUtil(Ubuntu14OSUtil):
+ """
+ Ubuntu 16.04, 16.10, and 17.04.
+ """
def __init__(self):
- super(UbuntuOSUtil, self).__init__()
+ super(Ubuntu16OSUtil, self).__init__()
def register_agent_service(self):
return shellutil.run("systemctl unmask walinuxagent", chk_err=False)
@@ -60,6 +70,29 @@ class UbuntuOSUtil(Ubuntu14OSUtil):
def unregister_agent_service(self):
return shellutil.run("systemctl mask walinuxagent", chk_err=False)
+
+class UbuntuOSUtil(Ubuntu16OSUtil):
+ def __init__(self):
+ super(UbuntuOSUtil, self).__init__()
+
+ def restart_if(self, ifname, retries=3, wait=5):
+ """
+ Restart an interface by bouncing the link. systemd-networkd observes
+ this event, and forces a renew of DHCP.
+ """
+ retry_limit=retries+1
+ for attempt in range(1, retry_limit):
+ return_code=shellutil.run("ip link set {0} down && ip link set {0} up".format(ifname))
+ if return_code == 0:
+ return
+ logger.warn("failed to restart {0}: return code {1}".format(ifname, return_code))
+ if attempt < retry_limit:
+ logger.info("retrying in {0} seconds".format(wait))
+ time.sleep(wait)
+ else:
+ logger.warn("exceeded restart retries")
+
+
class UbuntuSnappyOSUtil(Ubuntu14OSUtil):
def __init__(self):
super(UbuntuSnappyOSUtil, self).__init__()
diff --git a/azurelinuxagent/common/protocol/metadata.py b/azurelinuxagent/common/protocol/metadata.py
index 4de7ecf..0cfa5ed 100644
--- a/azurelinuxagent/common/protocol/metadata.py
+++ b/azurelinuxagent/common/protocol/metadata.py
@@ -173,6 +173,11 @@ class MetadataProtocol(Protocol):
return None
return self.certs
+ def get_incarnation(self):
+ # Always return 0 since Azure Stack does not maintain goal state
+ # incarnation identifiers
+ return 0
+
def get_vmagent_manifests(self):
self.update_goal_state()
@@ -235,7 +240,7 @@ class MetadataProtocol(Protocol):
set_properties("extensionHandlers", ext_list.extHandlers, data)
return ext_list, etag
- def get_ext_handler_pkgs(self, ext_handler):
+ def get_ext_handler_pkgs(self, ext_handler, etag):
logger.verbose("Get extension handler packages")
pkg_list = ExtHandlerPackageList()
diff --git a/azurelinuxagent/common/protocol/restapi.py b/azurelinuxagent/common/protocol/restapi.py
index 1ec3e21..5a678d7 100644
--- a/azurelinuxagent/common/protocol/restapi.py
+++ b/azurelinuxagent/common/protocol/restapi.py
@@ -162,6 +162,7 @@ class ExtHandlerProperties(DataContract):
def __init__(self):
self.version = None
self.upgradePolicy = None
+ self.upgradeGuid = None
self.state = None
self.extensions = DataContractList(Extension)
@@ -245,11 +246,13 @@ class ExtHandlerStatus(DataContract):
def __init__(self,
name=None,
version=None,
+ upgradeGuid=None,
status=None,
code=0,
message=None):
self.name = name
self.version = version
+ self.upgradeGuid = upgradeGuid
self.status = status
self.code = code
self.message = message
@@ -300,6 +303,9 @@ class Protocol(DataContract):
def get_certs(self):
raise NotImplementedError()
+ def get_incarnation(self):
+ raise NotImplementedError()
+
def get_vmagent_manifests(self):
raise NotImplementedError()
@@ -309,15 +315,15 @@ class Protocol(DataContract):
def get_ext_handlers(self):
raise NotImplementedError()
- def get_ext_handler_pkgs(self, extension):
+ def get_ext_handler_pkgs(self, extension, etag):
raise NotImplementedError()
def get_artifacts_profile(self):
raise NotImplementedError()
- def download_ext_handler_pkg(self, uri, headers=None):
+ def download_ext_handler_pkg(self, uri, headers=None, use_proxy=True):
try:
- resp = restutil.http_get(uri, use_proxy=True, headers=headers)
+ resp = restutil.http_get(uri, headers=headers, use_proxy=use_proxy)
if restutil.request_succeeded(resp):
return resp.read()
except Exception as e:
diff --git a/azurelinuxagent/common/protocol/util.py b/azurelinuxagent/common/protocol/util.py
index 3071d7a..b3ec373 100644
--- a/azurelinuxagent/common/protocol/util.py
+++ b/azurelinuxagent/common/protocol/util.py
@@ -26,16 +26,18 @@ import threading
import azurelinuxagent.common.conf as conf
import azurelinuxagent.common.logger as logger
+import azurelinuxagent.common.utils.fileutil as fileutil
+
from azurelinuxagent.common.exception import ProtocolError, OSUtilError, \
ProtocolNotFoundError, DhcpError
from azurelinuxagent.common.future import ustr
-import azurelinuxagent.common.utils.fileutil as fileutil
from azurelinuxagent.common.osutil import get_osutil
from azurelinuxagent.common.dhcp import get_dhcp_handler
from azurelinuxagent.common.protocol.ovfenv import OvfEnv
from azurelinuxagent.common.protocol.wire import WireProtocol
from azurelinuxagent.common.protocol.metadata import MetadataProtocol, \
METADATA_ENDPOINT
+from azurelinuxagent.common.utils.restutil import IOErrorCounter
OVF_FILE_NAME = "ovf-env.xml"
TAG_FILE_NAME = "useMetadataEndpoint.tag"
@@ -175,17 +177,20 @@ class ProtocolUtil(object):
self.clear_protocol()
for retry in range(0, MAX_RETRY):
- for protocol in protocols:
+ for protocol_name in protocols:
try:
- if protocol == "WireProtocol":
- return self._detect_wire_protocol()
-
- if protocol == "MetadataProtocol":
- return self._detect_metadata_protocol()
+ protocol = self._detect_wire_protocol() \
+ if protocol_name == "WireProtocol" \
+ else self._detect_metadata_protocol()
+
+ IOErrorCounter.set_protocol_endpoint(
+ endpoint=protocol.endpoint)
+
+ return protocol
except ProtocolError as e:
logger.info("Protocol endpoint not found: {0}, {1}",
- protocol, e)
+ protocol_name, e)
if retry < MAX_RETRY -1:
logger.info("Retry detect protocols: retry={0}", retry)
diff --git a/azurelinuxagent/common/protocol/wire.py b/azurelinuxagent/common/protocol/wire.py
index 4f3b7e0..963d33c 100644
--- a/azurelinuxagent/common/protocol/wire.py
+++ b/azurelinuxagent/common/protocol/wire.py
@@ -18,6 +18,7 @@
import json
import os
+import random
import re
import time
import xml.sax.saxutils as saxutils
@@ -27,7 +28,7 @@ import azurelinuxagent.common.utils.fileutil as fileutil
import azurelinuxagent.common.utils.textutil as textutil
from azurelinuxagent.common.exception import ProtocolNotFoundError, \
- ResourceGoneError
+ ResourceGoneError, RestartError
from azurelinuxagent.common.future import httpclient, bytebuffer
from azurelinuxagent.common.protocol.hostplugin import HostPluginProtocol
from azurelinuxagent.common.protocol.restapi import *
@@ -51,6 +52,7 @@ P7M_FILE_NAME = "Certificates.p7m"
PEM_FILE_NAME = "Certificates.pem"
EXT_CONF_FILE_NAME = "ExtensionsConfig.{0}.xml"
MANIFEST_FILE_NAME = "{0}.{1}.manifest.xml"
+AGENTS_MANIFEST_FILE_NAME = "{0}.{1}.agentsManifest"
TRANSPORT_CERT_FILE_NAME = "TransportCert.pem"
TRANSPORT_PRV_FILE_NAME = "TransportPrivate.pem"
@@ -58,17 +60,12 @@ PROTOCOL_VERSION = "2012-11-30"
ENDPOINT_FINE_NAME = "WireServer"
SHORT_WAITING_INTERVAL = 1 # 1 second
-LONG_WAITING_INTERVAL = 15 # 15 seconds
class UploadError(HttpError):
pass
-class WireProtocolResourceGone(ProtocolError):
- pass
-
-
class WireProtocol(Protocol):
"""Slim layer to adapt wire protocol data to metadata protocol interface"""
@@ -119,6 +116,13 @@ class WireProtocol(Protocol):
certificates = self.client.get_certs()
return certificates.cert_list
+ def get_incarnation(self):
+ path = os.path.join(conf.get_lib_dir(), INCARNATION_FILE_NAME)
+ if os.path.exists(path):
+ return fileutil.read_file(path)
+ else:
+ return 0
+
def get_vmagent_manifests(self):
# Update goal state to get latest extensions config
self.update_goal_state()
@@ -128,8 +132,9 @@ class WireProtocol(Protocol):
def get_vmagent_pkgs(self, vmagent_manifest):
goal_state = self.client.get_goal_state()
- man = self.client.get_gafamily_manifest(vmagent_manifest, goal_state)
- return man.pkg_list
+ ga_manifest = self.client.get_gafamily_manifest(vmagent_manifest, goal_state)
+ valid_pkg_list = self.client.filter_package_list(vmagent_manifest.family, ga_manifest, goal_state)
+ return valid_pkg_list
def get_ext_handlers(self):
logger.verbose("Get extension handler config")
@@ -140,10 +145,9 @@ class WireProtocol(Protocol):
# In wire protocol, incarnation is equivalent to ETag
return ext_conf.ext_handlers, goal_state.incarnation
- def get_ext_handler_pkgs(self, ext_handler):
+ def get_ext_handler_pkgs(self, ext_handler, etag):
logger.verbose("Get extension handler package")
- goal_state = self.client.get_goal_state()
- man = self.client.get_ext_manifest(ext_handler, goal_state)
+ man = self.client.get_ext_manifest(ext_handler, etag)
return man.pkg_list
def get_artifacts_profile(self):
@@ -156,10 +160,10 @@ class WireProtocol(Protocol):
if package is not None:
return package
else:
- logger.warn("Download did not succeed, falling back to host plugin")
+ logger.verbose("Download did not succeed, falling back to host plugin")
host = self.client.get_host_plugin()
uri, headers = host.get_artifact_request(uri, host.manifest_uri)
- package = super(WireProtocol, self).download_ext_handler_pkg(uri, headers=headers)
+ package = super(WireProtocol, self).download_ext_handler_pkg(uri, headers=headers, use_proxy=False)
return package
def report_provision_status(self, provision_status):
@@ -248,22 +252,28 @@ Convert VMStatus object to status blob format
"""
+def ga_status_to_guest_info(ga_status):
+ v1_ga_guest_info = {
+ "computerName" : ga_status.hostname,
+ "osName" : ga_status.osname,
+ "osVersion" : ga_status.osversion,
+ "version" : ga_status.version,
+ }
+ return v1_ga_guest_info
+
+
def ga_status_to_v1(ga_status):
formatted_msg = {
'lang': 'en-US',
'message': ga_status.message
}
v1_ga_status = {
- 'version': ga_status.version,
- 'status': ga_status.status,
- 'osversion': ga_status.osversion,
- 'osname': ga_status.osname,
- 'hostname': ga_status.hostname,
- 'formattedMessage': formatted_msg
+ "version" : ga_status.version,
+ "status" : ga_status.status,
+ "formattedMessage" : formatted_msg
}
return v1_ga_status
-
def ext_substatus_to_v1(sub_status_list):
status_list = []
for substatus in sub_status_list:
@@ -318,6 +328,9 @@ def ext_handler_status_to_v1(handler_status, ext_statuses, timestamp):
"message": handler_status.message
}
+ if handler_status.upgradeGuid is not None:
+ v1_handler_status["upgradeGuid"] = handler_status.upgradeGuid
+
if len(handler_status.extensions) > 0:
# Currently, no more than one extension per handler
ext_name = handler_status.extensions[0]
@@ -334,6 +347,7 @@ def ext_handler_status_to_v1(handler_status, ext_statuses, timestamp):
def vm_status_to_v1(vm_status, ext_statuses):
timestamp = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
+ v1_ga_guest_info = ga_status_to_guest_info(vm_status.vmAgent)
v1_ga_status = ga_status_to_v1(vm_status.vmAgent)
v1_handler_status_list = []
for handler_status in vm_status.vmAgent.extensionHandlers:
@@ -349,7 +363,8 @@ def vm_status_to_v1(vm_status, ext_statuses):
v1_vm_status = {
'version': '1.1',
'timestampUTC': timestamp,
- 'aggregateStatus': v1_agg_status
+ 'aggregateStatus': v1_agg_status,
+ 'guestOSInfo' : v1_ga_guest_info
}
return v1_vm_status
@@ -512,51 +527,29 @@ class WireClient(object):
self.shared_conf = None
self.certs = None
self.ext_conf = None
- self.last_request = 0
- self.req_count = 0
self.host_plugin = None
self.status_blob = StatusBlob(self)
- def prevent_throttling(self):
- """
- Try to avoid throttling of wire server
- """
- now = time.time()
- if now - self.last_request < 1:
- logger.verbose("Last request issued less than 1 second ago")
- logger.verbose("Sleep {0} second to avoid throttling.",
- SHORT_WAITING_INTERVAL)
- time.sleep(SHORT_WAITING_INTERVAL)
- self.last_request = now
-
- self.req_count += 1
- if self.req_count % 3 == 0:
- logger.verbose("Sleep {0} second to avoid throttling.",
- SHORT_WAITING_INTERVAL)
- time.sleep(SHORT_WAITING_INTERVAL)
- self.req_count = 0
-
def call_wireserver(self, http_req, *args, **kwargs):
- self.prevent_throttling()
-
try:
# Never use the HTTP proxy for wireserver
kwargs['use_proxy'] = False
resp = http_req(*args, **kwargs)
+
+ if restutil.request_failed(resp):
+ msg = "[Wireserver Failed] URI {0} ".format(args[0])
+ if resp is not None:
+ msg += " [HTTP Failed] Status Code {0}".format(resp.status)
+ raise ProtocolError(msg)
+
+ # If the GoalState is stale, pass along the exception to the caller
+ except ResourceGoneError:
+ raise
+
except Exception as e:
raise ProtocolError("[Wireserver Exception] {0}".format(
ustr(e)))
- if resp is not None and resp.status == httpclient.GONE:
- msg = args[0] if len(args) > 0 else ""
- raise WireProtocolResourceGone(msg)
-
- elif restutil.request_failed(resp):
- msg = "[Wireserver Failed] URI {0} ".format(args[0])
- if resp is not None:
- msg += " [HTTP Failed] Status Code {0}".format(resp.status)
- raise ProtocolError(msg)
-
return resp
def decode_config(self, data):
@@ -598,7 +591,10 @@ class WireClient(object):
def fetch_manifest(self, version_uris):
logger.verbose("Fetch manifest")
- for version in version_uris:
+ version_uris_shuffled = version_uris
+ random.shuffle(version_uris_shuffled)
+
+ for version in version_uris_shuffled:
response = None
if not HostPluginProtocol.is_default_channel():
response = self.fetch(version.uri)
@@ -698,26 +694,28 @@ class WireClient(object):
self.ext_conf = ExtensionsConfig(xml_text)
def update_goal_state(self, forced=False, max_retry=3):
- uri = GOAL_STATE_URI.format(self.endpoint)
- xml_text = self.fetch_config(uri, self.get_header())
- goal_state = GoalState(xml_text)
-
incarnation_file = os.path.join(conf.get_lib_dir(),
INCARNATION_FILE_NAME)
+ uri = GOAL_STATE_URI.format(self.endpoint)
- if not forced:
- last_incarnation = None
- if os.path.isfile(incarnation_file):
- last_incarnation = fileutil.read_file(incarnation_file)
- new_incarnation = goal_state.incarnation
- if last_incarnation is not None and \
- last_incarnation == new_incarnation:
- # Goalstate is not updated.
- return
-
- # Start updating goalstate, retry on 410
+ goal_state = None
for retry in range(0, max_retry):
try:
+ if goal_state is None:
+ xml_text = self.fetch_config(uri, self.get_header())
+ goal_state = GoalState(xml_text)
+
+ if not forced:
+ last_incarnation = None
+ if os.path.isfile(incarnation_file):
+ last_incarnation = fileutil.read_file(
+ incarnation_file)
+ new_incarnation = goal_state.incarnation
+ if last_incarnation is not None and \
+ last_incarnation == new_incarnation:
+ # Goalstate is not updated.
+ return
+
self.goal_state = goal_state
file_name = GOAL_STATE_FILE_NAME.format(goal_state.incarnation)
goal_state_file = os.path.join(conf.get_lib_dir(), file_name)
@@ -727,21 +725,29 @@ class WireClient(object):
self.update_certs(goal_state)
self.update_ext_conf(goal_state)
self.save_cache(incarnation_file, goal_state.incarnation)
+
if self.host_plugin is not None:
self.host_plugin.container_id = goal_state.container_id
self.host_plugin.role_config_name = goal_state.role_config_name
+
return
- except ProtocolError:
+ except ResourceGoneError:
+ logger.info("GoalState is stale -- re-fetching")
+ goal_state = None
+
+ except Exception as e:
+ log_method = logger.info \
+ if type(e) is ProtocolError \
+ else logger.warn
+ log_method(
+ "Exception processing GoalState-related files: {0}".format(
+ ustr(e)))
+
if retry < max_retry-1:
continue
raise
- except WireProtocolResourceGone:
- logger.info("Incarnation is out of date. Update goalstate.")
- xml_text = self.fetch_config(uri, self.get_header())
- goal_state = GoalState(xml_text)
-
raise ProtocolError("Exceeded max retry updating goal state")
def get_goal_state(self):
@@ -793,25 +799,70 @@ class WireClient(object):
self.ext_conf = ExtensionsConfig(xml_text)
return self.ext_conf
- def get_ext_manifest(self, ext_handler, goal_state):
+ def get_ext_manifest(self, ext_handler, incarnation):
+
for update_goal_state in [False, True]:
try:
if update_goal_state:
self.update_goal_state(forced=True)
- goal_state = self.get_goal_state()
+ incarnation = self.get_goal_state().incarnation
local_file = MANIFEST_FILE_NAME.format(
ext_handler.name,
- goal_state.incarnation)
+ incarnation)
local_file = os.path.join(conf.get_lib_dir(), local_file)
- xml_text = self.fetch_manifest(ext_handler.versionUris)
- self.save_cache(local_file, xml_text)
+
+ xml_text = None
+ if not update_goal_state:
+ try:
+ xml_text = self.fetch_cache(local_file)
+ except ProtocolError:
+ pass
+
+ if xml_text is None:
+ xml_text = self.fetch_manifest(ext_handler.versionUris)
+ self.save_cache(local_file, xml_text)
return ExtensionManifest(xml_text)
except ResourceGoneError:
continue
- raise ProtocolError("Failed to retrieve extension manifest")
+ raise RestartError("Failed to retrieve extension manifest")
+
+ def filter_package_list(self, family, ga_manifest, goal_state):
+ complete_list = ga_manifest.pkg_list
+ agent_manifest = os.path.join(conf.get_lib_dir(),
+ AGENTS_MANIFEST_FILE_NAME.format(
+ family,
+ goal_state.incarnation))
+
+ if not os.path.exists(agent_manifest):
+ # clear memory cache
+ ga_manifest.allowed_versions = None
+
+ # create disk cache
+ with open(agent_manifest, mode='w') as manifest_fh:
+ for version in complete_list.versions:
+ manifest_fh.write('{0}\n'.format(version.version))
+ fileutil.chmod(agent_manifest, 0o644)
+
+ return complete_list
+
+ else:
+ # use allowed versions from cache, otherwise from disk
+ if ga_manifest.allowed_versions is None:
+ with open(agent_manifest, mode='r') as manifest_fh:
+ ga_manifest.allowed_versions = [v.strip('\n') for v
+ in manifest_fh.readlines()]
+
+ # use the updated manifest urls for allowed versions
+ allowed_list = ExtHandlerPackageList()
+ allowed_list.versions = [version for version
+ in complete_list.versions
+ if version.version
+ in ga_manifest.allowed_versions]
+
+ return allowed_list
def get_gafamily_manifest(self, vmagent_manifest, goal_state):
for update_goal_state in [False, True]:
@@ -844,7 +895,7 @@ class WireClient(object):
logger.info("Wire protocol version:{0}", PROTOCOL_VERSION)
elif PROTOCOL_VERSION in version_info.get_supported():
logger.info("Wire protocol version:{0}", PROTOCOL_VERSION)
- logger.warn("Server preferred version:{0}", preferred)
+ logger.info("Server preferred version:{0}", preferred)
else:
error = ("Agent supported wire protocol version: {0} was not "
"advised by Fabric.").format(PROTOCOL_VERSION)
@@ -1360,6 +1411,9 @@ class ExtensionsConfig(object):
ext_handler.properties.version = getattrib(plugin, "version")
ext_handler.properties.state = getattrib(plugin, "state")
+ ext_handler.properties.upgradeGuid = getattrib(plugin, "upgradeGuid")
+ if not ext_handler.properties.upgradeGuid:
+ ext_handler.properties.upgradeGuid = None
auto_upgrade = getattrib(plugin, "autoUpgrade")
if auto_upgrade is not None and auto_upgrade.lower() == "true":
ext_handler.properties.upgradePolicy = "auto"
@@ -1418,6 +1472,7 @@ class ExtensionManifest(object):
raise ValueError("ExtensionManifest is None")
logger.verbose("Load ExtensionManifest.xml")
self.pkg_list = ExtHandlerPackageList()
+ self.allowed_versions = None
self.parse(xml_text)
def parse(self, xml_text):
diff --git a/azurelinuxagent/common/utils/cryptutil.py b/azurelinuxagent/common/utils/cryptutil.py
index 6339eb3..b34c1a5 100644
--- a/azurelinuxagent/common/utils/cryptutil.py
+++ b/azurelinuxagent/common/utils/cryptutil.py
@@ -19,8 +19,11 @@
import base64
import struct
+
from azurelinuxagent.common.future import ustr, bytebuffer
from azurelinuxagent.common.exception import CryptError
+
+import azurelinuxagent.common.logger as logger
import azurelinuxagent.common.utils.shellutil as shellutil
class CryptUtil(object):
@@ -34,7 +37,10 @@ class CryptUtil(object):
cmd = ("{0} req -x509 -nodes -subj /CN=LinuxTransport -days 730 "
"-newkey rsa:2048 -keyout {1} "
"-out {2}").format(self.openssl_cmd, prv_file, crt_file)
- shellutil.run(cmd)
+ rc = shellutil.run(cmd)
+ if rc != 0:
+ logger.error("Failed to create {0} and {1} certificates".format(
+ prv_file, crt_file))
def get_pubkey_from_prv(self, file_name):
cmd = "{0} rsa -in {1} -pubout 2>/dev/null".format(self.openssl_cmd,
@@ -61,6 +67,9 @@ class CryptUtil(object):
"").format(self.openssl_cmd, p7m_file, trans_prv_file,
trans_cert_file, self.openssl_cmd, pem_file)
shellutil.run(cmd)
+ rc = shellutil.run(cmd)
+ if rc != 0:
+ logger.error("Failed to decrypt {0}".format(p7m_file))
def crt_to_ssh(self, input_file, output_file):
shellutil.run("ssh-keygen -i -m PKCS8 -f {0} >> {1}".format(input_file,
diff --git a/azurelinuxagent/common/utils/fileutil.py b/azurelinuxagent/common/utils/fileutil.py
index 96b5b82..1f0c7ac 100644
--- a/azurelinuxagent/common/utils/fileutil.py
+++ b/azurelinuxagent/common/utils/fileutil.py
@@ -27,7 +27,6 @@ import os
import pwd
import re
import shutil
-import string
import azurelinuxagent.common.logger as logger
import azurelinuxagent.common.utils.textutil as textutil
@@ -42,9 +41,10 @@ KNOWN_IOERRORS = [
errno.ENOSPC, # Out of space
errno.ENAMETOOLONG, # Name too long
errno.ELOOP, # Too many symbolic links encountered
- errno.EREMOTEIO # Remote I/O error
+ 121 # Remote I/O error (errno.EREMOTEIO -- not present in all Python 2.7+)
]
+
def copy_file(from_path, to_path=None, to_dir=None):
if to_path is None:
to_path = os.path.join(to_dir, os.path.basename(from_path))
@@ -66,11 +66,12 @@ def read_file(filepath, asbin=False, remove_bom=False, encoding='utf-8'):
return data
if remove_bom:
- #Remove bom on bytes data before it is converted into string.
+ # remove bom on bytes data before it is converted into string.
data = textutil.remove_bom(data)
data = ustr(data, encoding=encoding)
return data
+
def write_file(filepath, contents, asbin=False, encoding='utf-8', append=False):
"""
Write 'contents' to 'filepath'.
@@ -82,6 +83,7 @@ def write_file(filepath, contents, asbin=False, encoding='utf-8', append=False):
with open(filepath, mode) as out_file:
out_file.write(data)
+
def append_file(filepath, contents, asbin=False, encoding='utf-8'):
"""
Append 'contents' to 'filepath'.
@@ -93,6 +95,7 @@ def base_name(path):
head, tail = os.path.split(path)
return tail
+
def get_line_startingwith(prefix, filepath):
"""
Return line from 'filepath' if the line startswith 'prefix'
@@ -102,7 +105,6 @@ def get_line_startingwith(prefix, filepath):
return line
return None
-#End File operation util functions
def mkdir(dirpath, mode=None, owner=None):
if not os.path.isdir(dirpath):
@@ -112,6 +114,7 @@ def mkdir(dirpath, mode=None, owner=None):
if owner is not None:
chowner(dirpath, owner)
+
def chowner(path, owner):
if not os.path.exists(path):
logger.error("Path does not exist: {0}".format(path))
@@ -119,19 +122,22 @@ def chowner(path, owner):
owner_info = pwd.getpwnam(owner)
os.chown(path, owner_info[2], owner_info[3])
+
def chmod(path, mode):
if not os.path.exists(path):
logger.error("Path does not exist: {0}".format(path))
else:
os.chmod(path, mode)
+
def rm_files(*args):
for paths in args:
- #Find all possible file paths
+ # find all possible file paths
for path in glob.glob(paths):
if os.path.isfile(path):
os.remove(path)
+
def rm_dirs(*args):
"""
Remove the contents of each directry
@@ -149,20 +155,24 @@ def rm_dirs(*args):
elif os.path.isdir(path):
shutil.rmtree(path)
+
def trim_ext(path, ext):
if not ext.startswith("."):
ext = "." + ext
return path.split(ext)[0] if path.endswith(ext) else path
+
def update_conf_file(path, line_start, val, chk_err=False):
conf = []
if not os.path.isfile(path) and chk_err:
raise IOError("Can't find config file:{0}".format(path))
conf = read_file(path).split('\n')
- conf = [x for x in conf if x is not None and len(x) > 0 and not x.startswith(line_start)]
+ conf = [x for x in conf
+ if x is not None and len(x) > 0 and not x.startswith(line_start)]
conf.append(val)
write_file(path, '\n'.join(conf) + '\n')
+
def search_file(target_dir_name, target_file_name):
for root, dirs, files in os.walk(target_dir_name):
for file_name in files:
@@ -170,24 +180,28 @@ def search_file(target_dir_name, target_file_name):
return os.path.join(root, file_name)
return None
+
def chmod_tree(path, mode):
for root, dirs, files in os.walk(path):
for file_name in files:
os.chmod(os.path.join(root, file_name), mode)
+
def findstr_in_file(file_path, line_str):
"""
Return True if the line is in the file; False otherwise.
- (Trailing whitespace is ignore.)
+ (Trailing whitespace is ignored.)
"""
try:
for line in (open(file_path, 'r')).readlines():
if line_str == line.rstrip():
return True
- except Exception as e:
+ except Exception:
+ # swallow exception
pass
return False
+
def findre_in_file(file_path, line_re):
"""
Return match object if found in file.
@@ -203,6 +217,7 @@ def findre_in_file(file_path, line_re):
return None
+
def get_all_files(root_path):
"""
Find all files under the given root path
@@ -213,6 +228,7 @@ def get_all_files(root_path):
return result
+
def clean_ioerror(e, paths=[]):
"""
Clean-up possibly bad files and directories after an IO error.
@@ -228,5 +244,6 @@ def clean_ioerror(e, paths=[]):
shutil.rmtree(path, ignore_errors=True)
else:
os.remove(path)
- except Exception as e:
+ except Exception:
+ # swallow exception
pass
diff --git a/azurelinuxagent/common/utils/flexible_version.py b/azurelinuxagent/common/utils/flexible_version.py
index 2fce88d..14c2a73 100644
--- a/azurelinuxagent/common/utils/flexible_version.py
+++ b/azurelinuxagent/common/utils/flexible_version.py
@@ -37,7 +37,7 @@ class FlexibleVersion(version.Version):
self.prerelease = None
self.version = ()
if vstring:
- self._parse(vstring)
+ self._parse(str(vstring))
return
_nn_version = 'version'
diff --git a/azurelinuxagent/common/utils/restutil.py b/azurelinuxagent/common/utils/restutil.py
index ddd930b..807be29 100644
--- a/azurelinuxagent/common/utils/restutil.py
+++ b/azurelinuxagent/common/utils/restutil.py
@@ -18,6 +18,7 @@
#
import os
+import threading
import time
import traceback
@@ -32,10 +33,11 @@ from azurelinuxagent.common.version import PY_VERSION_MAJOR
SECURE_WARNING_EMITTED = False
-DEFAULT_RETRIES = 3
+DEFAULT_RETRIES = 6
+DELAY_IN_SECONDS = 1
-SHORT_DELAY_IN_SECONDS = 5
-LONG_DELAY_IN_SECONDS = 15
+THROTTLE_RETRIES = 25
+THROTTLE_DELAY_IN_SECONDS = 1
RETRY_CODES = [
httpclient.RESET_CONTENT,
@@ -63,7 +65,8 @@ OK_CODES = [
THROTTLE_CODES = [
httpclient.FORBIDDEN,
- httpclient.SERVICE_UNAVAILABLE
+ httpclient.SERVICE_UNAVAILABLE,
+ 429, # Request Rate Limit Exceeded
]
RETRY_EXCEPTIONS = [
@@ -76,6 +79,48 @@ RETRY_EXCEPTIONS = [
HTTP_PROXY_ENV = "http_proxy"
HTTPS_PROXY_ENV = "https_proxy"
+DEFAULT_PROTOCOL_ENDPOINT='168.63.129.16'
+HOST_PLUGIN_PORT = 32526
+
+
+class IOErrorCounter(object):
+ _lock = threading.RLock()
+ _protocol_endpoint = DEFAULT_PROTOCOL_ENDPOINT
+ _counts = {"hostplugin":0, "protocol":0, "other":0}
+
+ @staticmethod
+ def increment(host=None, port=None):
+ with IOErrorCounter._lock:
+ if host == IOErrorCounter._protocol_endpoint:
+ if port == HOST_PLUGIN_PORT:
+ IOErrorCounter._counts["hostplugin"] += 1
+ else:
+ IOErrorCounter._counts["protocol"] += 1
+ else:
+ IOErrorCounter._counts["other"] += 1
+
+ @staticmethod
+ def get_and_reset():
+ with IOErrorCounter._lock:
+ counts = IOErrorCounter._counts.copy()
+ IOErrorCounter.reset()
+ return counts
+
+ @staticmethod
+ def reset():
+ with IOErrorCounter._lock:
+ IOErrorCounter._counts = {"hostplugin":0, "protocol":0, "other":0}
+
+ @staticmethod
+ def set_protocol_endpoint(endpoint=DEFAULT_PROTOCOL_ENDPOINT):
+ IOErrorCounter._protocol_endpoint = endpoint
+
+
+def _compute_delay(retry_attempt=1, delay=DELAY_IN_SECONDS):
+ fib = (1, 1)
+ for n in range(retry_attempt):
+ fib = (fib[1], fib[0]+fib[1])
+ return delay*fib[1]
def _is_retry_status(status, retry_codes=RETRY_CODES):
return status in retry_codes
@@ -166,7 +211,7 @@ def http_request(method,
use_proxy=False,
max_retry=DEFAULT_RETRIES,
retry_codes=RETRY_CODES,
- retry_delay=SHORT_DELAY_IN_SECONDS):
+ retry_delay=DELAY_IN_SECONDS):
global SECURE_WARNING_EMITTED
@@ -208,18 +253,31 @@ def http_request(method,
msg = ''
attempt = 0
- delay = retry_delay
+ delay = 0
+ was_throttled = False
while attempt < max_retry:
if attempt > 0:
- logger.info("[HTTP Retry] Attempt {0} of {1}: {2}",
+ # Compute the request delay
+ # -- Use a fixed delay if the server ever rate-throttles the request
+ # (with a safe, minimum number of retry attempts)
+ # -- Otherwise, compute a delay that is the product of the next
+ # item in the Fibonacci series and the initial delay value
+ delay = THROTTLE_DELAY_IN_SECONDS \
+ if was_throttled \
+ else _compute_delay(retry_attempt=attempt,
+ delay=retry_delay)
+
+ logger.verbose("[HTTP Retry] "
+ "Attempt {0} of {1} will delay {2} seconds: {3}",
attempt+1,
max_retry,
+ delay,
msg)
+
time.sleep(delay)
attempt += 1
- delay = retry_delay
try:
resp = _http_request(method,
@@ -235,13 +293,13 @@ def http_request(method,
if request_failed(resp):
if _is_retry_status(resp.status, retry_codes=retry_codes):
- msg = '[HTTP Retry] HTTP {0} Status Code {1}'.format(
- method, resp.status)
+ msg = '[HTTP Retry] {0} {1} -- Status Code {2}'.format(
+ method, url, resp.status)
+ # Note if throttled and ensure a safe, minimum number of
+ # retry attempts
if _is_throttle_status(resp.status):
- delay = LONG_DELAY_IN_SECONDS
- logger.info("[HTTP Delay] Delay {0} seconds for " \
- "Status Code {1}".format(
- delay, resp.status))
+ was_throttled = True
+ max_retry = max(max_retry, THROTTLE_RETRIES)
continue
if resp.status in RESOURCE_GONE_CODES:
@@ -250,22 +308,25 @@ def http_request(method,
return resp
except httpclient.HTTPException as e:
- msg = '[HTTP Failed] HTTP {0} HttpException {1}'.format(method, e)
+ msg = '[HTTP Failed] {0} {1} -- HttpException {2}'.format(
+ method, url, e)
if _is_retry_exception(e):
continue
break
except IOError as e:
- msg = '[HTTP Failed] HTTP {0} IOError {1}'.format(method, e)
+ IOErrorCounter.increment(host=host, port=port)
+ msg = '[HTTP Failed] {0} {1} -- IOError {2}'.format(
+ method, url, e)
continue
- raise HttpError(msg)
+ raise HttpError("{0} -- {1} attempts made".format(msg,attempt))
def http_get(url, headers=None, use_proxy=False,
max_retry=DEFAULT_RETRIES,
retry_codes=RETRY_CODES,
- retry_delay=SHORT_DELAY_IN_SECONDS):
+ retry_delay=DELAY_IN_SECONDS):
return http_request("GET",
url, None, headers=headers,
use_proxy=use_proxy,
@@ -277,7 +338,7 @@ def http_get(url, headers=None, use_proxy=False,
def http_head(url, headers=None, use_proxy=False,
max_retry=DEFAULT_RETRIES,
retry_codes=RETRY_CODES,
- retry_delay=SHORT_DELAY_IN_SECONDS):
+ retry_delay=DELAY_IN_SECONDS):
return http_request("HEAD",
url, None, headers=headers,
use_proxy=use_proxy,
@@ -289,7 +350,7 @@ def http_head(url, headers=None, use_proxy=False,
def http_post(url, data, headers=None, use_proxy=False,
max_retry=DEFAULT_RETRIES,
retry_codes=RETRY_CODES,
- retry_delay=SHORT_DELAY_IN_SECONDS):
+ retry_delay=DELAY_IN_SECONDS):
return http_request("POST",
url, data, headers=headers,
use_proxy=use_proxy,
@@ -301,7 +362,7 @@ def http_post(url, data, headers=None, use_proxy=False,
def http_put(url, data, headers=None, use_proxy=False,
max_retry=DEFAULT_RETRIES,
retry_codes=RETRY_CODES,
- retry_delay=SHORT_DELAY_IN_SECONDS):
+ retry_delay=DELAY_IN_SECONDS):
return http_request("PUT",
url, data, headers=headers,
use_proxy=use_proxy,
@@ -313,7 +374,7 @@ def http_put(url, data, headers=None, use_proxy=False,
def http_delete(url, headers=None, use_proxy=False,
max_retry=DEFAULT_RETRIES,
retry_codes=RETRY_CODES,
- retry_delay=SHORT_DELAY_IN_SECONDS):
+ retry_delay=DELAY_IN_SECONDS):
return http_request("DELETE",
url, None, headers=headers,
use_proxy=use_proxy,
diff --git a/azurelinuxagent/common/utils/shellutil.py b/azurelinuxagent/common/utils/shellutil.py
index fff6aa8..7b8e0c9 100644
--- a/azurelinuxagent/common/utils/shellutil.py
+++ b/azurelinuxagent/common/utils/shellutil.py
@@ -84,16 +84,22 @@ def run_get_output(cmd, chk_err=True, log_cmd=True):
output = ustr(output,
encoding='utf-8',
errors="backslashreplace")
- except subprocess.CalledProcessError as e:
- output = ustr(e.output,
- encoding='utf-8',
- errors="backslashreplace")
- if chk_err:
- if log_cmd:
- logger.error(u"Command: '{0}'", e.cmd)
- logger.error(u"Return code: {0}", e.returncode)
- logger.error(u"Result: {0}", output)
- return e.returncode, output
+ except Exception as e:
+ if type(e) is subprocess.CalledProcessError:
+ output = ustr(e.output,
+ encoding='utf-8',
+ errors="backslashreplace")
+ if chk_err:
+ if log_cmd:
+ logger.error(u"Command: '{0}'", e.cmd)
+ logger.error(u"Return code: {0}", e.returncode)
+ logger.error(u"Result: {0}", output)
+ return e.returncode, output
+ else:
+ logger.error(
+ u"'{0}' raised unexpected exception: '{1}'".format(
+ cmd, ustr(e)))
+ return -1, ustr(e)
return 0, output
diff --git a/azurelinuxagent/common/version.py b/azurelinuxagent/common/version.py
index e1bb828..f743f11 100644
--- a/azurelinuxagent/common/version.py
+++ b/azurelinuxagent/common/version.py
@@ -113,7 +113,7 @@ def get_distro():
AGENT_NAME = "WALinuxAgent"
AGENT_LONG_NAME = "Azure Linux Agent"
-AGENT_VERSION = '2.2.17'
+AGENT_VERSION = '2.2.21'
AGENT_LONG_VERSION = "{0}-{1}".format(AGENT_NAME, AGENT_VERSION)
AGENT_DESCRIPTION = """
The Azure Linux Agent supports the provisioning and running of Linux
@@ -161,14 +161,17 @@ def set_current_agent():
version = AGENT_VERSION
return agent, FlexibleVersion(version)
+
def is_agent_package(path):
path = os.path.basename(path)
return not re.match(AGENT_PKG_PATTERN, path) is None
+
def is_agent_path(path):
path = os.path.basename(path)
return not re.match(AGENT_NAME_PATTERN, path) is None
+
CURRENT_AGENT, CURRENT_VERSION = set_current_agent()
diff --git a/azurelinuxagent/ga/env.py b/azurelinuxagent/ga/env.py
index 45b10bb..d9b7d82 100644
--- a/azurelinuxagent/ga/env.py
+++ b/azurelinuxagent/ga/env.py
@@ -17,11 +17,16 @@
# Requires Python 2.4+ and Openssl 1.0+
#
+import re
import os
import socket
import time
import threading
+import operator
+
+import datetime
+
import azurelinuxagent.common.conf as conf
import azurelinuxagent.common.logger as logger
@@ -29,15 +34,29 @@ from azurelinuxagent.common.dhcp import get_dhcp_handler
from azurelinuxagent.common.event import add_periodic, WALAEventOperation
from azurelinuxagent.common.osutil import get_osutil
from azurelinuxagent.common.protocol import get_protocol_util
+from azurelinuxagent.common.protocol.wire import INCARNATION_FILE_NAME
+from azurelinuxagent.common.utils import fileutil
from azurelinuxagent.common.version import AGENT_NAME, CURRENT_VERSION
+CACHE_PATTERNS = [
+ re.compile("^(.*)\.(\d+)\.(agentsManifest)$", re.IGNORECASE),
+ re.compile("^(.*)\.(\d+)\.(manifest\.xml)$", re.IGNORECASE),
+ re.compile("^(.*)\.(\d+)\.(xml)$", re.IGNORECASE)
+]
+
+MAXIMUM_CACHED_FILES = 50
+
+CACHE_PURGE_INTERVAL = datetime.timedelta(hours=24)
+
+
def get_env_handler():
return EnvHandler()
+
class EnvHandler(object):
"""
Monitor changes to dhcp and hostname.
- If dhcp clinet process re-start has occurred, reset routes, dhcp with fabric.
+ If dhcp client process re-start has occurred, reset routes, dhcp with fabric.
Monitor scsi disk.
If new scsi disk found, set timeout
@@ -48,9 +67,10 @@ class EnvHandler(object):
self.protocol_util = get_protocol_util()
self.stopped = True
self.hostname = None
- self.dhcpid = None
+ self.dhcp_id = None
self.server_thread = None
self.dhcp_warning_enabled = True
+ self.last_purge = None
def run(self):
if not self.stopped:
@@ -61,7 +81,7 @@ class EnvHandler(object):
logger.info("Start env monitor service.")
self.dhcp_handler.conf_routes()
self.hostname = self.osutil.get_hostname_record()
- self.dhcpid = self.osutil.get_dhcp_pid()
+ self.dhcp_id = self.osutil.get_dhcp_pid()
self.server_thread = threading.Thread(target=self.monitor)
self.server_thread.setDaemon(True)
self.server_thread.start()
@@ -70,26 +90,24 @@ class EnvHandler(object):
"""
Monitor firewall rules
Monitor dhcp client pid and hostname.
- If dhcp clinet process re-start has occurred, reset routes.
+ If dhcp client process re-start has occurred, reset routes.
+ Purge unnecessary files from disk cache.
"""
protocol = self.protocol_util.get_protocol()
while not self.stopped:
self.osutil.remove_rules_files()
- # Disable setting firewall for now, regardless of configuration switch
- # if conf.enable_firewall():
- # success = self.osutil.enable_firewall(
- # dst_ip=protocol.endpoint,
- # uid=os.getuid())
- # add_periodic(
- # logger.EVERY_HOUR,
- # AGENT_NAME,
- # version=CURRENT_VERSION,
- # op=WALAEventOperation.Firewall,
- # is_success=success,
- # log_event=True)
-
- self.osutil.remove_firewall()
+ if conf.enable_firewall():
+ success = self.osutil.enable_firewall(
+ dst_ip=protocol.endpoint,
+ uid=os.getuid())
+ add_periodic(
+ logger.EVERY_HOUR,
+ AGENT_NAME,
+ version=CURRENT_VERSION,
+ op=WALAEventOperation.Firewall,
+ is_success=success,
+ log_event=False)
timeout = conf.get_root_device_scsi_timeout()
if timeout is not None:
@@ -100,6 +118,8 @@ class EnvHandler(object):
self.handle_dhclient_restart()
+ self.purge_disk_cache()
+
time.sleep(5)
def handle_hostname_update(self):
@@ -113,30 +133,95 @@ class EnvHandler(object):
self.hostname = curr_hostname
def handle_dhclient_restart(self):
- if self.dhcpid is None:
+ if self.dhcp_id is None:
if self.dhcp_warning_enabled:
logger.warn("Dhcp client is not running. ")
- self.dhcpid = self.osutil.get_dhcp_pid()
+ self.dhcp_id = self.osutil.get_dhcp_pid()
# disable subsequent error logging
- self.dhcp_warning_enabled = self.dhcpid is not None
+ self.dhcp_warning_enabled = self.dhcp_id is not None
+ return
+
+ # the dhcp process has not changed since the last check
+ if self.osutil.check_pid_alive(self.dhcp_id.strip()):
return
- #The dhcp process hasn't changed since last check
- if self.osutil.check_pid_alive(self.dhcpid.strip()):
+ new_pid = self.osutil.get_dhcp_pid()
+ if new_pid is not None and new_pid != self.dhcp_id:
+ logger.info("EnvMonitor: Detected dhcp client restart. "
+ "Restoring routing table.")
+ self.dhcp_handler.conf_routes()
+ self.dhcp_id = new_pid
+
+ def purge_disk_cache(self):
+ """
+ Ensure the number of cached files does not exceed a maximum count.
+ Purge only once per interval, and never delete files related to the
+ current incarnation.
+ """
+ if self.last_purge is not None \
+ and datetime.datetime.utcnow() < \
+ self.last_purge + CACHE_PURGE_INTERVAL:
+ return
+
+ current_incarnation = -1
+ self.last_purge = datetime.datetime.utcnow()
+ incarnation_file = os.path.join(conf.get_lib_dir(),
+ INCARNATION_FILE_NAME)
+ if os.path.exists(incarnation_file):
+ last_incarnation = fileutil.read_file(incarnation_file)
+ if last_incarnation is not None:
+ current_incarnation = int(last_incarnation)
+
+ logger.info("Purging disk cache, current incarnation is {0}"
+ .format('not found'
+ if current_incarnation == -1
+ else current_incarnation))
+
+ # Create tuples: (prefix, suffix, incarnation, name, file_modified)
+ files = []
+ for f in os.listdir(conf.get_lib_dir()):
+ full_path = os.path.join(conf.get_lib_dir(), f)
+ for pattern in CACHE_PATTERNS:
+ m = pattern.match(f)
+ if m is not None:
+ prefix = m.group(1)
+ suffix = m.group(3)
+ incarnation = int(m.group(2))
+ file_modified = os.path.getmtime(full_path)
+ t = (prefix, suffix, incarnation, f, file_modified)
+ files.append(t)
+ break
+
+ if len(files) <= 0:
return
- newpid = self.osutil.get_dhcp_pid()
- if newpid is not None and newpid != self.dhcpid:
- logger.info("EnvMonitor: Detected dhcp client restart. "
- "Restoring routing table.")
- self.dhcp_handler.conf_routes()
- self.dhcpid = newpid
+ # Sort by (prefix, suffix, file_modified) in reverse order
+ files = sorted(files, key=operator.itemgetter(0, 1, 4), reverse=True)
+
+ # Remove any files in excess of the maximum allowed
+ # -- Restart then whenever the (prefix, suffix) change
+ count = 0
+ last_match = [None, None]
+ for f in files:
+ if last_match != f[0:2]:
+ last_match = f[0:2]
+ count = 0
+
+ if current_incarnation == f[2]:
+ logger.verbose("Skipping {0}".format(f[3]))
+ continue
+
+ count += 1
+
+ if count > MAXIMUM_CACHED_FILES:
+ full_name = os.path.join(conf.get_lib_dir(), f[3])
+ logger.verbose("Deleting {0}".format(full_name))
+ os.remove(full_name)
def stop(self):
"""
- Stop server comminucation and join the thread to main thread.
+ Stop server communication and join the thread to main thread.
"""
self.stopped = True
if self.server_thread is not None:
self.server_thread.join()
-
diff --git a/azurelinuxagent/ga/exthandlers.py b/azurelinuxagent/ga/exthandlers.py
index f0a3b09..cc1796b 100644
--- a/azurelinuxagent/ga/exthandlers.py
+++ b/azurelinuxagent/ga/exthandlers.py
@@ -17,10 +17,12 @@
# Requires Python 2.4+ and Openssl 1.0+
#
+import datetime
import glob
import json
import os
import os.path
+import random
import re
import shutil
import stat
@@ -31,25 +33,22 @@ import zipfile
import azurelinuxagent.common.conf as conf
import azurelinuxagent.common.logger as logger
import azurelinuxagent.common.utils.fileutil as fileutil
-import azurelinuxagent.common.utils.restutil as restutil
-import azurelinuxagent.common.utils.shellutil as shellutil
import azurelinuxagent.common.version as version
+from azurelinuxagent.common.errorstate import ErrorState, ERROR_STATE_DELTA
-from azurelinuxagent.common.event import add_event, WALAEventOperation
-from azurelinuxagent.common.exception import ExtensionError, ProtocolError, HttpError
+from azurelinuxagent.common.event import add_event, WALAEventOperation, elapsed_milliseconds
+from azurelinuxagent.common.exception import ExtensionError, ProtocolError, RestartError
from azurelinuxagent.common.future import ustr
-from azurelinuxagent.common.version import AGENT_VERSION
from azurelinuxagent.common.protocol.restapi import ExtHandlerStatus, \
ExtensionStatus, \
ExtensionSubStatus, \
- Extension, \
VMStatus, ExtHandler, \
get_properties, \
set_properties
from azurelinuxagent.common.utils.flexible_version import FlexibleVersion
-from azurelinuxagent.common.utils.textutil import Version
from azurelinuxagent.common.protocol import get_protocol_util
-from azurelinuxagent.common.version import AGENT_NAME, CURRENT_AGENT, CURRENT_VERSION
+from azurelinuxagent.common.version import AGENT_NAME, CURRENT_VERSION
+
#HandlerEnvironment.json schema version
HANDLER_ENVIRONMENT_VERSION = 1.0
@@ -64,7 +63,6 @@ HANDLER_PKG_EXT = ".zip"
HANDLER_PKG_PATTERN = re.compile(HANDLER_PATTERN+"\\"+HANDLER_PKG_EXT+"$",
re.IGNORECASE)
-
def validate_has_key(obj, key, fullname):
if key not in obj:
raise ExtensionError("Missing: {0}".format(fullname))
@@ -176,35 +174,73 @@ class ExtHandlersHandler(object):
self.protocol = None
self.ext_handlers = None
self.last_etag = None
+ self.last_upgrade_guids = {}
self.log_report = False
self.log_etag = True
+ self.log_process = False
+
+ self.report_status_error_state = ErrorState()
def run(self):
self.ext_handlers, etag = None, None
try:
self.protocol = self.protocol_util.get_protocol()
self.ext_handlers, etag = self.protocol.get_ext_handlers()
- except ProtocolError as e:
+ except Exception as e:
msg = u"Exception retrieving extension handlers: {0}".format(
ustr(e))
logger.warn(msg)
- add_event(AGENT_NAME, version=CURRENT_VERSION, is_success=False, message=msg)
+ add_event(AGENT_NAME,
+ version=CURRENT_VERSION,
+ op=WALAEventOperation.ExtensionProcessing,
+ is_success=False,
+ message=msg)
return
- msg = u"Handle extensions updates for incarnation {0}".format(etag)
- logger.verbose(msg)
- # Log status report success on new config
- self.log_report = True
- self.handle_ext_handlers(etag)
- self.last_etag = etag
-
- self.report_ext_handlers_status()
- self.cleanup_outdated_handlers()
+ try:
+ msg = u"Handle extensions updates for incarnation {0}".format(etag)
+ logger.verbose(msg)
+ # Log status report success on new config
+ self.log_report = True
+ self.handle_ext_handlers(etag)
+ self.last_etag = etag
+
+ self.report_ext_handlers_status()
+ self.cleanup_outdated_handlers()
+ except RestartError:
+ raise
+ except Exception as e:
+ msg = u"Exception processing extension handlers: {0}".format(
+ ustr(e))
+ logger.warn(msg)
+ add_event(AGENT_NAME,
+ version=CURRENT_VERSION,
+ op=WALAEventOperation.ExtensionProcessing,
+ is_success=False,
+ message=msg)
+ return
def run_status(self):
self.report_ext_handlers_status()
return
+ def get_upgrade_guid(self, name):
+ return self.last_upgrade_guids.get(name, (None, False))[0]
+
+ def get_log_upgrade_guid(self, ext_handler):
+ return self.last_upgrade_guids.get(ext_handler.name, (None, False))[1]
+
+ def set_log_upgrade_guid(self, ext_handler, log_val):
+ guid = self.get_upgrade_guid(ext_handler.name)
+ if guid is not None:
+ self.last_upgrade_guids[ext_handler.name] = (guid, log_val)
+
+ def is_new_guid(self, ext_handler):
+ last_guid = self.get_upgrade_guid(ext_handler.name)
+ if last_guid is None:
+ return True
+ return last_guid != ext_handler.properties.upgradeGuid
+
def cleanup_outdated_handlers(self):
handlers = []
pkgs = []
@@ -288,7 +324,29 @@ class ExtHandlersHandler(object):
ext_handler_i = ExtHandlerInstance(ext_handler, self.protocol)
try:
- ext_handler_i.decide_version()
+ state = ext_handler.properties.state
+ # The extension is to be enabled, there is an upgrade GUID
+ # and the GUID is NOT new
+ if state == u"enabled" and \
+ ext_handler.properties.upgradeGuid is not None and \
+ not self.is_new_guid(ext_handler):
+ ext_handler_i.ext_handler.properties.version = ext_handler_i.get_installed_version()
+ ext_handler_i.set_logger()
+ if self.last_etag != etag:
+ self.set_log_upgrade_guid(ext_handler, True)
+
+ msg = "New GUID is the same as the old GUID. Exiting without upgrading."
+ if self.get_log_upgrade_guid(ext_handler):
+ ext_handler_i.logger.info(msg)
+ self.set_log_upgrade_guid(ext_handler, False)
+ ext_handler_i.set_handler_state(ExtHandlerState.Enabled)
+ ext_handler_i.set_handler_status(status="Ready", message="No change")
+ ext_handler_i.set_operation(WALAEventOperation.SkipUpdate)
+ ext_handler_i.report_event(message=ustr(msg), is_success=True)
+ return
+
+ self.set_log_upgrade_guid(ext_handler, True)
+ ext_handler_i.decide_version(etag=etag, target_state=state)
if not ext_handler_i.is_upgrade and self.last_etag == etag:
if self.log_etag:
ext_handler_i.logger.verbose("Version {0} is current for etag {1}",
@@ -299,22 +357,34 @@ class ExtHandlersHandler(object):
self.log_etag = True
- state = ext_handler.properties.state
ext_handler_i.logger.info("Target handler state: {0}", state)
if state == u"enabled":
self.handle_enable(ext_handler_i)
+ if ext_handler.properties.upgradeGuid is not None:
+ ext_handler_i.logger.info("New Upgrade GUID: {0}", ext_handler.properties.upgradeGuid)
+ self.last_upgrade_guids[ext_handler.name] = (ext_handler.properties.upgradeGuid, True)
elif state == u"disabled":
self.handle_disable(ext_handler_i)
+ # Remove the GUID from the dictionary so that it is upgraded upon re-enable
+ self.last_upgrade_guids.pop(ext_handler.name, None)
elif state == u"uninstall":
self.handle_uninstall(ext_handler_i)
+ # Remove the GUID from the dictionary so that it is upgraded upon re-install
+ self.last_upgrade_guids.pop(ext_handler.name, None)
else:
message = u"Unknown ext handler state:{0}".format(state)
raise ExtensionError(message)
- except ExtensionError as e:
+ except RestartError:
+ ext_handler_i.logger.info("GoalState became stale during "
+ "processing. Restarting with new "
+ "GoalState")
+ raise
+ except Exception as e:
ext_handler_i.set_handler_status(message=ustr(e), code=-1)
ext_handler_i.report_event(message=ustr(e), is_success=False)
def handle_enable(self, ext_handler_i):
+ self.log_process = True
old_ext_handler_i = ext_handler_i.get_installed_ext_handler()
if old_ext_handler_i is not None and \
old_ext_handler_i.version_gt(ext_handler_i):
@@ -341,6 +411,7 @@ class ExtHandlersHandler(object):
ext_handler_i.enable()
def handle_disable(self, ext_handler_i):
+ self.log_process = True
handler_state = ext_handler_i.get_handler_state()
ext_handler_i.logger.info("[Disable] current handler state is: {0}",
handler_state.lower())
@@ -348,6 +419,7 @@ class ExtHandlersHandler(object):
ext_handler_i.disable()
def handle_uninstall(self, ext_handler_i):
+ self.log_process = True
handler_state = ext_handler_i.get_handler_state()
ext_handler_i.logger.info("[Uninstall] current handler state is: {0}",
handler_state.lower())
@@ -356,7 +428,7 @@ class ExtHandlersHandler(object):
ext_handler_i.disable()
ext_handler_i.uninstall()
ext_handler_i.rm_ext_handler_dir()
-
+
def report_ext_handlers_status(self):
"""Go through handler_state dir, collect and report status"""
vm_status = VMStatus(status="Ready", message="Guest Agent is running")
@@ -368,6 +440,7 @@ class ExtHandlersHandler(object):
add_event(
AGENT_NAME,
version=CURRENT_VERSION,
+ op=WALAEventOperation.ExtensionProcessing,
is_success=False,
message=ustr(e))
@@ -376,9 +449,27 @@ class ExtHandlersHandler(object):
self.protocol.report_vm_status(vm_status)
if self.log_report:
logger.verbose("Completed vm agent status report")
+ self.report_status_error_state.reset()
except ProtocolError as e:
+ self.report_status_error_state.incr()
message = "Failed to report vm agent status: {0}".format(e)
- add_event(AGENT_NAME, version=CURRENT_VERSION, is_success=False, message=message)
+ add_event(AGENT_NAME,
+ version=CURRENT_VERSION,
+ op=WALAEventOperation.ExtensionProcessing,
+ is_success=False,
+ message=message)
+
+ if self.report_status_error_state.is_triggered():
+ message = "Failed to report vm agent status for more than {0}"\
+ .format(ERROR_STATE_DELTA)
+
+ add_event(AGENT_NAME,
+ version=CURRENT_VERSION,
+ op=WALAEventOperation.ExtensionProcessing,
+ is_success=False,
+ message=message)
+
+ self.report_status_error_state.reset()
def report_ext_handler_status(self, vm_status, ext_handler):
ext_handler_i = ExtHandlerInstance(ext_handler, self.protocol)
@@ -386,6 +477,9 @@ class ExtHandlersHandler(object):
handler_status = ext_handler_i.get_handler_status()
if handler_status is None:
return
+ guid = self.get_upgrade_guid(ext_handler.name)
+ if guid is not None:
+ handler_status.upgradeGuid = guid
handler_state = ext_handler_i.get_handler_state()
if handler_state != ExtHandlerState.NotInstalled:
@@ -413,9 +507,7 @@ class ExtHandlerInstance(object):
self.pkg = None
self.pkg_file = None
self.is_upgrade = False
-
- prefix = "[{0}]".format(self.get_full_name())
- self.logger = logger.Logger(logger.DEFAULT_LOGGER, prefix)
+ self.set_logger()
try:
fileutil.mkdir(self.get_log_dir(), mode=0o755)
@@ -426,15 +518,13 @@ class ExtHandlerInstance(object):
self.logger.add_appender(logger.AppenderType.FILE,
logger.LogLevel.INFO, log_file)
- def decide_version(self):
+ def decide_version(self, etag, target_state=None):
self.logger.verbose("Decide which version to use")
- try:
- pkg_list = self.protocol.get_ext_handler_pkgs(self.ext_handler)
- except ProtocolError as e:
- raise ExtensionError("Failed to get ext handler pkgs", e)
+ pkg_list = self.protocol.get_ext_handler_pkgs(self.ext_handler, etag)
# Determine the desired and installed versions
- requested_version = FlexibleVersion(self.ext_handler.properties.version)
+ requested_version = FlexibleVersion(
+ str(self.ext_handler.properties.version))
installed_version_string = self.get_installed_version()
installed_version = requested_version \
if installed_version_string is None \
@@ -511,13 +601,22 @@ class ExtHandlerInstance(object):
# Note:
# - A downgrade, which will be bound to the same major version,
# is allowed if the installed version is no longer available
- if selected_pkg is None \
+ if target_state == u"uninstall":
+ if installed_pkg is None:
+ msg = "Failed to find installed version of {0} " \
+ "to uninstall".format(self.ext_handler.name)
+ self.logger.warn(msg)
+ self.pkg = installed_pkg
+ self.ext_handler.properties.version = str(installed_version) \
+ if installed_version is not None else None
+ elif selected_pkg is None \
or (installed_pkg is not None and selected_version < installed_version):
self.pkg = installed_pkg
- self.ext_handler.properties.version = installed_version
+ self.ext_handler.properties.version = str(installed_version) \
+ if installed_version is not None else None
else:
self.pkg = selected_pkg
- self.ext_handler.properties.version = selected_pkg.version
+ self.ext_handler.properties.version = str(selected_pkg.version)
# Note if the selected package is greater than that installed
if installed_pkg is None \
@@ -528,12 +627,17 @@ class ExtHandlerInstance(object):
raise ExtensionError("Failed to find any valid extension package")
self.logger.verbose("Use version: {0}", self.pkg.version)
+ self.set_logger()
return
+ def set_logger(self):
+ prefix = "[{0}]".format(self.get_full_name())
+ self.logger = logger.Logger(logger.DEFAULT_LOGGER, prefix)
+
def version_gt(self, other):
self_version = self.ext_handler.properties.version
other_version = other.ext_handler.properties.version
- return Version(self_version) > Version(other_version)
+ return FlexibleVersion(self_version) > FlexibleVersion(other_version)
def get_installed_ext_handler(self):
lastest_version = self.get_installed_version()
@@ -585,23 +689,26 @@ class ExtHandlerInstance(object):
status_file = os.path.join(old_ext_status_dir, status_file)
if os.path.isfile(status_file):
shutil.copy2(status_file, new_ext_status_dir)
-
+
def set_operation(self, op):
self.operation = op
- def report_event(self, message="", is_success=True):
+ def report_event(self, message="", is_success=True, duration=0):
version = self.ext_handler.properties.version
add_event(name=self.ext_handler.name, version=version, message=message,
- op=self.operation, is_success=is_success)
+ op=self.operation, is_success=is_success, duration=duration)
def download(self):
+ begin_utc = datetime.datetime.utcnow()
self.logger.verbose("Download extension package")
self.set_operation(WALAEventOperation.Download)
if self.pkg is None:
raise ExtensionError("No package uri found")
package = None
- for uri in self.pkg.uris:
+ uris_shuffled = self.pkg.uris
+ random.shuffle(uris_shuffled)
+ for uri in uris_shuffled:
try:
package = self.protocol.download_ext_handler_pkg(uri.uri)
if package is not None:
@@ -627,7 +734,8 @@ class ExtHandlerInstance(object):
for file in fileutil.get_all_files(self.get_base_dir()):
fileutil.chmod(file, os.stat(file).st_mode | stat.S_IXUSR)
- self.report_event(message="Download succeeded")
+ duration = elapsed_milliseconds(begin_utc)
+ self.report_event(message="Download succeeded", duration=duration)
self.logger.info("Initialize extension directory")
#Save HandlerManifest.json
@@ -649,8 +757,25 @@ class ExtHandlerInstance(object):
try:
status_dir = self.get_status_dir()
fileutil.mkdir(status_dir, mode=0o700)
+
+ seq_no, status_path = self.get_status_file_path()
+ if seq_no > -1:
+ now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
+ status = {
+ "version": 1.0,
+ "timestampUTC" : now,
+ "status" : {
+ "name" : self.ext_handler.name,
+ "operation" : "Enabling Handler",
+ "status" : "transitioning",
+ "code" : 0
+ }
+ }
+ fileutil.write_file(json.dumps(status), status_path)
+
conf_dir = self.get_conf_dir()
fileutil.mkdir(conf_dir, mode=0o700)
+
except IOError as e:
fileutil.clean_ioerror(e,
paths=[self.get_base_dir(), self.pkg_file])
@@ -740,17 +865,24 @@ class ExtHandlerInstance(object):
continue
return seq_no
+ def get_status_file_path(self):
+ seq_no = self.get_largest_seq_no()
+ path = None
+
+ if seq_no > -1:
+ path = os.path.join(
+ self.get_status_dir(),
+ "{0}.status".format(seq_no))
+
+ return seq_no, path
+
def collect_ext_status(self, ext):
self.logger.verbose("Collect extension status")
- seq_no = self.get_largest_seq_no()
+ seq_no, ext_status_file = self.get_status_file_path()
if seq_no == -1:
return None
- status_dir = self.get_status_dir()
- ext_status_file = "{0}.status".format(seq_no)
- ext_status_file = os.path.join(status_dir, ext_status_file)
-
ext_status = ExtensionStatus(seq_no=seq_no)
try:
data_str = fileutil.read_file(ext_status_file)
@@ -810,6 +942,7 @@ class ExtHandlerInstance(object):
return last_update <= 600 # updated within the last 10 min
def launch_command(self, cmd, timeout=300):
+ begin_utc = datetime.datetime.utcnow()
self.logger.verbose("Launch command: [{0}]", cmd)
base_dir = self.get_base_dir()
try:
@@ -835,7 +968,8 @@ class ExtHandlerInstance(object):
if ret == None or ret != 0:
raise ExtensionError("Non-zero exit code: {0}, {1}".format(ret, cmd))
- self.report_event(message="Launch command succeeded: {0}".format(cmd))
+ duration = elapsed_milliseconds(begin_utc)
+ self.report_event(message="Launch command succeeded: {0}".format(cmd), duration=duration)
def load_manifest(self):
man_file = self.get_manifest_file()
@@ -929,7 +1063,7 @@ class ExtHandlerInstance(object):
handler_status = ExtHandlerStatus()
handler_status.name = self.ext_handler.name
- handler_status.version = self.ext_handler.properties.version
+ handler_status.version = str(self.ext_handler.properties.version)
handler_status.message = message
handler_status.code = code
handler_status.status = status
@@ -980,7 +1114,7 @@ class ExtHandlerInstance(object):
def get_log_dir(self):
return os.path.join(conf.get_ext_log_dir(), self.ext_handler.name,
- self.ext_handler.properties.version)
+ str(self.ext_handler.properties.version))
class HandlerEnvironment(object):
def __init__(self, data):
diff --git a/azurelinuxagent/ga/monitor.py b/azurelinuxagent/ga/monitor.py
index 307a514..71ac9b0 100644
--- a/azurelinuxagent/ga/monitor.py
+++ b/azurelinuxagent/ga/monitor.py
@@ -21,8 +21,10 @@ import os
import platform
import time
import threading
+import uuid
import azurelinuxagent.common.conf as conf
+import azurelinuxagent.common.utils.fileutil as fileutil
import azurelinuxagent.common.logger as logger
from azurelinuxagent.common.event import add_event, WALAEventOperation
@@ -34,6 +36,7 @@ from azurelinuxagent.common.protocol.restapi import TelemetryEventParam, \
TelemetryEventList, \
TelemetryEvent, \
set_properties
+from azurelinuxagent.common.utils.restutil import IOErrorCounter
from azurelinuxagent.common.utils.textutil import parse_doc, findall, find, getattrib
from azurelinuxagent.common.version import DISTRO_NAME, DISTRO_VERSION, \
DISTRO_CODE_NAME, AGENT_LONG_VERSION, \
@@ -179,15 +182,53 @@ class MonitorHandler(object):
def daemon(self):
period = datetime.timedelta(minutes=30)
+ protocol = self.protocol_util.get_protocol()
last_heartbeat = datetime.datetime.utcnow() - period
+
+ # Create a new identifier on each restart and reset the counter
+ heartbeat_id = str(uuid.uuid4()).upper()
+ counter = 0
while True:
if datetime.datetime.utcnow() >= (last_heartbeat + period):
last_heartbeat = datetime.datetime.utcnow()
+ incarnation = protocol.get_incarnation()
+ dropped_packets = self.osutil.get_firewall_dropped_packets(
+ protocol.endpoint)
+
+ msg = "{0};{1};{2};{3}".format(
+ incarnation, counter, heartbeat_id, dropped_packets)
+
add_event(
name=AGENT_NAME,
version=CURRENT_VERSION,
op=WALAEventOperation.HeartBeat,
- is_success=True)
+ is_success=True,
+ message=msg,
+ log_event=False)
+
+ counter += 1
+
+ io_errors = IOErrorCounter.get_and_reset()
+ hostplugin_errors = io_errors.get("hostplugin")
+ protocol_errors = io_errors.get("protocol")
+ other_errors = io_errors.get("other")
+
+ if hostplugin_errors > 0 \
+ or protocol_errors > 0 \
+ or other_errors > 0:
+
+ msg = "hostplugin:{0};protocol:{1};other:{2}"\
+ .format(hostplugin_errors,
+ protocol_errors,
+ other_errors)
+ add_event(
+ name=AGENT_NAME,
+ version=CURRENT_VERSION,
+ op=WALAEventOperation.HttpErrors,
+ is_success=True,
+ message=msg,
+ log_event=False)
+
try:
self.collect_and_send_events()
except Exception as e:
diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py
index b7ee96a..2e43031 100644
--- a/azurelinuxagent/ga/update.py
+++ b/azurelinuxagent/ga/update.py
@@ -21,9 +21,11 @@ import glob
import json
import os
import platform
+import random
import re
import shutil
import signal
+import stat
import subprocess
import sys
import time
@@ -43,6 +45,7 @@ from azurelinuxagent.common.event import add_event, add_periodic, \
WALAEventOperation
from azurelinuxagent.common.exception import ProtocolError, \
ResourceGoneError, \
+ RestartError, \
UpdateError
from azurelinuxagent.common.future import ustr
from azurelinuxagent.common.osutil import get_osutil
@@ -60,7 +63,7 @@ from azurelinuxagent.ga.exthandlers import HandlerManifest
AGENT_ERROR_FILE = "error.json" # File name for agent error record
AGENT_MANIFEST_FILE = "HandlerManifest.json"
-AGENT_SUPPORTED_FILE = "supported.json"
+AGENT_PARTITION_FILE = "partition"
CHILD_HEALTH_INTERVAL = 15 * 60
CHILD_LAUNCH_INTERVAL = 5 * 60
@@ -71,10 +74,18 @@ MAX_FAILURE = 3 # Max failure allowed for agent before blacklisted
GOAL_STATE_INTERVAL = 3
-ORPHAN_WAIT_INTERVAL = 15 * 60 * 60
+ORPHAN_WAIT_INTERVAL = 15 * 60
AGENT_SENTINAL_FILE = "current_version"
+READONLY_FILE_GLOBS = [
+ "*.crt",
+ "*.p7m",
+ "*.pem",
+ "*.prv",
+ "ovf-env.xml"
+]
+
def get_update_handler():
return UpdateHandler()
@@ -101,7 +112,6 @@ class UpdateHandler(object):
self.child_process = None
self.signal_handler = None
- return
def run_latest(self, child_args=None):
"""
@@ -233,36 +243,43 @@ class UpdateHandler(object):
This is the main loop which watches for agent and extension updates.
"""
- logger.info(u"Agent {0} is running as the goal state agent",
- CURRENT_AGENT)
+ try:
+ logger.info(u"Agent {0} is running as the goal state agent",
+ CURRENT_AGENT)
- # Launch monitoring threads
- from azurelinuxagent.ga.monitor import get_monitor_handler
- get_monitor_handler().run()
+ # Launch monitoring threads
+ from azurelinuxagent.ga.monitor import get_monitor_handler
+ get_monitor_handler().run()
- from azurelinuxagent.ga.env import get_env_handler
- get_env_handler().run()
+ from azurelinuxagent.ga.env import get_env_handler
+ get_env_handler().run()
- from azurelinuxagent.ga.exthandlers import get_exthandlers_handler, migrate_handler_state
- exthandlers_handler = get_exthandlers_handler()
- migrate_handler_state()
+ from azurelinuxagent.ga.exthandlers import get_exthandlers_handler, migrate_handler_state
+ exthandlers_handler = get_exthandlers_handler()
+ migrate_handler_state()
- try:
self._ensure_no_orphans()
self._emit_restart_event()
+ self._ensure_partition_assigned()
+ self._ensure_readonly_files()
while self.running:
if self._is_orphaned:
- logger.info("Goal state agent {0} was orphaned -- exiting",
+ logger.info("Agent {0} is an orphan -- exiting",
CURRENT_AGENT)
break
if self._upgrade_available():
- if len(self.agents) > 0:
+ available_agent = self.get_latest_agent()
+ if available_agent is None:
+ logger.info(
+ "Agent {0} is reverting to the installed agent -- exiting",
+ CURRENT_AGENT)
+ else:
logger.info(
- u"Agent {0} discovered {1} as an update and will exit",
+ u"Agent {0} discovered update {1} -- exiting",
CURRENT_AGENT,
- self.agents[0].name)
+ available_agent.name)
break
utc_start = datetime.utcnow()
@@ -271,20 +288,24 @@ class UpdateHandler(object):
exthandlers_handler.run()
if last_etag != exthandlers_handler.last_etag:
+ self._ensure_readonly_files()
add_event(
AGENT_NAME,
version=CURRENT_VERSION,
op=WALAEventOperation.ProcessGoalState,
is_success=True,
duration=elapsed_milliseconds(utc_start),
+ message="Incarnation {0}".format(
+ exthandlers_handler.last_etag),
log_event=True)
time.sleep(GOAL_STATE_INTERVAL)
except Exception as e:
- logger.warn(u"Agent {0} failed with exception: {1}",
- CURRENT_AGENT,
- ustr(e))
+ msg = u"Agent {0} failed with exception: {1}".format(
+ CURRENT_AGENT, ustr(e))
+ self._set_sentinal(msg=msg)
+ logger.warn(msg)
logger.warn(traceback.format_exc())
sys.exit(1)
# additional return here because sys.exit is mocked in unit tests
@@ -338,17 +359,21 @@ class UpdateHandler(object):
return available_agents[0] if len(available_agents) >= 1 else None
def _emit_restart_event(self):
- if not self._is_clean_start:
- msg = u"{0} did not terminate cleanly".format(CURRENT_AGENT)
- logger.info(msg)
- add_event(
- AGENT_NAME,
- version=CURRENT_VERSION,
- op=WALAEventOperation.Restart,
- is_success=False,
- message=msg)
+ try:
+ if not self._is_clean_start:
+ msg = u"Agent did not terminate cleanly: {0}".format(
+ fileutil.read_file(self._sentinal_file_path()))
+ logger.info(msg)
+ add_event(
+ AGENT_NAME,
+ version=CURRENT_VERSION,
+ op=WALAEventOperation.Restart,
+ is_success=False,
+ message=msg)
+ except Exception:
+ pass
- self._set_sentinal()
+ self._set_sentinal(msg="Starting")
return
def _ensure_no_orphans(self, orphan_wait_interval=ORPHAN_WAIT_INTERVAL):
@@ -382,6 +407,26 @@ class UpdateHandler(object):
ustr(e))
return
+ def _ensure_partition_assigned(self):
+ """
+ Assign the VM to a partition (0 - 99). Downloaded updates may be configured
+ to run on only some VMs; the assigned partition determines eligibility.
+ """
+ if not os.path.exists(self._partition_file):
+ partition = ustr(int(datetime.utcnow().microsecond / 10000))
+ fileutil.write_file(self._partition_file, partition)
+ add_event(
+ AGENT_NAME,
+ version=CURRENT_VERSION,
+ op=WALAEventOperation.Partition,
+ is_success=True,
+ message=partition)
+
+ def _ensure_readonly_files(self):
+ for g in READONLY_FILE_GLOBS:
+ for path in glob.iglob(os.path.join(conf.get_lib_dir(), g)):
+ os.chmod(path, stat.S_IRUSR)
+
def _evaluate_agent_health(self, latest_agent):
"""
Evaluate the health of the selected agent: If it is restarting
@@ -412,7 +457,6 @@ class UpdateHandler(object):
def _filter_blacklisted_agents(self):
self.agents = [agent for agent in self.agents if not agent.is_blacklisted]
- return
def _find_agents(self):
"""
@@ -447,19 +491,7 @@ class UpdateHandler(object):
@property
def _is_clean_start(self):
- if not os.path.isfile(self._sentinal_file_path()):
- return True
-
- try:
- if fileutil.read_file(self._sentinal_file_path()) != CURRENT_AGENT:
- return True
- except Exception as e:
- logger.warn(
- u"Exception reading sentinal file {0}: {1}",
- self._sentinal_file_path(),
- str(e))
-
- return False
+ return not os.path.isfile(self._sentinal_file_path())
@property
def _is_orphaned(self):
@@ -472,11 +504,29 @@ class UpdateHandler(object):
return fileutil.read_file(conf.get_agent_pid_file_path()) != ustr(parent_pid)
+ def _is_version_eligible(self, version):
+ # Ensure the installed version is always eligible
+ if version == CURRENT_VERSION and is_current_agent_installed():
+ return True
+
+ for agent in self.agents:
+ if agent.version == version:
+ return agent.is_available
+
+ return False
+
def _load_agents(self):
path = os.path.join(conf.get_lib_dir(), "{0}-*".format(AGENT_NAME))
return [GuestAgent(path=agent_dir)
for agent_dir in glob.iglob(path) if os.path.isdir(agent_dir)]
+ def _partition(self):
+ return int(fileutil.read_file(self._partition_file))
+
+ @property
+ def _partition_file(self):
+ return os.path.join(conf.get_lib_dir(), AGENT_PARTITION_FILE)
+
def _purge_agents(self):
"""
Remove from disk all directories and .zip files of unknown agents
@@ -485,8 +535,8 @@ class UpdateHandler(object):
path = os.path.join(conf.get_lib_dir(), "{0}-*".format(AGENT_NAME))
known_versions = [agent.version for agent in self.agents]
- if not is_current_agent_installed() and CURRENT_VERSION not in known_versions:
- logger.warn(
+ if CURRENT_VERSION not in known_versions:
+ logger.info(
u"Running Agent {0} was not found in the agent manifest - adding to list",
CURRENT_VERSION)
known_versions.append(CURRENT_VERSION)
@@ -511,9 +561,11 @@ class UpdateHandler(object):
self.agents.sort(key=lambda agent: agent.version, reverse=True)
return
- def _set_sentinal(self, agent=CURRENT_AGENT):
+ def _set_sentinal(self, agent=CURRENT_AGENT, msg="Unknown cause"):
try:
- fileutil.write_file(self._sentinal_file_path(), agent)
+ fileutil.write_file(
+ self._sentinal_file_path(),
+ "[{0}] [{1}]".format(agent, msg))
except Exception as e:
logger.warn(
u"Exception writing sentinal file {0}: {1}",
@@ -599,16 +651,18 @@ class UpdateHandler(object):
self._purge_agents()
self._filter_blacklisted_agents()
- # Return True if more recent agents are available
- return len(self.agents) > 0 and \
- self.agents[0].version > base_version
+ # Return True if current agent is no longer available or an
+ # agent with a higher version number is available
+ return not self._is_version_eligible(base_version) \
+ or (len(self.agents) > 0 \
+ and self.agents[0].version > base_version)
except Exception as e:
if isinstance(e, ResourceGoneError):
continue
msg = u"Exception retrieving agent manifests: {0}".format(
- ustr(e))
+ ustr(traceback.format_exc()))
logger.warn(msg)
add_event(
AGENT_NAME,
@@ -667,8 +721,6 @@ class GuestAgent(object):
self.error = GuestAgentError(self.get_agent_error_file())
self.error.load()
- self.supported = Supported(self.get_agent_supported_file())
- self.supported.load()
try:
self._ensure_downloaded()
@@ -677,6 +729,13 @@ class GuestAgent(object):
if isinstance(e, ResourceGoneError):
raise
+ # The agent was improperly blacklisting versions due to a timeout
+ # encountered while downloading a later version. Errors of type
+ # socket.error are IOError, so this should provide sufficient
+ # protection against a large class of I/O operation failures.
+ if isinstance(e, IOError):
+ raise
+
# Note the failure, blacklist the agent if the package downloaded
# - An exception with a downloaded package indicates the package
# is corrupt (e.g., missing the HandlerManifest.json file)
@@ -691,7 +750,6 @@ class GuestAgent(object):
op=WALAEventOperation.Install,
is_success=False,
message=msg)
- return
@property
def name(self):
@@ -712,13 +770,9 @@ class GuestAgent(object):
def get_agent_pkg_path(self):
return ".".join((os.path.join(conf.get_lib_dir(), self.name), "zip"))
- def get_agent_supported_file(self):
- return os.path.join(conf.get_lib_dir(), self.name, AGENT_SUPPORTED_FILE)
-
def clear_error(self):
self.error.clear()
self.error.save()
- return
@property
def is_available(self):
@@ -730,15 +784,8 @@ class GuestAgent(object):
@property
def is_downloaded(self):
- return self.is_blacklisted or os.path.isfile(self.get_agent_manifest_path())
-
- @property
- def _is_optional(self):
- return self.error is not None and self.error.is_sentinel and self.supported.is_supported
-
- @property
- def _in_slice(self):
- return self.supported.is_supported and self.supported.in_slice
+ return self.is_blacklisted or \
+ os.path.isfile(self.get_agent_manifest_path())
def mark_failure(self, is_fatal=False):
try:
@@ -750,21 +797,6 @@ class GuestAgent(object):
logger.warn(u"Agent {0} is permanently blacklisted", self.name)
except Exception as e:
logger.warn(u"Agent {0} failed recording error state: {1}", self.name, ustr(e))
- return
-
- def _enable(self):
- # Enable optional agents if within the "slice"
- # - The "slice" is a percentage of the agent to execute
- # - Blacklist out-of-slice agents to prevent reconsideration
- if self._is_optional:
- if self._in_slice:
- self.error.clear()
- self.error.save()
- logger.info(u"Enabled optional Agent {0}", self.name)
- else:
- self.mark_failure(is_fatal=True)
- logger.info(u"Optional Agent {0} not in slice", self.name)
- return
def _ensure_downloaded(self):
logger.verbose(u"Ensuring Agent {0} is downloaded", self.name)
@@ -788,18 +820,15 @@ class GuestAgent(object):
op=WALAEventOperation.Install,
is_success=True,
message=msg)
- return
def _ensure_loaded(self):
self._load_manifest()
self._load_error()
- self._load_supported()
-
- self._enable()
- return
def _download(self):
- for uri in self.pkg.uris:
+ uris_shuffled = self.pkg.uris
+ random.shuffle(uris_shuffled)
+ for uri in uris_shuffled:
if not HostPluginProtocol.is_default_channel() and self._fetch(uri.uri):
break
@@ -838,8 +867,6 @@ class GuestAgent(object):
message=msg)
raise UpdateError(msg)
- return
-
def _fetch(self, uri, headers=None, use_proxy=True):
package = None
try:
@@ -871,14 +898,6 @@ class GuestAgent(object):
logger.verbose(u"Agent {0} error state: {1}", self.name, ustr(self.error))
except Exception as e:
logger.warn(u"Agent {0} failed loading error state: {1}", self.name, ustr(e))
- return
-
- def _load_supported(self):
- try:
- self.supported = Supported(self.get_agent_supported_file())
- self.supported.load()
- except Exception as e:
- self.supported = Supported()
def _load_manifest(self):
path = self.get_agent_manifest_path()
@@ -976,10 +995,6 @@ class GuestAgentError(object):
def is_blacklisted(self):
return self.was_fatal or self.failure_count >= MAX_FAILURE
- @property
- def is_sentinel(self):
- return self.was_fatal and self.last_failure == 0.0
-
def load(self):
if self.path is not None and os.path.isfile(self.path):
with open(self.path, 'r') as f:
@@ -1015,60 +1030,3 @@ class GuestAgentError(object):
self.last_failure,
self.failure_count,
self.was_fatal)
-
-class Supported(object):
- def __init__(self, path):
- if path is None:
- raise UpdateError(u"Supported requires a path")
- self.path = path
- self.distributions = {}
- return
-
- @property
- def is_supported(self):
- return self._supported_distribution is not None
-
- @property
- def in_slice(self):
- d = self._supported_distribution
- return d is not None and d.in_slice
-
- def load(self):
- self.distributions = {}
- try:
- if self.path is not None and os.path.isfile(self.path):
- j = json.loads(fileutil.read_file(self.path))
- for d in j:
- self.distributions[d] = SupportedDistribution(j[d])
- except Exception as e:
- logger.warn("Failed JSON parse of {0}: {1}".format(self.path, e))
- return
-
- @property
- def _supported_distribution(self):
- for d in self.distributions:
- dd = self.distributions[d]
- if dd.is_supported:
- return dd
- return None
-
-class SupportedDistribution(object):
- def __init__(self, s):
- if s is None or not isinstance(s, dict):
- raise UpdateError(u"SupportedDisribution requires a dictionary")
-
- self.slice = s['slice']
- self.versions = s['versions']
-
- @property
- def is_supported(self):
- d = ','.join(platform.linux_distribution())
- for v in self.versions:
- if re.match(v, d):
- return True
- return False
-
- @property
- def in_slice(self):
- n = int((60 * self.slice) / 100)
- return (n - datetime.utcnow().second) > 0
diff --git a/azurelinuxagent/pa/deprovision/default.py b/azurelinuxagent/pa/deprovision/default.py
index e2c5613..895264a 100644
--- a/azurelinuxagent/pa/deprovision/default.py
+++ b/azurelinuxagent/pa/deprovision/default.py
@@ -123,6 +123,7 @@ class DeprovisionHandler(object):
known_files = [
'HostingEnvironmentConfig.xml',
'Incarnation',
+ 'partition',
'Protocol',
'SharedConfig.xml',
'WireServerEndpoint'
diff --git a/azurelinuxagent/pa/provision/cloudinit.py b/azurelinuxagent/pa/provision/cloudinit.py
index 5789e9a..fa47799 100644
--- a/azurelinuxagent/pa/provision/cloudinit.py
+++ b/azurelinuxagent/pa/provision/cloudinit.py
@@ -26,9 +26,9 @@ from datetime import datetime
import azurelinuxagent.common.conf as conf
import azurelinuxagent.common.logger as logger
import azurelinuxagent.common.utils.fileutil as fileutil
-import azurelinuxagent.common.utils.shellutil as shellutil
-from azurelinuxagent.common.event import elapsed_milliseconds
+from azurelinuxagent.common.event import elapsed_milliseconds, \
+ WALAEventOperation
from azurelinuxagent.common.exception import ProvisionError, ProtocolError
from azurelinuxagent.common.future import ustr
from azurelinuxagent.common.protocol import OVF_FILE_NAME
@@ -64,9 +64,12 @@ class CloudInitProvisionHandler(ProvisionHandler):
logger.info("Finished provisioning")
self.report_ready(thumbprint)
- self.report_event("Provision succeed",
+ self.report_event("Provisioning with cloud-init succeeded",
is_success=True,
duration=elapsed_milliseconds(utc_start))
+ self.report_event(self.create_guest_state_telemetry_messsage(),
+ is_success=True,
+ operation=WALAEventOperation.GuestState)
except ProvisionError as e:
logger.error("Provisioning failed: {0}", ustr(e))
@@ -103,7 +106,7 @@ class CloudInitProvisionHandler(ProvisionHandler):
"after {1}s".format(ovf_file_path,
max_retry * sleep_time))
- def wait_for_ssh_host_key(self, max_retry=360, sleep_time=5):
+ def wait_for_ssh_host_key(self, max_retry=1800, sleep_time=1):
"""
Wait for cloud-init to generate ssh host key
"""
diff --git a/azurelinuxagent/pa/provision/default.py b/azurelinuxagent/pa/provision/default.py
index 2f7ec18..44e171b 100644
--- a/azurelinuxagent/pa/provision/default.py
+++ b/azurelinuxagent/pa/provision/default.py
@@ -22,6 +22,8 @@ Provision handler
import os
import os.path
import re
+import socket
+import time
from datetime import datetime
@@ -87,10 +89,14 @@ class ProvisionHandler(object):
self.write_provisioned()
- self.report_event("Provision succeed",
+ self.report_event("Provisioning succeeded",
is_success=True,
duration=elapsed_milliseconds(utc_start))
+ self.report_event(self.create_guest_state_telemetry_messsage(),
+ is_success=True,
+ operation=WALAEventOperation.GuestState)
+
self.report_ready(thumbprint)
logger.info("Provisioning complete")
@@ -244,9 +250,14 @@ class ProvisionHandler(object):
fileutil.write_file(customdata_file, customdata)
if conf.get_execute_customdata():
+ start = time.time()
logger.info("Execute custom data")
os.chmod(customdata_file, 0o700)
shellutil.run(customdata_file)
+ add_event(name=AGENT_NAME,
+ duration=int(time.time() - start),
+ is_success=True,
+ op=WALAEventOperation.CustomData)
def deploy_ssh_pubkeys(self, ovfenv):
for pubkey in ovfenv.ssh_pubkeys:
@@ -258,12 +269,53 @@ class ProvisionHandler(object):
logger.info("Deploy ssh key pairs.")
self.osutil.deploy_ssh_keypair(ovfenv.username, keypair)
- def report_event(self, message, is_success=False, duration=0):
+ def report_event(self, message, is_success=False, duration=0,
+ operation=WALAEventOperation.Provision):
add_event(name=AGENT_NAME,
message=message,
duration=duration,
is_success=is_success,
- op=WALAEventOperation.Provision)
+ op=operation)
+
+ def get_cpu_count(self):
+ try:
+ count = len([x for x in open('/proc/cpuinfo').readlines()
+ if x.startswith("processor")])
+ return count
+ except Exception as e:
+ logger.verbose(u"Failed to determine the CPU count: {0}.", ustr(e))
+ pass
+ return -1
+
+ def get_mem_size_mb(self):
+ try:
+ for line in open('/proc/meminfo').readlines():
+ m = re.match('^MemTotal:\s*(\d+) kB$', line)
+ if m is not None:
+ return int(int(m.group(1)) / 1024)
+ except Exception as e:
+ logger.verbose(u"Failed to determine the memory size: {0}..", ustr(e))
+ pass
+ return -1
+
+ def create_guest_state_telemetry_messsage(self):
+ """
+ Create a GuestState JSON message that contains the current CPU, Memory
+ (MB), and hostname of the guest.
+
+ e.g.
+
+ {
+ "cpu": 1,
+ "mem": 1024,
+ "hostname": "server1234"
+ }
+ """
+ cpu = self.get_cpu_count()
+ mem = self.get_mem_size_mb()
+
+ return """{{"cpu": {0}, "mem": {1}, "hostname": "{2}"}}"""\
+ .format(cpu, mem, socket.gethostname())
def report_not_ready(self, sub_status, description):
status = ProvisionStatus(status="NotReady", subStatus=sub_status,
diff --git a/azurelinuxagent/pa/provision/factory.py b/azurelinuxagent/pa/provision/factory.py
index d87765f..9e88618 100644
--- a/azurelinuxagent/pa/provision/factory.py
+++ b/azurelinuxagent/pa/provision/factory.py
@@ -16,9 +16,7 @@
#
import azurelinuxagent.common.conf as conf
-import azurelinuxagent.common.logger as logger
-from azurelinuxagent.common.utils.textutil import Version
from azurelinuxagent.common.version import DISTRO_NAME, DISTRO_VERSION, \
DISTRO_FULL_NAME
diff --git a/azurelinuxagent/pa/rdma/suse.py b/azurelinuxagent/pa/rdma/suse.py
index 20f06cd..2b6ae29 100644
--- a/azurelinuxagent/pa/rdma/suse.py
+++ b/azurelinuxagent/pa/rdma/suse.py
@@ -1,6 +1,6 @@
# Microsoft Azure Linux Agent
#
-# Copyright 2014 Microsoft Corporation
+# Copyright 2017 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -18,7 +18,6 @@
#
import glob
-import os
import azurelinuxagent.common.logger as logger
import azurelinuxagent.common.utils.shellutil as shellutil
from azurelinuxagent.common.rdma import RDMAHandler
@@ -37,8 +36,10 @@ class SUSERDMAHandler(RDMAHandler):
return
zypper_install = 'zypper -n in %s'
zypper_install_noref = 'zypper -n --no-refresh in %s'
+ zypper_lock = 'zypper addlock %s'
zypper_remove = 'zypper -n rm %s'
zypper_search = 'zypper -n se -s %s'
+ zypper_unlock = 'zypper removelock %s'
package_name = 'msft-rdma-kmp-default'
cmd = zypper_search % package_name
status, repo_package_info = shellutil.run_get_output(cmd)
@@ -54,13 +55,19 @@ class SUSERDMAHandler(RDMAHandler):
installed = sections[0].strip()
version = sections[3].strip()
driver_package_versions.append(version)
- if fw_version in version and installed == 'i':
+ if fw_version in version and installed.startswith('i'):
info_msg = 'RDMA: Matching driver package "%s-%s" '
info_msg += 'is already installed, nothing to do.'
logger.info(info_msg % (package_name, version))
return True
- if installed == 'i':
+ if installed.startswith('i'):
+ # A driver with a different version is installed
driver_package_installed = True
+ cmd = zypper_unlock % package_name
+ result = shellutil.run(cmd)
+ info_msg = 'Driver with different version installed '
+ info_msg += 'unlocked package "%s".'
+ logger.info(info_msg % (package_name))
# If we get here the driver package is installed but the
# version doesn't match or no package is installed
@@ -80,11 +87,11 @@ class SUSERDMAHandler(RDMAHandler):
logger.info("RDMA: looking for fw version %s in packages" % fw_version)
for entry in driver_package_versions:
- if not fw_version in version:
+ if fw_version not in entry:
logger.info("Package '%s' is not a match." % entry)
else:
logger.info("Package '%s' is a match. Installing." % entry)
- complete_name = '%s-%s' % (package_name, version)
+ complete_name = '%s-%s' % (package_name, entry)
cmd = zypper_install % complete_name
result = shellutil.run(cmd)
if result:
@@ -94,6 +101,11 @@ class SUSERDMAHandler(RDMAHandler):
msg = 'RDMA: Successfully installed "%s" from '
msg += 'configured repositories'
logger.info(msg % complete_name)
+ # Lock the package so it does not accidentally get updated
+ cmd = zypper_lock % package_name
+ result = shellutil.run(cmd)
+ info_msg = 'Applied lock to "%s"' % package_name
+ logger.info(info_msg)
if not self.load_driver_module() or requires_reboot:
self.reboot_system()
return True
@@ -119,6 +131,11 @@ class SUSERDMAHandler(RDMAHandler):
msg = 'RDMA: Successfully installed "%s" from '
msg += 'local package cache'
logger.info(msg % (local_package))
+ # Lock the package so it does not accidentally get updated
+ cmd = zypper_lock % package_name
+ result = shellutil.run(cmd)
+ info_msg = 'Applied lock to "%s"' % package_name
+ logger.info(info_msg)
if not self.load_driver_module() or requires_reboot:
self.reboot_system()
return True