summaryrefslogtreecommitdiff
path: root/azurelinuxagent/ga/update.py
diff options
context:
space:
mode:
Diffstat (limited to 'azurelinuxagent/ga/update.py')
-rw-r--r--azurelinuxagent/ga/update.py252
1 files changed, 210 insertions, 42 deletions
diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py
index e89608a..996484b 100644
--- a/azurelinuxagent/ga/update.py
+++ b/azurelinuxagent/ga/update.py
@@ -16,12 +16,12 @@
#
# Requires Python 2.4+ and Openssl 1.0+
#
+
import glob
import json
import os
import platform
import re
-import shlex
import shutil
import signal
import subprocess
@@ -59,10 +59,14 @@ CHILD_LAUNCH_RESTART_MAX = 3
CHILD_POLL_INTERVAL = 60
MAX_FAILURE = 3 # Max failure allowed for agent before blacklisted
+RETAIN_INTERVAL = 24 * 60 * 60 # Retain interval for black list
GOAL_STATE_INTERVAL = 25
REPORT_STATUS_INTERVAL = 15
-RETAIN_INTERVAL = 24 * 60 * 60 # Retain interval for black list
+
+ORPHAN_WAIT_INTERVAL = 15 * 60 * 60
+
+AGENT_SENTINAL_FILE = "current_version"
def get_update_handler():
@@ -81,7 +85,6 @@ class UpdateHandler(object):
self.protocol_util = get_protocol_util()
self.running = True
- self.last_etag = None
self.last_attempt_time = None
self.agents = []
@@ -126,7 +129,7 @@ class UpdateHandler(object):
try:
# Launch the correct Python version for python-based agents
- cmds = shlex.split(agent_cmd)
+ cmds = textutil.safe_shlex_split(agent_cmd)
if cmds[0].lower() == "python":
cmds[0] = get_python_cmd()
agent_cmd = " ".join(cmds)
@@ -218,13 +221,21 @@ class UpdateHandler(object):
from azurelinuxagent.ga.env import get_env_handler
get_env_handler().run()
- from azurelinuxagent.ga.exthandlers import get_exthandlers_handler
+ from azurelinuxagent.ga.exthandlers import get_exthandlers_handler, migrate_handler_state
exthandlers_handler = get_exthandlers_handler()
+ migrate_handler_state()
- # TODO: Add means to stop running
try:
+ self._ensure_no_orphans()
+ self._emit_restart_event()
+
+ # TODO: Add means to stop running
while self.running:
- if self._ensure_latest_agent():
+ if self._is_orphaned:
+ logger.info("Goal state agent {0} was orphaned -- exiting", CURRENT_AGENT)
+ break
+
+ if self._upgrade_available():
if len(self.agents) > 0:
logger.info(
u"Agent {0} discovered {1} as an update and will exit",
@@ -234,16 +245,26 @@ class UpdateHandler(object):
exthandlers_handler.run()
- time.sleep(25)
+ time.sleep(GOAL_STATE_INTERVAL)
except Exception as e:
logger.warn(u"Agent {0} failed with exception: {1}", CURRENT_AGENT, ustr(e))
sys.exit(1)
+ return
+ self._shutdown()
sys.exit(0)
return
def forward_signal(self, signum, frame):
+ # Note:
+ # - At present, the handler is registered only for SIGTERM.
+ # However, clean shutdown is both SIGTERM and SIGKILL.
+ # A SIGKILL handler is not being registered at this time to
+ # minimize perturbing the code.
+ if signum in (signal.SIGTERM, signal.SIGKILL):
+ self._shutdown()
+
if self.child_process is None:
return
@@ -258,13 +279,14 @@ class UpdateHandler(object):
self.signal_handler(signum, frame)
elif self.signal_handler is signal.SIG_DFL:
if signum == signal.SIGTERM:
+ # TODO: This should set self.running to False vs. just exiting
sys.exit(0)
return
def get_latest_agent(self):
"""
If autoupdate is enabled, return the most current, downloaded,
- non-blacklisted agent (if any).
+ non-blacklisted agent which is not the current version (if any).
Otherwise, return None (implying to use the installed agent).
"""
@@ -272,10 +294,27 @@ class UpdateHandler(object):
return None
self._load_agents()
- available_agents = [agent for agent in self.agents if agent.is_available]
+ available_agents = [agent for agent in self.agents
+ if agent.is_available
+ and agent.version > FlexibleVersion(AGENT_VERSION)]
+
return available_agents[0] if len(available_agents) >= 1 else None
- def _ensure_latest_agent(self, base_version=CURRENT_VERSION):
+ def _emit_restart_event(self):
+ if not self._is_clean_start:
+ msg = u"{0} did not terminate cleanly".format(CURRENT_AGENT)
+ logger.info(msg)
+ add_event(
+ AGENT_NAME,
+ version=CURRENT_VERSION,
+ op=WALAEventOperation.Restart,
+ is_success=False,
+ message=msg)
+
+ self._set_sentinal()
+ return
+
+ def _upgrade_available(self, base_version=CURRENT_VERSION):
# Ignore new agents if updating is disabled
if not conf.get_autoupdate_enabled():
return False
@@ -306,11 +345,8 @@ class UpdateHandler(object):
message=msg)
return False
- if self.last_etag is not None and self.last_etag == etag:
- logger.info(u"Incarnation {0} has no agent updates", etag)
- return False
-
- manifests = [m for m in manifest_list.vmAgentManifests if m.family == family]
+ manifests = [m for m in manifest_list.vmAgentManifests \
+ if m.family == family and len(m.versionsManifestUris) > 0]
if len(manifests) == 0:
logger.info(u"Incarnation {0} has no agent family {1} updates", etag, family)
return False
@@ -318,7 +354,8 @@ class UpdateHandler(object):
try:
pkg_list = protocol.get_vmagent_pkgs(manifests[0])
except ProtocolError as e:
- msg= u"Incarnation {0} failed to get {1} package list: {2}".format(
+ msg = u"Incarnation {0} failed to get {1} package list: " \
+ u"{2}".format(
etag,
family,
ustr(e))
@@ -331,18 +368,51 @@ class UpdateHandler(object):
message=msg)
return False
- # Set the agents to those available for download at least as current as the existing agent
- # and remove from disk any agent no longer reported to the VM.
+ # Set the agents to those available for download at least as current
+ # as the existing agent and remove from disk any agent no longer
+ # reported to the VM.
# Note:
- # The code leaves on disk available, but blacklisted, agents so as to preserve the state.
- # Otherwise, those agents could be again downloaded and inappropriately retried.
- self._set_agents([GuestAgent(pkg=pkg) for pkg in pkg_list.versions])
+ # The code leaves on disk available, but blacklisted, agents so as to
+ # preserve the state. Otherwise, those agents could be again
+ # downloaded and inappropriately retried.
+ host = None
+ if protocol and protocol.client:
+ host = protocol.client.get_host_plugin()
+ self._set_agents([GuestAgent(pkg=pkg, host=host) for pkg in pkg_list.versions])
self._purge_agents()
self._filter_blacklisted_agents()
# Return True if agents more recent than the current are available
return len(self.agents) > 0 and self.agents[0].version > base_version
+ def _ensure_no_orphans(self, orphan_wait_interval=ORPHAN_WAIT_INTERVAL):
+ previous_pid_file, pid_file = self._write_pid_file()
+ if previous_pid_file is not None:
+ try:
+ pid = fileutil.read_file(previous_pid_file)
+ wait_interval = orphan_wait_interval
+ while self.osutil.check_pid_alive(pid):
+ wait_interval -= GOAL_STATE_INTERVAL
+ if wait_interval <= 0:
+ logger.warn(
+ u"{0} forcibly terminated orphan process {1}",
+ CURRENT_AGENT,
+ pid)
+ os.kill(pid, signal.SIGKILL)
+ break
+
+ logger.info(
+ u"{0} waiting for orphan process {1} to terminate",
+ CURRENT_AGENT,
+ pid)
+ time.sleep(GOAL_STATE_INTERVAL)
+
+ except Exception as e:
+ logger.warn(
+ u"Exception occurred waiting for orphan agent to terminate: {0}",
+ ustr(e))
+ return
+
def _evaluate_agent_health(self, latest_agent):
"""
Evaluate the health of the selected agent: If it is restarting
@@ -375,18 +445,61 @@ class UpdateHandler(object):
self.agents = [agent for agent in self.agents if not agent.is_blacklisted]
return
+ def _get_pid_files(self):
+ pid_file = conf.get_agent_pid_file_path()
+
+ pid_dir = os.path.dirname(pid_file)
+ pid_name = os.path.basename(pid_file)
+
+ pid_re = re.compile("(\d+)_{0}".format(re.escape(pid_name)))
+ pid_files = [int(pid_re.match(f).group(1)) for f in os.listdir(pid_dir) if pid_re.match(f)]
+ pid_files.sort()
+
+ pid_index = -1 if len(pid_files) <= 0 else pid_files[-1]
+ previous_pid_file = None \
+ if pid_index < 0 \
+ else os.path.join(pid_dir, "{0}_{1}".format(pid_index, pid_name))
+ pid_file = os.path.join(pid_dir, "{0}_{1}".format(pid_index+1, pid_name))
+ return previous_pid_file, pid_file
+
+ @property
+ def _is_clean_start(self):
+ if not os.path.isfile(self._sentinal_file_path()):
+ return True
+
+ try:
+ if fileutil.read_file(self._sentinal_file_path()) != CURRENT_AGENT:
+ return True
+ except Exception as e:
+ logger.warn(
+ u"Exception reading sentinal file {0}: {1}",
+ self._sentinal_file_path(),
+ str(e))
+
+ return False
+
+ @property
+ def _is_orphaned(self):
+ parent_pid = os.getppid()
+ if parent_pid in (1, None):
+ return True
+
+ if not os.path.isfile(conf.get_agent_pid_file_path()):
+ return True
+
+ return fileutil.read_file(conf.get_agent_pid_file_path()) != ustr(parent_pid)
+
def _load_agents(self):
"""
Load all non-blacklisted agents currently on disk.
"""
- if len(self.agents) <= 0:
- try:
- path = os.path.join(conf.get_lib_dir(), "{0}-*".format(AGENT_NAME))
- self._set_agents([GuestAgent(path=agent_dir)
- for agent_dir in glob.iglob(path) if os.path.isdir(agent_dir)])
- self._filter_blacklisted_agents()
- except Exception as e:
- logger.warn(u"Exception occurred loading available agents: {0}", ustr(e))
+ try:
+ path = os.path.join(conf.get_lib_dir(), "{0}-*".format(AGENT_NAME))
+ self._set_agents([GuestAgent(path=agent_dir)
+ for agent_dir in glob.iglob(path) if os.path.isdir(agent_dir)])
+ self._filter_blacklisted_agents()
+ except Exception as e:
+ logger.warn(u"Exception occurred loading available agents: {0}", ustr(e))
return
def _purge_agents(self):
@@ -423,10 +536,51 @@ class UpdateHandler(object):
self.agents.sort(key=lambda agent: agent.version, reverse=True)
return
+ def _set_sentinal(self, agent=CURRENT_AGENT):
+ try:
+ fileutil.write_file(self._sentinal_file_path(), agent)
+ except Exception as e:
+ logger.warn(
+ u"Exception writing sentinal file {0}: {1}",
+ self._sentinal_file_path(),
+ str(e))
+ return
+
+ def _sentinal_file_path(self):
+ return os.path.join(conf.get_lib_dir(), AGENT_SENTINAL_FILE)
+
+ def _shutdown(self):
+ if not os.path.isfile(self._sentinal_file_path()):
+ return
+
+ try:
+ os.remove(self._sentinal_file_path())
+ except Exception as e:
+ logger.warn(
+ u"Exception removing sentinal file {0}: {1}",
+ self._sentinal_file_path(),
+ str(e))
+ return
+
+ def _write_pid_file(self):
+ previous_pid_file, pid_file = self._get_pid_files()
+ try:
+ fileutil.write_file(pid_file, ustr(os.getpid()))
+ logger.info(u"{0} running as process {1}", CURRENT_AGENT, ustr(os.getpid()))
+ except Exception as e:
+ pid_file = None
+ logger.warn(
+ u"Expection writing goal state agent {0} pid to {1}: {2}",
+ CURRENT_AGENT,
+ pid_file,
+ ustr(e))
+ return previous_pid_file, pid_file
+
class GuestAgent(object):
- def __init__(self, path=None, pkg=None):
+ def __init__(self, path=None, pkg=None, host=None):
self.pkg = pkg
+ self.host = host
version = None
if path is not None:
m = AGENT_DIR_PATTERN.match(path)
@@ -543,18 +697,15 @@ class GuestAgent(object):
return
def _download(self):
- package = None
-
for uri in self.pkg.uris:
- try:
- resp = restutil.http_get(uri.uri, chk_proxy=True)
- if resp.status == restutil.httpclient.OK:
- package = resp.read()
- fileutil.write_file(self.get_agent_pkg_path(), bytearray(package), asbin=True)
- logger.info(u"Agent {0} downloaded from {1}", self.name, uri.uri)
- break
- except restutil.HttpError as e:
- logger.warn(u"Agent {0} download from {1} failed", self.name, uri.uri)
+ if self._fetch(uri.uri):
+ break
+ else:
+ if self.host is not None:
+ logger.info("Download unsuccessful, falling back to host plugin")
+ uri, headers = self.host.get_artifact_request(uri.uri, self.host.manifest_uri)
+ if self._fetch(uri, headers=headers):
+ break
if not os.path.isfile(self.get_agent_pkg_path()):
msg = u"Unable to download Agent {0} from any URI".format(self.name)
@@ -567,6 +718,23 @@ class GuestAgent(object):
raise UpdateError(msg)
return
+ def _fetch(self, uri, headers=None):
+ package = None
+ try:
+ resp = restutil.http_get(uri, chk_proxy=True, headers=headers)
+ if resp.status == restutil.httpclient.OK:
+ package = resp.read()
+ fileutil.write_file(self.get_agent_pkg_path(),
+ bytearray(package),
+ asbin=True)
+ logger.info(u"Agent {0} downloaded from {1}", self.name, uri)
+ except restutil.HttpError as http_error:
+ logger.verbose(u"Agent {0} download from {1} failed [{2}]",
+ self.name,
+ uri,
+ http_error)
+ return package is not None
+
def _load_error(self):
try:
if self.error is None: