summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoshua Powers <josh.powers@canonical.com>2018-05-16 08:16:10 -0600
committerChad Smith <chad.smith@canonical.com>2018-05-16 08:16:10 -0600
commit589b542bfb3b6630b931a506ca017635059cef1d (patch)
tree60c0266846342b249a07bb23b28bff519adb0a6b
parent0d7ee5592621d09699d079945ffd6febf16669b2 (diff)
downloadvyos-cloud-init-589b542bfb3b6630b931a506ca017635059cef1d.tar.gz
vyos-cloud-init-589b542bfb3b6630b931a506ca017635059cef1d.zip
tests: restructure SSH and initial connections
The SSH function was retrying and waiting for SSH for over an hour when an SSH connection was failing to be established. This reduces the amount of retries and time between each retry to prevent tests from running for hours. Also restructures how waiting for the system works: the system will attempt to SSH up to the boot timeout time by catching SSH connection failures and retrying until the timeout is reached. If the limit is reached now an exception is thrown to abort the test. Drive by - this also fixes printing of the instance name when collecting the console log, rather than showing a Python object address. Fixes LP: #1758409
-rw-r--r--tests/cloud_tests/collect.py2
-rw-r--r--tests/cloud_tests/platforms/instances.py39
2 files changed, 30 insertions, 11 deletions
diff --git a/tests/cloud_tests/collect.py b/tests/cloud_tests/collect.py
index 1ba72856..78263bf5 100644
--- a/tests/cloud_tests/collect.py
+++ b/tests/cloud_tests/collect.py
@@ -42,7 +42,7 @@ def collect_console(instance, base_dir):
@param base_dir: directory to write console log to
"""
logfile = os.path.join(base_dir, 'console.log')
- LOG.debug('getting console log for %s to %s', instance, logfile)
+ LOG.debug('getting console log for %s to %s', instance.name, logfile)
try:
data = instance.console_log()
except NotImplementedError as e:
diff --git a/tests/cloud_tests/platforms/instances.py b/tests/cloud_tests/platforms/instances.py
index cc439d29..95bc3b16 100644
--- a/tests/cloud_tests/platforms/instances.py
+++ b/tests/cloud_tests/platforms/instances.py
@@ -87,7 +87,12 @@ class Instance(TargetBase):
self._ssh_client = None
def _ssh_connect(self):
- """Connect via SSH."""
+ """Connect via SSH.
+
+ Attempt to SSH to the client on the specific IP and port. If it
+ fails in some manner, then retry 2 more times for a total of 3
+ attempts; sleeping a few seconds between attempts.
+ """
if self._ssh_client:
return self._ssh_client
@@ -98,21 +103,22 @@ class Instance(TargetBase):
client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
private_key = paramiko.RSAKey.from_private_key_file(self.ssh_key_file)
- retries = 30
+ retries = 3
while retries:
try:
client.connect(username=self.ssh_username,
hostname=self.ssh_ip, port=self.ssh_port,
- pkey=private_key, banner_timeout=30)
+ pkey=private_key)
self._ssh_client = client
return client
except (ConnectionRefusedError, AuthenticationException,
BadHostKeyException, ConnectionResetError, SSHException,
OSError):
retries -= 1
- time.sleep(10)
+ LOG.debug('Retrying ssh connection on connect failure')
+ time.sleep(3)
- ssh_cmd = 'Failed ssh connection to %s@%s:%s after 300 seconds' % (
+ ssh_cmd = 'Failed ssh connection to %s@%s:%s after 3 retries' % (
self.ssh_username, self.ssh_ip, self.ssh_port
)
raise util.InTargetExecuteError(b'', b'', 1, ssh_cmd, 'ssh')
@@ -128,18 +134,31 @@ class Instance(TargetBase):
return ' '.join(l for l in test.strip().splitlines()
if not l.lstrip().startswith('#'))
- time = self.config['boot_timeout']
+ boot_timeout = self.config['boot_timeout']
tests = [self.config['system_ready_script']]
if wait_for_cloud_init:
tests.append(self.config['cloud_init_ready_script'])
formatted_tests = ' && '.join(clean_test(t) for t in tests)
cmd = ('i=0; while [ $i -lt {time} ] && i=$(($i+1)); do {test} && '
- 'exit 0; sleep 1; done; exit 1').format(time=time,
+ 'exit 0; sleep 1; done; exit 1').format(time=boot_timeout,
test=formatted_tests)
- if self.execute(cmd, rcs=(0, 1))[-1] != 0:
- raise OSError('timeout: after {}s system not started'.format(time))
-
+ end_time = time.time() + boot_timeout
+ while True:
+ try:
+ return_code = self.execute(
+ cmd, rcs=(0, 1), description='wait for instance start'
+ )[-1]
+ if return_code == 0:
+ break
+ except util.InTargetExecuteError:
+ LOG.warning("failed to connect via SSH")
+
+ if time.time() < end_time:
+ time.sleep(3)
+ else:
+ raise util.PlatformError('ssh', 'after %ss instance is not '
+ 'reachable' % boot_timeout)
# vi: ts=4 expandtab