From 589b542bfb3b6630b931a506ca017635059cef1d Mon Sep 17 00:00:00 2001 From: Joshua Powers Date: Wed, 16 May 2018 08:16:10 -0600 Subject: tests: restructure SSH and initial connections The SSH function was retrying and waiting for SSH for over an hour when an SSH connection was failing to be established. This reduces the amount of retries and time between each retry to prevent tests from running for hours. Also restructures how waiting for the system works: the system will attempt to SSH up to the boot timeout time by catching SSH connection failures and retrying until the timeout is reached. If the limit is reached now an exception is thrown to abort the test. Drive by - this also fixes printing of the instance name when collecting the console log, rather than showing a Python object address. Fixes LP: #1758409 --- tests/cloud_tests/collect.py | 2 +- tests/cloud_tests/platforms/instances.py | 39 ++++++++++++++++++++++++-------- 2 files changed, 30 insertions(+), 11 deletions(-) (limited to 'tests/cloud_tests') diff --git a/tests/cloud_tests/collect.py b/tests/cloud_tests/collect.py index 1ba72856..78263bf5 100644 --- a/tests/cloud_tests/collect.py +++ b/tests/cloud_tests/collect.py @@ -42,7 +42,7 @@ def collect_console(instance, base_dir): @param base_dir: directory to write console log to """ logfile = os.path.join(base_dir, 'console.log') - LOG.debug('getting console log for %s to %s', instance, logfile) + LOG.debug('getting console log for %s to %s', instance.name, logfile) try: data = instance.console_log() except NotImplementedError as e: diff --git a/tests/cloud_tests/platforms/instances.py b/tests/cloud_tests/platforms/instances.py index cc439d29..95bc3b16 100644 --- a/tests/cloud_tests/platforms/instances.py +++ b/tests/cloud_tests/platforms/instances.py @@ -87,7 +87,12 @@ class Instance(TargetBase): self._ssh_client = None def _ssh_connect(self): - """Connect via SSH.""" + """Connect via SSH. + + Attempt to SSH to the client on the specific IP and port. If it + fails in some manner, then retry 2 more times for a total of 3 + attempts; sleeping a few seconds between attempts. + """ if self._ssh_client: return self._ssh_client @@ -98,21 +103,22 @@ class Instance(TargetBase): client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) private_key = paramiko.RSAKey.from_private_key_file(self.ssh_key_file) - retries = 30 + retries = 3 while retries: try: client.connect(username=self.ssh_username, hostname=self.ssh_ip, port=self.ssh_port, - pkey=private_key, banner_timeout=30) + pkey=private_key) self._ssh_client = client return client except (ConnectionRefusedError, AuthenticationException, BadHostKeyException, ConnectionResetError, SSHException, OSError): retries -= 1 - time.sleep(10) + LOG.debug('Retrying ssh connection on connect failure') + time.sleep(3) - ssh_cmd = 'Failed ssh connection to %s@%s:%s after 300 seconds' % ( + ssh_cmd = 'Failed ssh connection to %s@%s:%s after 3 retries' % ( self.ssh_username, self.ssh_ip, self.ssh_port ) raise util.InTargetExecuteError(b'', b'', 1, ssh_cmd, 'ssh') @@ -128,18 +134,31 @@ class Instance(TargetBase): return ' '.join(l for l in test.strip().splitlines() if not l.lstrip().startswith('#')) - time = self.config['boot_timeout'] + boot_timeout = self.config['boot_timeout'] tests = [self.config['system_ready_script']] if wait_for_cloud_init: tests.append(self.config['cloud_init_ready_script']) formatted_tests = ' && '.join(clean_test(t) for t in tests) cmd = ('i=0; while [ $i -lt {time} ] && i=$(($i+1)); do {test} && ' - 'exit 0; sleep 1; done; exit 1').format(time=time, + 'exit 0; sleep 1; done; exit 1').format(time=boot_timeout, test=formatted_tests) - if self.execute(cmd, rcs=(0, 1))[-1] != 0: - raise OSError('timeout: after {}s system not started'.format(time)) - + end_time = time.time() + boot_timeout + while True: + try: + return_code = self.execute( + cmd, rcs=(0, 1), description='wait for instance start' + )[-1] + if return_code == 0: + break + except util.InTargetExecuteError: + LOG.warning("failed to connect via SSH") + + if time.time() < end_time: + time.sleep(3) + else: + raise util.PlatformError('ssh', 'after %ss instance is not ' + 'reachable' % boot_timeout) # vi: ts=4 expandtab -- cgit v1.2.3