From 0d831c6a390ae67d6d017c86f8a905907b76d558 Mon Sep 17 00:00:00 2001 From: "Alexander V. Tikhonov" Date: Thu, 26 Nov 2020 01:56:16 +0000 Subject: [PATCH] Add test_timeout to limit test run time Added the 'test-timeout' option, which breaks the test process with a kill signal if the test runs longer than the given number of seconds. By default it is equal to 110 seconds. This value should be greater than 'replication-sync-timeout' (which is 100 seconds by default) and lower than 'no-output-timeout' (which is 120 seconds by default). This timeout prevents a hanging test from running until the 'no-output-timeout' timeout is reached, at which point the overall testing exits. Now, if a test hangs, the 'test-timeout' timeout terminates the test processes. This gives the test-run worker a chance to either restart the failed test or continue with the remaining tests in the worker queue. Before this fix, tests hung, as in [1] and [2]; with the fix, the same issues are handled, as in [3] and [4] respectively. To reproduce an issue like [2], set 'test-timeout' too low for the test to complete the 'restart server ...' command, for example: ./test-run.py replication/quorum.test.lua --test-timeout 5 \ --no-output-timeout 10 --conf memtx The fix resolves issue #157 together with PR #186, which helps to kill the instances when SIGTERM could not do it. 
Part of #157 [1] - https://gitlab.com/tarantool/tarantool/-/jobs/835734706#L4968 [2] - https://gitlab.com/tarantool/tarantool/-/jobs/822649038#L4835 [3] - https://gitlab.com/tarantool/tarantool/-/jobs/874058059#L4993 [4] - https://gitlab.com/tarantool/tarantool/-/jobs/874058745#L5316 --- lib/app_server.py | 12 ++++++++++++ lib/options.py | 8 ++++++++ lib/tarantool_server.py | 14 +++++++++----- 3 files changed, 29 insertions(+), 5 deletions(-) diff --git a/lib/app_server.py b/lib/app_server.py index 2cb8a87b..d22b88b2 100644 --- a/lib/app_server.py +++ b/lib/app_server.py @@ -6,7 +6,9 @@ from gevent.subprocess import Popen, PIPE +from lib.colorer import color_stdout from lib.colorer import color_log +from lib.options import Options from lib.preprocessor import TestState from lib.server import Server from lib.tarantool_server import Test @@ -16,12 +18,22 @@ from lib.utils import format_process from lib.utils import warn_unix_socket from test import TestRunGreenlet, TestExecutionError +from threading import Timer + + +def timeout_handler(server_process, test_timeout): + color_stdout("Test timeout of %d secs reached\t" % test_timeout, schema='error') + server_process.kill() def run_server(execs, cwd, server, logfile, retval): os.putenv("LISTEN", server.iproto) server.process = Popen(execs, stdout=PIPE, stderr=PIPE, cwd=cwd) + test_timeout = Options().args.test_timeout + timer = Timer(test_timeout, timeout_handler, [server.process, test_timeout]) + timer.start() stdout, stderr = server.process.communicate() + timer.cancel() sys.stdout.write(stdout) with open(logfile, 'a') as f: f.write(stderr) diff --git a/lib/options.py b/lib/options.py index de8fe412..c2da1e00 100644 --- a/lib/options.py +++ b/lib/options.py @@ -190,6 +190,14 @@ def __init__(self): Such files created by workers in the "var/reproduce" directory. 
Note: The option works now only with parallel testing.""") + parser.add_argument( + "--test-timeout", + dest="test_timeout", + default=110, + type=int, + help="""Break the test process with kill signal if the test runs + longer than this amount of seconds. Default: 110 [seconds].""") + parser.add_argument( "--no-output-timeout", dest="no_output_timeout", diff --git a/lib/tarantool_server.py b/lib/tarantool_server.py index ea7d2b63..04e1ee28 100644 --- a/lib/tarantool_server.py +++ b/lib/tarantool_server.py @@ -14,6 +14,7 @@ import yaml from gevent import socket +from gevent import Timeout from greenlet import GreenletExit try: @@ -24,6 +25,7 @@ from lib.admin_connection import AdminConnection, AdminAsyncConnection from lib.box_connection import BoxConnection from lib.colorer import color_stdout, color_log +from lib.options import Options from lib.preprocessor import TestState from lib.server import Server from lib.test import Test @@ -38,12 +40,15 @@ def save_join(green_obj, timeout=None): """ Gevent join wrapper for - test-run stop-on-crash feature + test-run stop-on-crash/stop-on-timeout feature - :return True in case of crash and False otherwise + :return True in case of crash or test timeout and False otherwise """ try: - green_obj.join(timeout=timeout) + green_obj.get(timeout=timeout) + except Timeout: + color_stdout("Test timeout of %d secs reached\t" % timeout, schema='error') + return True except GreenletExit: return True # We don't catch TarantoolStartError here to propagate it to a parent @@ -54,7 +59,6 @@ def save_join(green_obj, timeout=None): class LuaTest(Test): """ Handle *.test.lua and *.test.sql test files. 
""" - TIMEOUT = 60 * 10 RESULT_FILE_VERSION_INITIAL = 1 RESULT_FILE_VERSION_DEFAULT = 2 RESULT_FILE_VERSION_LINE_RE = re.compile( @@ -372,7 +376,7 @@ def execute(self, server): lua.start() crash_occured = True try: - crash_occured = save_join(lua, timeout=self.TIMEOUT) + crash_occured = save_join(lua, timeout=Options().args.test_timeout) self.killall_servers(server, ts, crash_occured) except KeyboardInterrupt: # prevent tests greenlet from writing to the real stdout