Skip to content

Commit

Permalink
Add test_timeout to limit test run time
Browse files Browse the repository at this point in the history
Added a 'test-timeout' option that breaks the test process with a kill
signal if the test runs longer than the given number of seconds. The
default is 110 seconds. This value should be greater than
'replication-sync-timeout' (which is 100 seconds by default) and
lower than 'no-output-timeout' (which is 120 seconds by default).

This timeout helps to avoid tests hanging until the 'no-output-timeout'
is reached, at which point the whole test run exits. Now, if a test
hangs, the 'test-timeout' makes the test processes exit. This gives the
test-run worker a chance to restart the failed test or to continue with
the remaining tests in the worker queue. Before this fix, tests hung,
as in [1] and [2]; now the same issues are resolved, as in [3] and [4]
respectively.

To reproduce issues like [2], set 'test-timeout' too low for the test
to complete a 'restart server ...' command, for example:

  ./test-run.py replication/quorum.test.lua --test-timeout 5 \
    --no-output-timeout 10 --conf memtx

This commit implements termination of stuck AppServer instances with
SIGKILL. However, there are still problems with stopping and waiting
for non-default instances. They will be resolved in the following PRs /
commits. See PR #244 for details.

Part of #157

[1] - https://gitlab.com/tarantool/tarantool/-/jobs/835734706#L4968
[2] - https://gitlab.com/tarantool/tarantool/-/jobs/822649038#L4835
[3] - https://gitlab.com/tarantool/tarantool/-/jobs/874058059#L4993
[4] - https://gitlab.com/tarantool/tarantool/-/jobs/874058745#L5316
  • Loading branch information
avtikhon authored and Totktonada committed Dec 7, 2020
1 parent e843552 commit 1f6d7ba
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 5 deletions.
12 changes: 12 additions & 0 deletions lib/app_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@

from gevent.subprocess import Popen, PIPE

from lib.colorer import color_stdout
from lib.colorer import color_log
from lib.options import Options
from lib.preprocessor import TestState
from lib.server import Server
from lib.server import DEFAULT_SNAPSHOT_NAME
Expand All @@ -17,12 +19,22 @@
from lib.utils import format_process
from lib.utils import warn_unix_socket
from test import TestRunGreenlet, TestExecutionError
from threading import Timer


def timeout_handler(server_process, test_timeout):
    """Report an expired test timeout and kill the server process.

    Passed as the callback of the ``Timer`` armed in ``run_server``.

    :param server_process: process object; its ``kill()`` method is called
    :param test_timeout: timeout in seconds, used only in the message
    """
    message = "Test timeout of %d secs reached\t" % test_timeout
    color_stdout(message, schema='error')
    server_process.kill()


def run_server(execs, cwd, server, logfile, retval):
os.putenv("LISTEN", server.iproto)
server.process = Popen(execs, stdout=PIPE, stderr=PIPE, cwd=cwd)
test_timeout = Options().args.test_timeout
timer = Timer(test_timeout, timeout_handler, (server.process, test_timeout))
timer.start()
stdout, stderr = server.process.communicate()
timer.cancel()
sys.stdout.write(stdout)
with open(logfile, 'a') as f:
f.write(stderr)
Expand Down
8 changes: 8 additions & 0 deletions lib/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,14 @@ def __init__(self):
Such files created by workers in the "var/reproduce" directory.
Note: The option works now only with parallel testing.""")

parser.add_argument(
"--test-timeout",
dest="test_timeout",
default=110,
type=int,
help="""Break the test process with kill signal if the test runs
longer than this amount of seconds. Default: 110 [seconds].""")

parser.add_argument(
"--no-output-timeout",
dest="no_output_timeout",
Expand Down
20 changes: 15 additions & 5 deletions lib/tarantool_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import yaml

from gevent import socket
from gevent import Timeout
from greenlet import GreenletExit
from threading import Timer

Expand All @@ -27,6 +28,7 @@
from lib.colorer import color_stdout
from lib.colorer import color_log
from lib.colorer import qa_notice
from lib.options import Options
from lib.preprocessor import TestState
from lib.server import Server
from lib.server import DEFAULT_SNAPSHOT_NAME
Expand All @@ -44,12 +46,21 @@
def save_join(green_obj, timeout=None):
"""
Gevent join wrapper for
test-run stop-on-crash feature
test-run stop-on-crash/stop-on-timeout feature
:return True in case of crash and False otherwise
:return True in case of crash or test timeout and False otherwise
"""
try:
green_obj.join(timeout=timeout)
green_obj.get(timeout=timeout)
except Timeout:
color_stdout("Test timeout of %d secs reached\t" % timeout, schema='error')
# We should kill the greenlet that writes to a temporary
# result file. If the same test is run several times (e.g.
# on different configurations), this greenlet may wake up
# and write to the temporary result file of the new run of
# the test.
green_obj.kill()
return True
except GreenletExit:
return True
# We don't catch TarantoolStartError here to propagate it to a parent
Expand All @@ -60,7 +71,6 @@ def save_join(green_obj, timeout=None):
class LuaTest(Test):
""" Handle *.test.lua and *.test.sql test files. """

TIMEOUT = 60 * 10
RESULT_FILE_VERSION_INITIAL = 1
RESULT_FILE_VERSION_DEFAULT = 2
RESULT_FILE_VERSION_LINE_RE = re.compile(
Expand Down Expand Up @@ -378,7 +388,7 @@ def execute(self, server):
lua.start()
crash_occured = True
try:
crash_occured = save_join(lua, timeout=self.TIMEOUT)
crash_occured = save_join(lua, timeout=Options().args.test_timeout)
self.killall_servers(server, ts, crash_occured)
except KeyboardInterrupt:
# prevent tests greenlet from writing to the real stdout
Expand Down

0 comments on commit 1f6d7ba

Please sign in to comment.