Skip to content

Commit

Permalink
Add test_timeout to limit test run time
Browse files Browse the repository at this point in the history
Added the 'test-timeout' option, which breaks the test process with a
kill signal if the test runs longer than the given number of seconds.
The default is 110 seconds. This value should be greater than
'replication-sync-timeout' (100 seconds by default) and less than
'no-output-timeout' (120 seconds by default).

This timeout avoids the situation where a hanging test blocks until
'no-output-timeout' is reached, at which point the whole test run
exits. Now, if a test hangs, 'test-timeout' terminates the test
processes. This gives the test-run worker a chance to restart the
failed test or to continue with the remaining tests in the worker
queue. Before this fix, tests hung as in [1] and [2]; with the fix, the
same situations are resolved, as in [3] and [4] respectively.

To reproduce issues like [2], set 'test-timeout' too low for the test
to complete its 'restart server ...' command, for example:

  ./test-run.py replication/quorum.test.lua --test-timeout 5 \
    --no-output-timeout 10 --conf memtx

Together with PR #186, which helps to kill instances that SIGTERM could
not stop, this fix resolves issue #157.

Part of #157

[1] - https://gitlab.com/tarantool/tarantool/-/jobs/835734706#L4968
[2] - https://gitlab.com/tarantool/tarantool/-/jobs/822649038#L4835
[3] - https://gitlab.com/tarantool/tarantool/-/jobs/874058059#L4993
[4] - https://gitlab.com/tarantool/tarantool/-/jobs/874058745#L5316
  • Loading branch information
avtikhon committed Nov 30, 2020
1 parent 29a0680 commit de99b66
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 5 deletions.
12 changes: 12 additions & 0 deletions lib/app_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@

from gevent.subprocess import Popen, PIPE

from lib.colorer import color_stdout
from lib.colorer import color_log
from lib.options import Options
from lib.preprocessor import TestState
from lib.server import Server
from lib.tarantool_server import Test
Expand All @@ -16,12 +18,22 @@
from lib.utils import format_process
from lib.utils import warn_unix_socket
from test import TestRunGreenlet, TestExecutionError
from threading import Timer


def timeout_handler(server_process, test_timeout):
    """
    Report that the test timeout was reached and kill the server process.

    :param server_process: process object; its ``kill()`` method is called
    :param test_timeout: timeout value in seconds, interpolated into the
                         reported message
    """
    message = "Test timeout of %d secs reached\t" % test_timeout
    color_stdout(message, schema='error')
    server_process.kill()


def run_server(execs, cwd, server, logfile, retval):
os.putenv("LISTEN", server.iproto)
server.process = Popen(execs, stdout=PIPE, stderr=PIPE, cwd=cwd)
test_timeout = Options().args.test_timeout
timer = Timer(test_timeout, timeout_handler, [server.process, test_timeout])
timer.start()
stdout, stderr = server.process.communicate()
timer.cancel()
sys.stdout.write(stdout)
with open(logfile, 'a') as f:
f.write(stderr)
Expand Down
8 changes: 8 additions & 0 deletions lib/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,14 @@ def __init__(self):
Such files created by workers in the "var/reproduce" directory.
Note: The option works now only with parallel testing.""")

parser.add_argument(
"--test-timeout",
dest="test_timeout",
default=110,
type=int,
help="""Break the test process with kill signal if the test runs
longer than this amount of seconds. Default: 110 [seconds].""")

parser.add_argument(
"--no-output-timeout",
dest="no_output_timeout",
Expand Down
14 changes: 9 additions & 5 deletions lib/tarantool_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import yaml

from gevent import socket
from gevent import Timeout
from greenlet import GreenletExit

try:
Expand All @@ -24,6 +25,7 @@
from lib.admin_connection import AdminConnection, AdminAsyncConnection
from lib.box_connection import BoxConnection
from lib.colorer import color_stdout, color_log
from lib.options import Options
from lib.preprocessor import TestState
from lib.server import Server
from lib.test import Test
Expand All @@ -38,12 +40,15 @@
def save_join(green_obj, timeout=None):
"""
Gevent join wrapper for
test-run stop-on-crash feature
test-run stop-on-crash/stop-on-timeout feature
:return True in case of crash and False otherwise
:return True in case of crash or test timeout and False otherwise
"""
try:
green_obj.join(timeout=timeout)
green_obj.get(timeout=timeout)
except Timeout:
color_stdout("Test timeout of %d secs reached\t" % timeout, schema='error')
return True
except GreenletExit:
return True
# We don't catch TarantoolStartError here to propagate it to a parent
Expand All @@ -54,7 +59,6 @@ def save_join(green_obj, timeout=None):
class LuaTest(Test):
""" Handle *.test.lua and *.test.sql test files. """

TIMEOUT = 60 * 10
RESULT_FILE_VERSION_INITIAL = 1
RESULT_FILE_VERSION_DEFAULT = 2
RESULT_FILE_VERSION_LINE_RE = re.compile(
Expand Down Expand Up @@ -372,7 +376,7 @@ def execute(self, server):
lua.start()
crash_occured = True
try:
crash_occured = save_join(lua, timeout=self.TIMEOUT)
crash_occured = save_join(lua, timeout=Options().args.test_timeout)
self.killall_servers(server, ts, crash_occured)
except KeyboardInterrupt:
# prevent tests greenlet from writing to the real stdout
Expand Down

0 comments on commit de99b66

Please sign in to comment.