From e84355283f117e91964fe84eac70f26bb32d7f06 Mon Sep 17 00:00:00 2001 From: "Alexander V. Tikhonov" Date: Fri, 4 Dec 2020 11:22:30 +0300 Subject: [PATCH] Setup replication_sync_timeout at .tarantoolctl Tests could fail because of a hang in the seek_wait() loop when starting and stopping instances. This happened because the instances were not synced within the no-output timeout, which is 120 seconds by default, while the replication sync timeout is 300 seconds by default. To fix this, replication_sync_timeout has to be lower than 'no-output-timeout', so it is now set to 100 seconds by default. The failure looked like this in tests: --- replication/gh-5140-qsync-casc-rollback.result Mon Oct 19 17:29:46 2020 +++ /rw_bins/test/var/070_replication/gh-5140-qsync-casc-rollback.result Mon Oct 19 17:31:35 2020 @@ -169,56 +169,3 @@ -- all the records are replayed one be one without yields for WAL writes, and -- nothing should change. test_run:cmd('restart server default') - | -test_run:cmd('restart server replica') - | --- - | - true - | ... - Part of tarantool/tarantool#5504 --- .tarantoolctl | 4 ++++ lib/__init__.py | 1 + lib/options.py | 14 ++++++++++++++ 3 files changed, 19 insertions(+) diff --git a/.tarantoolctl b/.tarantoolctl index 5c46f8ac..189cf30c 100644 --- a/.tarantoolctl +++ b/.tarantoolctl @@ -1,6 +1,9 @@ -- Options for test-run tarantoolctl +-- Note: tonumber(nil) is nil. 
local workdir = os.getenv('TEST_WORKDIR') +local replication_sync_timeout = tonumber(os.getenv('REPLICATION_SYNC_TIMEOUT')) + default_cfg = { pid_file = workdir, wal_dir = workdir, @@ -8,6 +11,7 @@ default_cfg = { vinyl_dir = workdir, log = workdir, background = false, + replication_sync_timeout = replication_sync_timeout, } instance_dir = workdir diff --git a/lib/__init__.py b/lib/__init__.py index 52953df9..c03a6d86 100644 --- a/lib/__init__.py +++ b/lib/__init__.py @@ -57,6 +57,7 @@ def module_init(): soext = sys.platform == 'darwin' and 'dylib' or 'so' os.environ["LUA_PATH"] = SOURCEDIR+"/?.lua;"+SOURCEDIR+"/?/init.lua;;" os.environ["LUA_CPATH"] = BUILDDIR+"/?."+soext+";;" + os.environ["REPLICATION_SYNC_TIMEOUT"] = str(args.replication_sync_timeout) TarantoolServer.find_exe(args.builddir) UnittestServer.find_exe(args.builddir) diff --git a/lib/options.py b/lib/options.py index c5819084..9c81a9a1 100644 --- a/lib/options.py +++ b/lib/options.py @@ -201,6 +201,20 @@ def __init__(self): --valgrind, --long options is passed). Note: The option works now only with parallel testing.""") + parser.add_argument( + "--replication-sync-timeout", + dest="replication_sync_timeout", + default=100, + type=int, + help="""The number of seconds that a replica will wait when + trying to sync with a master in a cluster, or a quorum of + masters, after connecting or during configuration update. + This could fail indefinitely if replication_sync_lag is smaller + than network latency, or if the replica cannot keep pace with + master updates. If replication_sync_timeout expires, the replica + enters orphan status. + Default: 100 [seconds].""") + parser.add_argument( "--luacov", dest="luacov",