From 6bb2a6a530d06d7dc98d6cbf5f7e1fdc939f2dea Mon Sep 17 00:00:00 2001 From: Alexander Turenko Date: Sat, 5 Dec 2020 01:37:22 +0300 Subject: [PATCH 1/3] Set PWD env variable to a worker working directory test-run copies the .tarantoolctl file from the test/ directory to a worker's working directory (e.g. test/var/001_app). It would be quite intuitive if tarantoolctl used this copy when operating on tarantool instances of the worker. Surprisingly, it is not so. tarantoolctl checks whether the file exists inside the directory pointed to by the PWD environment variable, not the real current working directory. test-run did not set PWD prior to this commit, yet everything worked. Why? Because test-run.py is always executed from the test/ directory, which contains the same .tarantoolctl file. Even `make test` / `make test-force` targets issue commands like `cd <...>/test && <...>/test-run.py <...>` (where `cd` sets PWD). The time has come. Let's set PWD and use the test/var/001_app/.tarantoolctl configuration for instances of the 001_app worker. Side note: it would be good to fix tarantoolctl too, so that it uses the real cwd, not PWD. This inconsistency was spotted when we attempted to remove the test/.tarantoolctl config from the main tarantool repository and move it to the test-run repository, while still copying it into a worker directory. As a side effect, test-run can now be invoked without manually changing the current directory to test/. So, where we used to run a command like this (example for in-source build): | (cd test && ./test-run.py) The following command can be used: | ./test/test-run.py Nice! Sadly, the trick `Popen(<...>, env=dict(os.environ, PWD=self.vardir))` does not work for us, because we use os.putenv() in test-run and Python tests. os.putenv() does not update os.environ, so we would feed a stale environment to the subprocess. Because of this, we just set PWD before the Popen() call and restore it afterwards. Part of tarantool/tarantool#5504 Part of #78 Co-authored-by: Alexander V. 
Tikhonov --- lib/__init__.py | 5 +++++ lib/tarantool_server.py | 9 +++++++++ 2 files changed, 14 insertions(+) diff --git a/lib/__init__.py b/lib/__init__.py index 767f0d63..52953df9 100644 --- a/lib/__init__.py +++ b/lib/__init__.py @@ -35,6 +35,11 @@ def module_init(): os.chdir(path) setenv() + # Keep the PWD environment variable in sync with a current + # working directory. It does not strictly necessary, just to + # avoid any confusion. + os.environ['PWD'] = os.getcwd() + warn_unix_sockets_at_start(args.vardir) # always run with clean (non-existent) 'var' directory diff --git a/lib/tarantool_server.py b/lib/tarantool_server.py index a422b873..9be7fa53 100644 --- a/lib/tarantool_server.py +++ b/lib/tarantool_server.py @@ -848,6 +848,12 @@ def start(self, silent=True, wait=True, wait_load=True, rais=True, args=[], os.putenv("MASTER", self.rpl_master.iproto.uri) self.logfile_pos = self.logfile + # This is strange, but tarantooctl leans on the PWD + # environment variable, not a real current working + # directory, when it performs search for the + # .tarantoolctl configuration file. + os.environ['PWD'] = self.vardir + # redirect stdout from tarantoolctl and tarantool os.putenv("TEST_WORKDIR", self.vardir) self.process = subprocess.Popen(args, @@ -856,6 +862,9 @@ def start(self, silent=True, wait=True, wait_load=True, rais=True, args=[], stderr=self.log_des) del(self.log_des) + # Restore the actual PWD value. + os.environ['PWD'] = os.getcwd() + # gh-19 crash detection self.crash_detector = TestRunGreenlet(self.crash_detect) self.crash_detector.info = "Crash detector: %s" % self.process From 13380c756ef48765d0841a0e24565164ba82bf4b Mon Sep 17 00:00:00 2001 From: "Alexander V. 
Tikhonov" Date: Mon, 23 Nov 2020 18:11:41 +0000 Subject: [PATCH 2/3] Move .tarantoolctl config to test-run repository Moved .tarantoolctl to the test-run tool submodule repository as: /test-run/.tarantoolctl Also kept backward compatibility: the old location /test/.tarantoolctl is still checked first and is used as the primary place when it exists. Needed for tarantool/tarantool#5504 Part of #78 --- .tarantoolctl | 15 +++++++++++++++ lib/tarantool_server.py | 10 +++++++++- 2 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 .tarantoolctl diff --git a/.tarantoolctl b/.tarantoolctl new file mode 100644 index 00000000..5c46f8ac --- /dev/null +++ b/.tarantoolctl @@ -0,0 +1,15 @@ +-- Options for test-run tarantoolctl + +local workdir = os.getenv('TEST_WORKDIR') +default_cfg = { + pid_file = workdir, + wal_dir = workdir, + memtx_dir = workdir, + vinyl_dir = workdir, + log = workdir, + background = false, +} + +instance_dir = workdir + +-- vim: set ft=lua : diff --git a/lib/tarantool_server.py b/lib/tarantool_server.py index 9be7fa53..481b08f9 100644 --- a/lib/tarantool_server.py +++ b/lib/tarantool_server.py @@ -776,7 +776,15 @@ def copy_files(self): if (e.errno == errno.ENOENT): continue raise - shutil.copy('.tarantoolctl', self.vardir) + # Previously tarantoolctl configuration file located in tarantool + # repository at test/ directory. Currently it is located in root + # path of test-run/ submodule repository. For backward compatibility + # this file should be checked at the old place and only after at + # the current. + tntctl_file = '.tarantoolctl' + if not os.path.exists(tntctl_file): + tntctl_file = os.path.join(self.TEST_RUN_DIR, '.tarantoolctl') + shutil.copy(tntctl_file, self.vardir) shutil.copy(os.path.join(self.TEST_RUN_DIR, 'test_run.lua'), self.vardir) # Need to use get here because of nondefault servers doesn't have ini. From d36db6c5cc3eacad03f75186aaf716035f2a090f Mon Sep 17 00:00:00 2001 From: "Alexander V. 
Tikhonov" Date: Fri, 4 Dec 2020 11:22:30 +0300 Subject: [PATCH 3/3] Setup replication_sync_timeout at .tarantoolctl Found that tests may fail due to a hang in the seek_wait() loop on starting and stopping instances. It happened because instances were not synced within the no-output timeout, which is 120 seconds by default, while the replication sync timeout is 300 seconds by default. To fix it, replication_sync_timeout should be decreased to a value lower than 'no-output-timeout', so it was decided to set it to 100 seconds. This is how the issue looked in tests: --- replication/gh-5140-qsync-casc-rollback.result Mon Oct 19 17:29:46 2020 +++ /rw_bins/test/var/070_replication/gh-5140-qsync-casc-rollback.result Mon Oct 19 17:31:35 2020 @@ -169,56 +169,3 @@ -- all the records are replayed one be one without yields for WAL writes, and -- nothing should change. test_run:cmd('restart server default') - | -test_run:cmd('restart server replica') - | --- - | - true - | ... - Part of tarantool/tarantool#5504 --- .tarantoolctl | 4 ++++ lib/__init__.py | 1 + lib/options.py | 14 ++++++++++++++ 3 files changed, 19 insertions(+) diff --git a/.tarantoolctl b/.tarantoolctl index 5c46f8ac..189cf30c 100644 --- a/.tarantoolctl +++ b/.tarantoolctl @@ -1,6 +1,9 @@ -- Options for test-run tarantoolctl +-- Note: tonumber(nil) is nil. 
local workdir = os.getenv('TEST_WORKDIR') +local replication_sync_timeout = tonumber(os.getenv('REPLICATION_SYNC_TIMEOUT')) + default_cfg = { pid_file = workdir, wal_dir = workdir, @@ -8,6 +11,7 @@ default_cfg = { vinyl_dir = workdir, log = workdir, background = false, + replication_sync_timeout = replication_sync_timeout, } instance_dir = workdir diff --git a/lib/__init__.py b/lib/__init__.py index 52953df9..c03a6d86 100644 --- a/lib/__init__.py +++ b/lib/__init__.py @@ -57,6 +57,7 @@ def module_init(): soext = sys.platform == 'darwin' and 'dylib' or 'so' os.environ["LUA_PATH"] = SOURCEDIR+"/?.lua;"+SOURCEDIR+"/?/init.lua;;" os.environ["LUA_CPATH"] = BUILDDIR+"/?."+soext+";;" + os.environ["REPLICATION_SYNC_TIMEOUT"] = str(args.replication_sync_timeout) TarantoolServer.find_exe(args.builddir) UnittestServer.find_exe(args.builddir) diff --git a/lib/options.py b/lib/options.py index c5819084..9c81a9a1 100644 --- a/lib/options.py +++ b/lib/options.py @@ -201,6 +201,20 @@ def __init__(self): --valgrind, --long options is passed). Note: The option works now only with parallel testing.""") + parser.add_argument( + "--replication-sync-timeout", + dest="replication_sync_timeout", + default=100, + type=int, + help="""The number of seconds that a replica will wait when + trying to sync with a master in a cluster, or a quorum of + masters, after connecting or during configuration update. + This could fail indefinitely if replication_sync_lag is smaller + than network latency, or if the replica cannot keep pace with + master updates. If replication_sync_timeout expires, the replica + enters orphan status. + Default: 100 [seconds].""") + parser.add_argument( "--luacov", dest="luacov",