Skip to content

Commit

Permalink
Fix partial trx condition when waiting for workers to finish
Browse files Browse the repository at this point in the history
Summary:
After per DB commit ordering in D13086060, we buffer transactions in the SQL
thread until we find an event that contains DB information. While syncing
transactions (like DDL which need to be executed in isolation) we need to use
the `trx_queued` flag to figure out if we need to factor in a partial
transaction (i.e. a transaction  which is not fully parsed by SQL thread yet),
otherwise race conditions can occur because SQL thread will not wait for all
transactions to complete before queuing the sync transaction. In the case of DDL,
the race condition can cause the DDL to run before a conflicting trx, this will cause
a deadlock because of commit ordering or MDL locks.

Reviewed By: anirbanr-fb

Differential Revision: D16590942

fbshipit-source-id: 32ef00f
  • Loading branch information
abhinav04sharma authored and facebook-github-bot committed Aug 2, 2019
1 parent c42081d commit 85c6ed5
Show file tree
Hide file tree
Showing 7 changed files with 144 additions and 11 deletions.
13 changes: 13 additions & 0 deletions mysql-test/suite/rpl_mts/r/rpl_mts_dependency_ddl_stress.result
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
include/master-slave.inc
Warnings:
Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
[connection master]
STOP SLAVE SQL_THREAD;
create table t1(a int auto_increment primary key) engine = innodb;
start slave sql_thread;
include/sync_slave_sql_with_master.inc
include/diff_tables.inc [master:test.t1, slave:test.t1]
drop table t1;
include/sync_slave_sql_with_master.inc
include/rpl_end.inc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
--gtid_mode=ON --enforce_gtid_consistency --log_slave_updates
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
--gtid_mode=ON --enforce_gtid_consistency --log_slave_updates
--slave_parallel_workers=16
93 changes: 93 additions & 0 deletions mysql-test/suite/rpl_mts/t/rpl_mts_dependency_ddl_stress.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
source include/master-slave.inc;
source include/have_mts_dependency_replication.inc;
source include/big_test.inc;
source include/not_valgrind.inc;

connection slave;
STOP SLAVE SQL_THREAD;

# generate load
connection master;
create table t1(a int auto_increment primary key) engine = innodb;

disable_query_log;
let $iter = 10000;
let $col_inc = 0;
while ($iter > 0)
{
let $rand= `select round(rand() * 100)`;
# execute a DDL 20% of the time
if ($rand < 20)
{
let $inner_rand= `select round(rand() * 100)`;
if ($inner_rand < 25)
{
drop table t1;
create table t1(a int auto_increment primary key) engine = innodb;
}
if ($inner_rand >= 25 && $inner_rand < 50)
{
eval alter table t1 add column b_$col_inc int;
inc $col_inc;
}
if ($inner_rand >= 50 && $inner_rand < 75)
{
let $c= `select column_name from information_schema.columns where
table_name = 't1' and column_name not like 'a%' limit 1`;
if ($c != "")
{
eval alter table t1 drop column $c;
}
}
if ($inner_rand >= 75)
{
let $c= `select column_name from information_schema.columns where
table_name = 't1' and column_name not like 'a%' limit 1`;
if ($c != "")
{
eval alter table t1 modify $c bigint;
}
}
}
# Execute DML 80% of the time
if ($rand >= 20)
{
let $inner_rand2= `select round(rand() * 100)`;
if ($inner_rand2 < 33)
{
insert into t1 values();
}
if ($inner_rand2 >= 33 && $inner_rand2 < 66)
{
delete from t1 limit 1;
}
if ($inner_rand2 >= 66)
{
let $c= `select column_name from information_schema.columns where
table_name = 't1' and column_name not like 'a%' limit 1`;
if ($c != "")
{
eval update t1 set $c = a;
}
}
}
dec $iter;
}
enable_query_log;

connection slave;
start slave sql_thread;

connection master;
source include/sync_slave_sql_with_master.inc;

# Check if master and slave have the same data
let $diff_tables=master:test.t1, slave:test.t1;
source include/diff_tables.inc;

# Cleanup
connection master;
drop table t1;
source include/sync_slave_sql_with_master.inc;

source include/rpl_end.inc;
2 changes: 1 addition & 1 deletion sql/log_event.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3273,7 +3273,7 @@ void Log_event::schedule_dep(Relay_log_info *rli)

if (unlikely(rli->dep_sync_group))
{
wait_for_dep_workers_to_finish(rli, true);
wait_for_dep_workers_to_finish(rli, rli->trx_queued);
}

handle_terminal_dep_event(rli, ev);
Expand Down
13 changes: 11 additions & 2 deletions sql/rpl_rli_pdb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1868,8 +1868,17 @@ void Slave_worker::do_report(loglevel level, int err_code, const char *msg,
c_rli->va_report(level, err_code, buff_coord, msg, args);
}

void wait_for_dep_workers_to_finish(Relay_log_info *rli,
const bool partial_trx)
/**
* Wait for all dependency slave workers to finish working on all enqueued trxs
*
* @param rli Relay log info of the coordinator thread
* @param partial_trx Have we queued a partial transaction?
* If true, we can't blindly wait for the trx counter to be
* zero (since the worker will not be able to complete
* that transaction)
* @return void
*/
void wait_for_dep_workers_to_finish(Relay_log_info *rli, const bool partial_trx)
{
DBUG_ASSERT(rli->mts_dependency_replication);
PSI_stage_info old_stage;
Expand Down
31 changes: 23 additions & 8 deletions sql/rpl_slave.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6421,19 +6421,34 @@ void slave_stop_workers(Relay_log_info *rli, bool *mts_inited)
if (rli->mts_dependency_replication)
{
mysql_mutex_lock(&rli->dep_lock);
// figure out if any worker thread is working on a partially scheduled
// transaction, i.e. if the queue is empty and the SQL thread is maintaning
// a begin event
const bool partial= rli->dep_queue.empty() &&
rli->current_begin_event != nullptr;
// case: clear all buffered transactions if UNTIL condition is not specified
if (likely(rli->until_condition == Relay_log_info::UNTIL_NONE))
// this will be used to determine if we need to deal with a worker that is
// handling a partially enqueued trx
bool partial= false;
// case: until condition is specified, so we have to execute all
// transactions in the queue
if (unlikely(rli->until_condition != Relay_log_info::UNTIL_NONE))
{
// we have a partial trx if the coordinator has set the trx_queued flag,
// this is because this flag is set to false after coordinator sees an end
// event
partial= rli->trx_queued;
}
// case: until condition is not specified
else
{
// the partial check is slightly complicated in this case because we only
// care about partial trx that has been pulled by a worker, since the
// queue is going to be emptied next anyway, we make this check by
// checking of the queue is empty
partial= rli->dep_queue.empty() && rli->trx_queued;
// let's cleanup, we can clear the queue in this case
rli->clear_dep(false);
}
mysql_mutex_unlock(&rli->dep_lock);

wait_for_dep_workers_to_finish(rli, partial);
// case: if UNTIL is specified let's clear after waiting for workers

// case: if UNTIL is specified let's clean up after waiting for workers
if (unlikely(rli->until_condition != Relay_log_info::UNTIL_NONE))
{
rli->clear_dep(true);
Expand Down

0 comments on commit 85c6ed5

Please sign in to comment.