Skip to content

Commit

Permalink
FB8-103: Heartbeat event should carry the latest master timestamp (fa…
Browse files Browse the repository at this point in the history
…cebook#951)

Summary:
Jira issue: https://jira.percona.com/browse/FB8-103

Reference patch: facebook@eb9a3db
Reference patch: facebook@5803e4f
Reference patch: facebook@746c217

Compared to the 5.6 patch:
* fixed the rpl_heartbeat_timestamp testcase, which resulted in the test executor killing all servers because of a connection drop
* split the rpl_heartbeat_zero_timestamp test into two

Heartbeat events should carry the now() timestamp from the master and
the last_master_timestamp from the slave. In other words, HB should always carry
the latest master timestamp. This will help services connected to slaves know if
they are lagging even when no real events are flowing in the system.

Also, for this to work last_master_timestamp should be updated whenever a HB
event is received.

Let the master send the HB timestamp only while it is waiting for a new trx. This fixes the SBM being inaccurate when the slave has been stopped and is trying to catch up with the master.
Pull Request resolved: facebook#951

Reviewed By: lth

Differential Revision: D14034449

Pulled By: lth

fbshipit-source-id: e817153
  • Loading branch information
dutow authored and inikep committed Aug 10, 2020
1 parent c889723 commit ba226bc
Show file tree
Hide file tree
Showing 15 changed files with 440 additions and 11 deletions.
97 changes: 97 additions & 0 deletions mysql-test/suite/rpl/include/rpl_heartbeat_zero_timestamp.inc
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
################################################################################
# Testing that Seconds_Behind_Master is correct with timestamp in HB event
#
# Input (mysqltest variable set by the including test):
#   $parallel_workers - value assigned to @@global.slave_parallel_workers on
#                       the slave for the duration of the test.
#
# Outline:
#   1. Enable the send_zero_hb_event debug flag on the master and replicate a
#      small workload.
#   2. Stop the slave, generate a large workload on the master, wait
#      $lagging_sec seconds, then restart the slave with a 0.1s heartbeat
#      period.
#   3. Assert that Seconds_Behind_Master reported by the slave exceeds
#      $lagging_sec.
#   4. Catch up, drop the test databases and restore the saved settings.
################################################################################

source include/not_valgrind.inc;
source include/not_parallel.inc;

# Start a new master-slave
--source include/have_binlog_format_row.inc
--source include/master-slave.inc
--source include/rpl_set_gtid_mode.inc
--source include/have_debug.inc

# Configure the slave applier worker count (MTS when $parallel_workers > 0)
# and switch to auto-positioning. The previous worker count is saved so it can
# be restored during clean up.
--connection slave
--source include/stop_slave.inc
set @save.slave_parallel_workers= @@global.slave_parallel_workers;
eval SET @@global.slave_parallel_workers= $parallel_workers;
CHANGE MASTER TO MASTER_AUTO_POSITION=1;
--source include/start_slave.inc

# Run a few queries on the master to create skip scenario
# NOTE(review): send_zero_hb_event presumably makes the master send heartbeat
# events with a zero timestamp - confirm against the server-side debug hook.
--connection master
SET GLOBAL DEBUG="+d, send_zero_hb_event";
# $databases and $iter are the inputs of the workload generator sourced below.
let $databases = 4;
let $iter = 10;
--source suite/rpl/include/rpl_heartbeat_zero_timestamp_input.inc
# Make sure that the slave has caught up to the master
--source include/sync_slave_sql_with_master.inc


# Stop the slave
--connection slave
--source include/stop_slave.inc


# Create a lag on the master by running many queries
--connection master
let $databases = 4;
let $iter = 10000;

# Drop the databases left over from the first workload; the generator sourced
# below creates test1..test$databases again.
let $i = $databases;
while ($i)
{
eval drop database test$i;
dec $i;
}

--source suite/rpl/include/rpl_heartbeat_zero_timestamp_input.inc


# Now start slave again so that we can get some HB during skipping
--connection slave
# Create some lag: wait while the slave is still stopped so that the events
# written above are at least $lagging_sec seconds old when it restarts.
let $lagging_sec = 5;
sleep $lagging_sec;
# Set HB event interval small enough (the current interval is remembered so it
# can be restored during clean up)
let $old_slave_heartbeat_period= query_get_value(select heartbeat_interval from performance_schema.replication_connection_configuration, heartbeat_interval, 1);
let $new_slave_heartbeat_period= 0.1;
# Since the skipping is really small, after this, for sure we should get the event
let $skipping_guarantee_sleep= 1;
eval CHANGE MASTER TO MASTER_HEARTBEAT_PERIOD=$new_slave_heartbeat_period;
--source include/start_slave.inc

# Sleep so that we for sure get the binlog events
sleep $skipping_guarantee_sleep;
let $sbm= query_get_value("SHOW SLAVE STATUS", Seconds_Behind_Master, 1);
# Assertion: the reported lag must exceed the $lagging_sec seconds slept above.
# Note that the assert text says "bigger than zero" while the condition
# actually checks $sbm > $lagging_sec; the text is part of the recorded result
# files and is therefore left as is.
--let $assert_text = Seconds behind master should be bigger than zero after creating the lag for MTS
--let $assert_cond = $sbm > $lagging_sec;
--source include/assert.inc


# Catch up
--connection master
--source include/sync_slave_sql_with_master.inc


# clean up: drop the databases created by the workload generator
--connection master
let $i = $databases;
while ($i)
{
eval drop database test$i;
dec $i;
}

# Turn the debug flag off again and let the slave apply the drops.
SET GLOBAL DEBUG="-d, send_zero_hb_event";
--source include/sync_slave_sql_with_master.inc

# Restore the slave's worker count and heartbeat period saved earlier.
--connection slave
--source include/stop_slave.inc
set @@global.slave_parallel_workers= @save.slave_parallel_workers;
eval CHANGE MASTER TO MASTER_HEARTBEAT_PERIOD=$old_slave_heartbeat_period;
--source include/start_slave.inc

--source include/rpl_end.inc
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
################################################################################
# Workload generator for rpl_heartbeat_zero_timestamp.
#
# Runs on whichever connection is current when this file is sourced.
#
# Inputs (mysqltest variables set by the caller):
#   $databases - how many databases to create: test1 .. test$databases, each
#                holding one InnoDB table t<N> with a single int column.
#   $iter      - number of insert rounds. It is counted down by this file and
#                is 0 on return.
#
# The loop counter $i is overwritten and is 0 on return.
################################################################################

# Keep the generated statements and their results out of the result file.
--disable_query_log
--disable_result_log

# Schema set-up: one database and one table per index, highest index first.
--let $i= $databases
while ($i)
{
  --eval create database test$i
  --eval use test$i
  --eval create table t$i (a int) engine=InnoDB
  --dec $i
}

# Workload: every round inserts the current round number into each table.
while ($iter)
{
  --let $i= $databases
  while ($i)
  {
    --eval use test$i
    --eval insert into t$i values ($iter)
    --dec $i
  }
  --dec $iter
}

--enable_result_log
--enable_query_log
41 changes: 41 additions & 0 deletions mysql-test/suite/rpl/r/rpl_heartbeat_timestamp.result
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
include/rpl_init.inc [topology=1->2->3]
Warnings:
Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
Warnings:
Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
include/rpl_connect.inc [creating master]
include/rpl_connect.inc [creating master1]
include/rpl_connect.inc [creating slave]
include/rpl_connect.inc [creating slave1]
include/rpl_connect.inc [creating slave_2]
STOP SLAVE;
CHANGE MASTER TO MASTER_HEARTBEAT_PERIOD=2;
START SLAVE;
STOP SLAVE;
CHANGE MASTER TO MASTER_HEARTBEAT_PERIOD=2;
SET GLOBAL RESET_SECONDS_BEHIND_MASTER=0;
START SLAVE;
CREATE TABLE t1(a INT);
INSERT INTO t1 VALUES(0);
include/save_master_pos.inc
include/sync_slave_sql.inc
include/save_master_pos.inc
include/sync_slave_sql.inc
include/assert.inc [Seconds behind master should be between 0 and HB period]
STOP SLAVE;
include/assert.inc [Seconds behind master should keep increasing when intermidiate slave is stopped]
START SLAVE;
include/rpl_stop_server.inc [server_number=1]
include/assert.inc [Seconds behind master should keep increasing when master is killed]
include/rpl_start_server.inc [server_number=1]
DROP TABLE t1;
STOP SLAVE;
CHANGE MASTER TO MASTER_HEARTBEAT_PERIOD=30.000;
START SLAVE;
STOP SLAVE;
CHANGE MASTER TO MASTER_HEARTBEAT_PERIOD=30.000;
SET GLOBAL RESET_SECONDS_BEHIND_MASTER=1;
START SLAVE;
include/rpl_end.inc
7 changes: 7 additions & 0 deletions mysql-test/suite/rpl/t/rpl_heartbeat_timestamp.cnf
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Server configuration for rpl_heartbeat_timestamp.test: the suite defaults
# plus a third server, needed for the 1->2->3 replication chain.
!include ../my.cnf


# Third server instance; no options beyond what the included file provides.
[mysqld.3]

# Export the third server's port to the test environment.
[ENV]
SERVER_MYPORT_3= @mysqld.3.port
136 changes: 136 additions & 0 deletions mysql-test/suite/rpl/t/rpl_heartbeat_timestamp.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
# Tests the behavior of heartbeat timestamps
#
# First we create a chain topology with two slaves i.e. master->slave->slave_2.
# We disable RESET_SECONDS_BEHIND_MASTER in slave_2 so that
# Seconds_Behind_Master depends on incoming events instead of difference between
# IO and SQL thread. Then we check that when the master is running the lag on
# slave_2 is between 0 and heartbeat period. We then stop replication on the
# intermediate slave and check that the lag on slave_2 keeps increasing, and
# finally kill the master and check again that the lag on slave_2 keeps
# increasing.

source include/not_valgrind.inc;
source include/not_parallel.inc;

# Three servers replicating in a chain: server 1 -> server 2 -> server 3.
let $rpl_server_count= 3;
let $rpl_topology= 1->2->3;
source include/rpl_init.inc;

# Named connections used below: master/master1 -> server 1,
# slave/slave1 -> server 2, slave_2 -> server 3.
let $rpl_connection_name= master;
let $rpl_server_number= 1;
source include/rpl_connect.inc;

let $rpl_connection_name= master1;
let $rpl_server_number= 1;
source include/rpl_connect.inc;

let $rpl_connection_name= slave;
let $rpl_server_number= 2;
source include/rpl_connect.inc;

let $rpl_connection_name= slave1;
let $rpl_server_number= 2;
source include/rpl_connect.inc;

let $rpl_connection_name= slave_2;
let $rpl_server_number= 3;
source include/rpl_connect.inc;

# Remember the current heartbeat interval (read on the slave connection) so it
# can be restored at the end, and derive a sleep that is one second longer than
# the new heartbeat period.
connection slave;
let $old_slave_heartbeat_period= query_get_value(select heartbeat_interval from performance_schema.replication_connection_configuration, heartbeat_interval, 1);
let $new_slave_heartbeat_period= 2;
let $heartbeat_guarantee_sleep= `SELECT $new_slave_heartbeat_period + 1`;

# Apply the short heartbeat period on the intermediate slave ...
connection slave;
STOP SLAVE;
eval CHANGE MASTER TO MASTER_HEARTBEAT_PERIOD=$new_slave_heartbeat_period;
START SLAVE;

# ... and on the last slave, where RESET_SECONDS_BEHIND_MASTER is also turned
# off (see the header comment for why).
connection slave_2;
STOP SLAVE;
eval CHANGE MASTER TO MASTER_HEARTBEAT_PERIOD=$new_slave_heartbeat_period;
SET GLOBAL RESET_SECONDS_BEHIND_MASTER=0;
START SLAVE;

# Send some binlog events so that last_master_timestamp > 0
connection master;
CREATE TABLE t1(a INT);
INSERT INTO t1 VALUES(0);

# Sync slave 1
# NOTE(review): the position is saved on the "default" connection, which is
# presumably connected to server 1 - confirm against include/rpl_init.inc.
connection default;
source include/save_master_pos.inc;
connection slave;
source include/sync_slave_sql.inc;

# Sync slave 2 (the intermediate slave acts as its master)
connection slave;
source include/save_master_pos.inc;
connection slave_2;
source include/sync_slave_sql.inc;

# Check if seconds behind master is between 0 and heartbeat period
# The sleep is longer than the heartbeat period, so that at least one
# heartbeat interval has elapsed before the value is sampled.
connection slave_2;
sleep $heartbeat_guarantee_sleep;
let $first= query_get_value("SHOW SLAVE STATUS", Seconds_Behind_Master, 1);

let $assert_cond= ($first <= $new_slave_heartbeat_period && $first >= 0);
let $assert_text= Seconds behind master should be between 0 and HB period;
source include/assert.inc;


# Check if seconds behind master keeps increasing when intermediate slave is
# stopped: sample Seconds_Behind_Master on slave_2 twice, one sleep apart, and
# require the second sample to be strictly larger.
connection slave;
STOP SLAVE;
connection slave_2;
sleep $heartbeat_guarantee_sleep;
let $first= query_get_value("SHOW SLAVE STATUS", Seconds_Behind_Master, 1);
sleep $heartbeat_guarantee_sleep;
let $second= query_get_value("SHOW SLAVE STATUS", Seconds_Behind_Master, 1);

let $assert_cond= $first < $second;
let $assert_text= Seconds behind master should keep increasing when intermidiate slave is stopped;
source include/assert.inc;

# Resume replication on the intermediate slave before the next scenario.
connection slave;
START SLAVE;


# Kill the master
let $rpl_server_number= 1;
let $rpl_force_stop= 1;
source include/rpl_stop_server.inc;


# Check if seconds behind master keeps increasing
# (same two-sample comparison as above, now with server 1 down)
connection slave_2;
sleep $heartbeat_guarantee_sleep;
let $first= query_get_value("SHOW SLAVE STATUS", Seconds_Behind_Master, 1);
sleep $heartbeat_guarantee_sleep;
let $second= query_get_value("SHOW SLAVE STATUS", Seconds_Behind_Master, 1);

let $assert_cond= $first < $second;
let $assert_text= Seconds behind master should keep increasing when master is killed;
source include/assert.inc;

# Bring server 1 back so the clean up below can replicate through the chain.
let $rpl_server_number= 1;
source include/rpl_start_server.inc;

# Clean up: drop the test table.
connection default;
DROP TABLE t1;

# Sync slaves
sync_slave_with_master slave;
sync_slave_with_master slave_2;

# Restore the heartbeat period saved at the start on both slaves.
connection slave;
STOP SLAVE;
eval CHANGE MASTER TO MASTER_HEARTBEAT_PERIOD=$old_slave_heartbeat_period;
START SLAVE;

# On slave_2 also switch RESET_SECONDS_BEHIND_MASTER back on. The value 1 is
# hard-coded here rather than restored from a saved value.
connection slave_2;
STOP SLAVE;
eval CHANGE MASTER TO MASTER_HEARTBEAT_PERIOD=$old_slave_heartbeat_period;
SET GLOBAL RESET_SECONDS_BEHIND_MASTER=1;
START SLAVE;

source include/rpl_end.inc;
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
include/master-slave.inc
Warnings:
Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
[connection master]
include/rpl_set_gtid_mode.inc [ON on servers 1,2]
include/stop_slave.inc
set @save.slave_parallel_workers= @@global.slave_parallel_workers;
SET @@global.slave_parallel_workers= 4;
CHANGE MASTER TO MASTER_AUTO_POSITION=1;
include/start_slave.inc
SET GLOBAL DEBUG="+d, send_zero_hb_event";
include/sync_slave_sql_with_master.inc
include/stop_slave.inc
drop database test4;
drop database test3;
drop database test2;
drop database test1;
CHANGE MASTER TO MASTER_HEARTBEAT_PERIOD=0.1;
include/start_slave.inc
include/assert.inc [Seconds behind master should be bigger than zero after creating the lag for MTS]
include/sync_slave_sql_with_master.inc
drop database test4;
drop database test3;
drop database test2;
drop database test1;
SET GLOBAL DEBUG="-d, send_zero_hb_event";
include/sync_slave_sql_with_master.inc
include/stop_slave.inc
set @@global.slave_parallel_workers= @save.slave_parallel_workers;
CHANGE MASTER TO MASTER_HEARTBEAT_PERIOD=30.000;
include/start_slave.inc
include/rpl_end.inc
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
include/master-slave.inc
Warnings:
Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
[connection master]
include/rpl_set_gtid_mode.inc [ON on servers 1,2]
include/stop_slave.inc
set @save.slave_parallel_workers= @@global.slave_parallel_workers;
SET @@global.slave_parallel_workers= 0;
CHANGE MASTER TO MASTER_AUTO_POSITION=1;
include/start_slave.inc
SET GLOBAL DEBUG="+d, send_zero_hb_event";
include/sync_slave_sql_with_master.inc
include/stop_slave.inc
drop database test4;
drop database test3;
drop database test2;
drop database test1;
CHANGE MASTER TO MASTER_HEARTBEAT_PERIOD=0.1;
include/start_slave.inc
include/assert.inc [Seconds behind master should be bigger than zero after creating the lag for MTS]
include/sync_slave_sql_with_master.inc
drop database test4;
drop database test3;
drop database test2;
drop database test1;
SET GLOBAL DEBUG="-d, send_zero_hb_event";
include/sync_slave_sql_with_master.inc
include/stop_slave.inc
set @@global.slave_parallel_workers= @save.slave_parallel_workers;
CHANGE MASTER TO MASTER_HEARTBEAT_PERIOD=30.000;
include/start_slave.inc
include/rpl_end.inc
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
################################################################################
# Seconds_Behind_Master with a timestamp in the HB event: runs the shared
# scenario with 4 slave parallel workers.
################################################################################

# Worker count read by the shared include, which assigns it to
# @@global.slave_parallel_workers on the slave.
--let $parallel_workers= 4
--source suite/rpl/include/rpl_heartbeat_zero_timestamp.inc
Loading

0 comments on commit ba226bc

Please sign in to comment.