Skip to content

Commit

Permalink
Heartbeat event should carry the latest master timestamp
Browse files Browse the repository at this point in the history
Summary:
Heartbeat events should carry the now() timestamp from the master and
the last_master_timstamp from the slave. In other words, HB should always carry
the lastet master timestamp. This will help services connected to slaves know if
they are lagging even when no real events are flowing in the system.

Also, for this to work last_master_timestamp should be updated whenever a HB
event is received.

Reviewed By: tianx

Differential Revision: D5006919

fbshipit-source-id: 6258aff
  • Loading branch information
abhinav04sharma authored and facebook-github-bot committed May 23, 2017
1 parent ab9d60b commit eb9a3db
Show file tree
Hide file tree
Showing 9 changed files with 219 additions and 10 deletions.
41 changes: 41 additions & 0 deletions mysql-test/suite/rpl/r/rpl_heartbeat_timestamp.result
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
include/rpl_init.inc [topology=1->2->3]
Warnings:
Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
Warnings:
Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
include/rpl_connect.inc [creating master]
include/rpl_connect.inc [creating master1]
include/rpl_connect.inc [creating slave]
include/rpl_connect.inc [creating slave1]
include/rpl_connect.inc [creating slave_2]
STOP SLAVE;
CHANGE MASTER TO MASTER_HEARTBEAT_PERIOD=2;
START SLAVE;
STOP SLAVE;
CHANGE MASTER TO MASTER_HEARTBEAT_PERIOD=2;
SET GLOBAL RESET_SECONDS_BEHIND_MASTER=0;
START SLAVE;
CREATE TABLE t1(a INT);
INSERT INTO t1 VALUES(0);
include/save_master_pos.inc
include/sync_slave_sql.inc
include/save_master_pos.inc
include/sync_slave_sql.inc
include/assert.inc [Seconds behind master should be between 0 and HB period]
STOP SLAVE;
include/assert.inc [Seconds behind master should keep increasing when intermidiate slave is stopped]
START SLAVE;
include/rpl_stop_server.inc [server_number=1]
include/assert.inc [Seconds behind master should keep increasing when master is killed]
include/rpl_start_server.inc [server_number=1]
DROP TABLE t1;
STOP SLAVE;
CHANGE MASTER TO MASTER_HEARTBEAT_PERIOD=60.000;
START SLAVE;
STOP SLAVE;
CHANGE MASTER TO MASTER_HEARTBEAT_PERIOD=60.000;
SET GLOBAL RESET_SECONDS_BEHIND_MASTER=1;
START SLAVE;
include/rpl_end.inc
11 changes: 11 additions & 0 deletions mysql-test/suite/rpl/t/rpl_heartbeat_timestamp.cnf
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
!include ../my.cnf

[mysqld.1]
log-slave-updates
[mysqld.2]
log-slave-updates
[mysqld.3]
log-slave-updates

[ENV]
SERVER_MYPORT_3= @mysqld.3.port
132 changes: 132 additions & 0 deletions mysql-test/suite/rpl/t/rpl_heartbeat_timestamp.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
# Tests the behavior of heartbeat timestamps
#
# First we create a chain topology with two slaves i.e. master->slave->slave_2.
# We disable RESET_SECONDS_BEHIND_MASTER in slave_2 so that
# Seconds_Behind_Master depends on incoming events instead of difference between
# IO and SQL thread. Then we check that when the master is running the lag on
# slave_2 is between 0 and heartbeat period. We then kill the master and
# check if the lag on slave_2 keeps increasing.

let $rpl_server_count= 3;
let $rpl_topology= 1->2->3;
source include/rpl_init.inc;

let $rpl_connection_name= master;
let $rpl_server_number= 1;
source include/rpl_connect.inc;

let $rpl_connection_name= master1;
let $rpl_server_number= 1;
source include/rpl_connect.inc;

let $rpl_connection_name= slave;
let $rpl_server_number= 2;
source include/rpl_connect.inc;

let $rpl_connection_name= slave1;
let $rpl_server_number= 2;
source include/rpl_connect.inc;

let $rpl_connection_name= slave_2;
let $rpl_server_number= 3;
source include/rpl_connect.inc;

let $old_slave_heartbeat_period= query_get_value(SHOW GLOBAL STATUS LIKE 'slave_heartbeat_period', Value, 1);
let $new_slave_heartbeat_period= 2;
let $heartbeat_guarantee_sleep= `SELECT $new_slave_heartbeat_period + 1`;

connection slave;
STOP SLAVE;
eval CHANGE MASTER TO MASTER_HEARTBEAT_PERIOD=$new_slave_heartbeat_period;
START SLAVE;

connection slave_2;
STOP SLAVE;
eval CHANGE MASTER TO MASTER_HEARTBEAT_PERIOD=$new_slave_heartbeat_period;
SET GLOBAL RESET_SECONDS_BEHIND_MASTER=0;
START SLAVE;

# Send some binlog events so that last_master_timestamp > 0
connection master;
CREATE TABLE t1(a INT);
INSERT INTO t1 VALUES(0);

# Sync slave 1
connection master;
source include/save_master_pos.inc;
connection slave;
source include/sync_slave_sql.inc;

# Sync slave 2
connection slave;
source include/save_master_pos.inc;
connection slave_2;
source include/sync_slave_sql.inc;

# Check if seconds behind master is between 0 and heartbeat period
connection slave_2;
sleep $heartbeat_guarantee_sleep;
let $first= query_get_value("SHOW SLAVE STATUS", Seconds_Behind_Master, 1);

let $assert_cond= ($first <= $new_slave_heartbeat_period && $first >= 0);
let $assert_text= Seconds behind master should be between 0 and HB period;
source include/assert.inc;


# Check if seconds behind master keeps increasing when intermediate slave is
# stopped
connection slave;
STOP SLAVE;
connection slave_2;
sleep $heartbeat_guarantee_sleep;
let $first= query_get_value("SHOW SLAVE STATUS", Seconds_Behind_Master, 1);
sleep $heartbeat_guarantee_sleep;
let $second= query_get_value("SHOW SLAVE STATUS", Seconds_Behind_Master, 1);

let $assert_cond= $first < $second;
let $assert_text= Seconds behind master should keep increasing when intermidiate slave is stopped;
source include/assert.inc;

connection slave;
START SLAVE;


# Kill the master
let $rpl_server_number= 1;
let $rpl_force_stop= 1;
source include/rpl_stop_server.inc;


# Check if seconds behind master keeps increasing
connection slave_2;
sleep $heartbeat_guarantee_sleep;
let $first= query_get_value("SHOW SLAVE STATUS", Seconds_Behind_Master, 1);
sleep $heartbeat_guarantee_sleep;
let $second= query_get_value("SHOW SLAVE STATUS", Seconds_Behind_Master, 1);

let $assert_cond= $first < $second;
let $assert_text= Seconds behind master should keep increasing when master is killed;
source include/assert.inc;

let $rpl_server_number= 1;
source include/rpl_start_server.inc;

connection master;
DROP TABLE t1;

# Sync slaves
sync_slave_with_master slave;
sync_slave_with_master slave_2;

connection slave;
STOP SLAVE;
eval CHANGE MASTER TO MASTER_HEARTBEAT_PERIOD=$old_slave_heartbeat_period;
START SLAVE;

connection slave_2;
STOP SLAVE;
eval CHANGE MASTER TO MASTER_HEARTBEAT_PERIOD=$old_slave_heartbeat_period;
SET GLOBAL RESET_SECONDS_BEHIND_MASTER=1;
START SLAVE;

source include/rpl_end.inc;
1 change: 1 addition & 0 deletions sql/binlog.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2683,6 +2683,7 @@ MYSQL_BIN_LOG::MYSQL_BIN_LOG(uint *sync_period)
index_file_name[0] = 0;
engine_binlog_file[0] = 0;
engine_binlog_max_gtid.clear();
last_master_timestamp.store(0);
memset(&index_file, 0, sizeof(index_file));
memset(&purge_index_file, 0, sizeof(purge_index_file));
memset(&crash_safe_index_file, 0, sizeof(crash_safe_index_file));
Expand Down
3 changes: 3 additions & 0 deletions sql/binlog.h
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,9 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
my_off_t engine_binlog_pos;
Gtid engine_binlog_max_gtid;

// copy of Relay_log_info::last_master_timestamp
std::atomic<time_t> last_master_timestamp;

#ifdef HAVE_PSI_INTERFACE
void set_psi_keys(PSI_mutex_key key_LOCK_index,
PSI_mutex_key key_LOCK_commit,
Expand Down
12 changes: 7 additions & 5 deletions sql/rpl_master.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include <errno.h>
#include <sys/socket.h>
#include "binlog.h"
#include "rpl_slave.h"

int max_binlog_dump_events = 0; // unlimited
my_bool opt_sporadic_binlog_dump_fail = 0;
Expand Down Expand Up @@ -614,11 +615,12 @@ static int send_heartbeat_event(NET* net, String* packet,
char header[LOG_EVENT_HEADER_LEN];
my_bool do_checksum= checksum_alg_arg != BINLOG_CHECKSUM_ALG_OFF &&
checksum_alg_arg != BINLOG_CHECKSUM_ALG_UNDEF;
/*
'when' (the timestamp) is set to 0 so that slave could distinguish between
real and fake Rotate events (if necessary)
*/
memset(header, 0, 4); // when

// NOTE: if @last_master_timestamp is provided we're a slave and we use this
// value in the HB event otherwise we use now()
time_t ts= mysql_bin_log.last_master_timestamp.load();
if (!ts) ts= time(0);
memcpy(header, &ts, 4);

header[EVENT_TYPE_OFFSET] = HEARTBEAT_LOG_EVENT;

Expand Down
3 changes: 1 addition & 2 deletions sql/rpl_rli.cc
Original file line number Diff line number Diff line change
Expand Up @@ -353,8 +353,7 @@ void Relay_log_info::reset_notified_checkpoint(ulong shift, time_t new_ts,
*/
if (new_ts > last_master_timestamp)
{
penultimate_master_timestamp= last_master_timestamp;
last_master_timestamp= std::min(time(nullptr), new_ts);
set_last_master_timestamp(std::min(time(nullptr), new_ts));
}

if (need_data_lock)
Expand Down
8 changes: 8 additions & 0 deletions sql/rpl_rli.h
Original file line number Diff line number Diff line change
Expand Up @@ -319,11 +319,19 @@ class Relay_log_info : public Rpl_info
*/
Enum_slave_caughtup slave_has_caughtup;

// NOTE: a copy is also maintained in MYSQL_BIN_LOG
time_t last_master_timestamp;

// cache value for sql thread
time_t penultimate_master_timestamp;

void set_last_master_timestamp(time_t ts)
{
penultimate_master_timestamp= last_master_timestamp;
last_master_timestamp= ts;
mysql_bin_log.last_master_timestamp.store(last_master_timestamp);
}

#define PEAK_LAG_MAX_SECS 512
time_t peak_lag_last[PEAK_LAG_MAX_SECS];
ulong events_since_last_sample;
Expand Down
18 changes: 15 additions & 3 deletions sql/rpl_slave.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4594,9 +4594,8 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli)

if (tentative_last_master_timestamp > rli->last_master_timestamp)
{
rli->penultimate_master_timestamp= rli->last_master_timestamp;
rli->last_master_timestamp= std::min(tentative_last_master_timestamp,
time(nullptr));
rli->set_last_master_timestamp(std::min(tentative_last_master_timestamp,
time(nullptr)));
}
DBUG_ASSERT(rli->last_master_timestamp >= 0);
}
Expand Down Expand Up @@ -7470,6 +7469,17 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len)
mi->received_heartbeats++;
mi->last_heartbeat= my_time(0);

/*
Update the last_master_timestamp if the heartbeat from the master
has a greater timestamp value, this makes sure last_master_timestamp
is always monotonically increasing
*/
mysql_mutex_lock(&rli->data_lock);
if (hb.when.tv_sec > rli->last_master_timestamp)
{
rli->set_last_master_timestamp(hb.when.tv_sec);
}
mysql_mutex_unlock(&rli->data_lock);

/*
During GTID protocol, if the master skips transactions,
Expand Down Expand Up @@ -9167,6 +9177,8 @@ int reset_slave(THD *thd, Master_info* mi)
goto err;
}

mysql_bin_log.last_master_timestamp.store(0);

/* Clear master's log coordinates and associated information */
DBUG_ASSERT(!mi->rli || !mi->rli->slave_running); // none writes in rli table
mi->clear_in_memory_info(thd->lex->reset_slave_info.all);
Expand Down

0 comments on commit eb9a3db

Please sign in to comment.