Skip to content

Commit

Permalink
shardbeats for silent shard detection
Browse files Browse the repository at this point in the history
Summary:
1. shardbeats are injected using INSERT on a blackhole table.
2. stop and start of the shardbeater working
3. new status command which prints a good report on shardbeater
4. we capture the most critical failures and also print valuable
log lines to capture promotion time-frames.

Reviewed By: abhinav04sharma

Differential Revision: D32055026

fbshipit-source-id: 3019b0ebb98724936094b10a5b479614ecc9fc2d
  • Loading branch information
anirbanr-fb authored and facebook-github-bot committed Nov 25, 2021
1 parent f9e4f37 commit f7c9e2f
Show file tree
Hide file tree
Showing 27 changed files with 1,806 additions and 5 deletions.
3 changes: 3 additions & 0 deletions include/my_sqlcommand.h
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,9 @@ enum enum_sql_command {
SQLCOM_PURGE_RAFT_LOG,
SQLCOM_PURGE_RAFT_LOG_BEFORE,
SQLCOM_SHOW_RAFT_LOGS,
SQLCOM_START_SHARDBEATER,
SQLCOM_STOP_SHARDBEATER,
SQLCOM_SHOW_SHARDBEATER_STAT,
/* This should be the last !!! */
SQLCOM_END
};
Expand Down
1 change: 1 addition & 0 deletions mysql-test/r/information_schema_keywords.result
Original file line number Diff line number Diff line change
Expand Up @@ -560,6 +560,7 @@ SERIALIZABLE 0
SERVER 0
SESSION 0
SET 1
SHARDBEATER 0
SHARE 0
SHARED 0
SHOW 1
Expand Down
27 changes: 26 additions & 1 deletion mysql-test/r/mysqld--help-notwin.result
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,9 @@ The following options may be given as the first argument:
This will control the intrinsic tmp table storage engine.
If true then rocksdb intrinsic tmp table will be created.
Otherwise default will be innodb intrinsic tmp tables.
--enable-shardbeater
Enables Shardbeater
(Defaults to on; use --skip-enable-shardbeater to disable.)
--enable-sql-wsenv Enable dumping/loading file to/from warm storage for
SELECT INTO OUTFILE/LOAD DATA. Set true to enable.
--enable-super-log-bin-read-only
Expand Down Expand Up @@ -2273,6 +2276,21 @@ The following options may be given as the first argument:
to TRUE, the plugin will flag associated authenticated
accounts to be mapped to proxy users when the server
option check_proxy_users is enabled.
--shardbeat-blocked-dbs[=name]
List of comma separated database names on which not to
insert shardbeats
--shardbeat-interval-ms[=#]
Interval in milliseconds in which shardbeats are injected
on silent databases
--shardbeat-query-comment-format[=name]
Formatted string to be used for shardbeats insert
--shardbeat-table[=name]
Name of table in which to insert a shardbeat
--shardbeat-user[=name]
Name of user as which to insert regular shardbeats
--shardbeat-vlog-level[=#]
Verbosity level of logging into mysqld error log for
shardbeater
--show-binlogs-encryption
Scan binlogs to determine encryption property during show
binlogs
Expand Down Expand Up @@ -2808,6 +2826,7 @@ enable-query-checksum FALSE
enable-raft-plugin FALSE
enable-resultset-checksum FALSE
enable-rocksdb-intrinsic-tmp-table FALSE
enable-shardbeater TRUE
enable-sql-wsenv FALSE
enable-super-log-bin-read-only FALSE
enable-user-tables-engine-check FALSE
Expand Down Expand Up @@ -3061,7 +3080,7 @@ performance-schema-max-socket-classes 10
performance-schema-max-socket-instances -1
performance-schema-max-sql-text-length 1024
performance-schema-max-stage-classes 175
performance-schema-max-statement-classes 231
performance-schema-max-statement-classes 234
performance-schema-max-statement-stack 10
performance-schema-max-table-handles -1
performance-schema-max-table-instances -1
Expand Down Expand Up @@ -3338,6 +3357,12 @@ session-track-system-variables time_zone,autocommit,character_set_client,charact
session-track-transaction-info OFF
set-read-only-on-shutdown FALSE
sha256-password-proxy-users FALSE
shardbeat-blocked-dbs
shardbeat-interval-ms 60000
shardbeat-query-comment-format
shardbeat-table blackhole
shardbeat-user
shardbeat-vlog-level 0
show-binlogs-encryption TRUE
show-create-table-verbosity FALSE
show-old-temporals FALSE
Expand Down
83 changes: 83 additions & 0 deletions mysql-test/suite/rpl_raft/r/rpl_raft_shardbeats.result
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
include/raft_3_node.inc
Warnings:
Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
Warnings:
Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
[connection master]
include/rpl_connect.inc [creating server_4]
include/rpl_connect.inc [creating server_5]
show status like 'rpl_raft_role';
Variable_name Value
Rpl_raft_role LEADER
show status like 'rpl_raft_role';
Variable_name Value
Rpl_raft_role FOLLOWER
reset master;
show status like 'rpl_raft_role';
Variable_name Value
Rpl_raft_role FOLLOWER
reset master;
create database db1;
create database db2;
show shardbeater status;
ERROR HY000: Shardbeats not ON yet
set global shardbeat_interval_ms = 2000;
set global shardbeat_vlog_level = 1;
start shardbeater;
ERROR HY000: start shardbeater operation is disallowed on when shardbeater user is empty
set global shardbeat_query_comment_format='WH:1 .T:sb .I:{ipaddr} #S:{shard} #rs:{replicaset}';
create user 'dba_scripts:sys.database'@'%';
set global shardbeat_user='dba_scripts:sys.database';
set global shardbeat_table='';
start shardbeater;
ERROR HY000: start shardbeater operation is disallowed on when shardbeat_table is empty
set global shardbeat_table = default;
start shardbeater;
include/assert.inc [no user facing dbs hence test is skipped as well;]
ALTER DATABASE db1 DB_METADATA '{"shard": "50000000", "rs": "1234579"}';
include/assert.inc [user facing dbs hence test is not skipped;]
include/assert.inc [no permissions so we should fail shardbeats]
include/assert.inc [we should get permission error 1142]
GRANT INSERT ON db1.* to 'dba_scripts:sys.database'@'%';
include/assert.inc [now that we have grants shardbeats should go through]
use db1;
create table blackhole(i INT);
include/assert.inc [now that we have grants shardbeats should go through]
ALTER DATABASE db2 DB_METADATA '{"shard": "50000001", "rs": "1234579"}';
GRANT INSERT ON db2.* to 'dba_scripts:sys.database'@'%';
use db2;
create table blackhole(i INT);
include/assert.inc [now that we have grants shardbeats should go through]
include/assert.inc [the number of shardbeats should be 5]
include/assert.inc [the number of shardbeats should be 5]
show shardbeater status;
ERROR HY000: Shardbeats not ON yet
"Transfering leadership: server_1 -> server_2"
set @@global.rpl_raft_new_leader_uuid = 'uuid2';
"The leadership has transferred and server_1 is now a FOLLOWER"
include/assert.inc [no new shardbeats on follower]
"Transfering leadership: server_2 -> server_1"
set @@global.rpl_raft_new_leader_uuid = 'uuid1';
include/assert.inc [the number of shardbeats should be 5]
include/assert.inc [the number of shardbeats should be 5]
Cleanup
stop shardbeater;
set global shardbeat_interval_ms = default;
set global shardbeat_vlog_level = default;
set global shardbeat_query_comment_format= default;
set global shardbeat_user= default;
set global shardbeat_table= default;
DROP DATABASE db1;
DROP DATABASE db2;
DROP USER IF EXISTS 'dba_scripts:sys.database'@'%';
include/sync_slave_sql_with_master.inc
include/sync_slave_sql_with_master.inc
include/rpl_end.inc
200 changes: 200 additions & 0 deletions mysql-test/suite/rpl_raft/t/rpl_raft_shardbeats.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
# Shardbeater test on a 3-node raft topology. It exercises START/STOP
# SHARDBEATER and SHOW SHARDBEATER STATUS, the failure path when the shardbeat
# user has no grants or the shardbeat table is missing, and the shardbeat
# counters across a leadership transfer away from and back to server_1.
source ../include/raft_3_node.inc;
# --source include/master-slave.inc
--source include/have_binlog_format_row.inc

# Capture each peer's raft uuid; $uuid1/$uuid2 are used below to drive the
# leadership transfers and to mask the real uuids in the recorded output.
connection server_1;
let $uuid1= `select variable_value from performance_schema.global_status where variable_name = 'Rpl_raft_peer_uuid'`;
connection server_2;
let $uuid2= `select variable_value from performance_schema.global_status where variable_name = 'Rpl_raft_peer_uuid'`;
connection server_3;
let $uuid3= `select variable_value from performance_schema.global_status where variable_name = 'Rpl_raft_peer_uuid'`;


# connection slave
#connection slave;
#set global read_only=1;

# create 2 user dbs. Equivalent of shards
connection server_1;
create database db1;
create database db2;

# Shardbeater is not ON yet.

--error ER_DISALLOWED_OPERATION
show shardbeater status;

# Use a 2 second interval so that several shardbeats fit inside the sleeps
# used by this test.
set global shardbeat_interval_ms = 2000;
set global shardbeat_vlog_level = 1;

# Shardbeater start should fail since user and table is not populated

--error ER_DISALLOWED_OPERATION
start shardbeater;
#show shardbeater status;

set global shardbeat_query_comment_format='WH:1 .T:sb .I:{ipaddr} #S:{shard} #rs:{replicaset}';

# Create the user
create user 'dba_scripts:sys.database'@'%';
set global shardbeat_user='dba_scripts:sys.database';

# Shardbeater start should fail as the table name is not present

set global shardbeat_table='';
--error ER_DISALLOWED_OPERATION
start shardbeater;

# Set the value of shardbeat_table to blackhole which is yet to be created.
set global shardbeat_table = default;

# Shardbeater start should now succeed.

start shardbeater;
--sleep 5

# Neither database has DB_METADATA yet, so no shardbeat is expected to have
# succeeded.
--let $no_db_ok = query_get_value(SHOW SHARDBEATER STATUS, Num_OK, 1)
--let $db_name = query_get_value(SHOW SHARDBEATER STATUS, Db, 1)
--let $assert_text = no user facing dbs hence test is skipped as well;
--let $assert_cond = $no_db_ok = 0;
--source include/assert.inc

ALTER DATABASE db1 DB_METADATA '{"shard": "50000000", "rs": "1234579"}';
--sleep 5

# db1 now carries shard metadata, but the shardbeat user has no grants yet:
# expect zero successes, at least one failure, and error code 1142.
--let $test_db_ok = query_get_value(SHOW SHARDBEATER STATUS, Num_OK, 1)
--let $test_db_fail = query_get_value(SHOW SHARDBEATER STATUS, Num_Fail, 1)
--let $last_few_fail = query_get_value(SHOW SHARDBEATER STATUS, Last_few_failures, 1)
--let $assert_text = user facing dbs hence test is not skipped;
--let $assert_cond = $test_db_ok = 0;
--source include/assert.inc

--let $assert_text = no permissions so we should fail shardbeats
--let $assert_cond = $test_db_fail > 0;
--source include/assert.inc

--let $assert_text = we should get permission error 1142
--let $assert_cond = "$last_few_fail" LIKE "ErrCode: 1142%"
--source include/assert.inc

# Since grants was missing writes would have failed. Lets now give
# permissions

GRANT INSERT ON db1.* to 'dba_scripts:sys.database'@'%';
--sleep 5

# The blackhole table has not been created yet, so the success counter is
# still expected to be 0 here despite the assert text.
--let $test_db_ok = query_get_value(SHOW SHARDBEATER STATUS, Num_OK, 1)
--let $assert_text = now that we have grants shardbeats should go through
--let $assert_cond = $test_db_ok = 0;
--source include/assert.inc

use db1;
create table blackhole(i INT);
--sleep 5

# With grants and the table both in place, shardbeats on db1 must succeed.
--let $test_db_ok = query_get_value(SHOW SHARDBEATER STATUS, Num_OK, 1)
--let $assert_text = now that we have grants shardbeats should go through
--let $assert_cond = $test_db_ok > 0;
--source include/assert.inc

ALTER DATABASE db2 DB_METADATA '{"shard": "50000001", "rs": "1234579"}';
GRANT INSERT ON db2.* to 'dba_scripts:sys.database'@'%';
use db2;
create table blackhole(i INT);
--sleep 5

# The product is only > 0 when both status rows report successes. The
# expression is stored as text and evaluated by assert.inc.
--let $test_db_ok1 = query_get_value(SHOW SHARDBEATER STATUS, Num_OK, 1)
--let $test_db_ok2 = query_get_value(SHOW SHARDBEATER STATUS, Num_OK, 2)
--let $test_db_ok = ($test_db_ok1 * $test_db_ok2)

--let $assert_text = now that we have grants shardbeats should go through
--let $assert_cond = $test_db_ok > 0;
--source include/assert.inc

# measure the number of heartbeats in 10 seconds
--let $test_db_ok1_b = query_get_value(SHOW SHARDBEATER STATUS, Num_OK, 1)
--let $test_db_ok2_b = query_get_value(SHOW SHARDBEATER STATUS, Num_OK, 2)

--sleep 11

--let $test_db_ok1_a = query_get_value(SHOW SHARDBEATER STATUS, Num_OK, 1)
--let $test_db_ok2_a = query_get_value(SHOW SHARDBEATER STATUS, Num_OK, 2)

--let $assert_text = the number of shardbeats should be 5
--let $assert_cond = ($test_db_ok1_a - $test_db_ok1_b) >= 5
--source include/assert.inc

--let $assert_text = the number of shardbeats should be 5
--let $assert_cond = ($test_db_ok2_a - $test_db_ok2_b) >= 5
--source include/assert.inc

# The shardbeater was never started on server_2, so the status command is
# rejected there.
connection server_2;
--error ER_DISALLOWED_OPERATION
show shardbeater status;

echo "Transfering leadership: server_1 -> server_2";
connection server_1;
replace_result $uuid2 uuid2;
eval set @@global.rpl_raft_new_leader_uuid = '$uuid2';

# server_1 turning read_only is used as the signal that it has stepped down.
let $wait_condition= select @@global.read_only = 1;
source include/wait_condition.inc;

# NOTE: the terminating ';' on this echo is required. Without it mysqltest
# keeps reading up to the next ';' and the follower check below is printed as
# echo text instead of being executed.
echo "The leadership has transferred and server_1 is now a FOLLOWER";

# Sample the success counter twice, 5 seconds apart, while server_1 is a
# follower; it must not move.
--let $test_db_ok_b = query_get_value(SHOW SHARDBEATER STATUS, Num_OK, 1)
--sleep 5
--let $test_db_ok_a = query_get_value(SHOW SHARDBEATER STATUS, Num_OK, 1)

--let $assert_text = no new shardbeats on follower
--let $assert_cond = "$test_db_ok_b" = "$test_db_ok_a"
--source include/assert.inc

echo "Transfering leadership: server_2 -> server_1";
connection server_2;
replace_result $uuid1 uuid1;
eval set @@global.rpl_raft_new_leader_uuid = '$uuid1';
connection server_1;
let $wait_condition= select @@global.read_only = 0;
source include/wait_condition.inc;

# measure the number of heartbeats in 10 seconds
--let $test_db_ok1_b = query_get_value(SHOW SHARDBEATER STATUS, Num_OK, 1)
--let $test_db_ok2_b = query_get_value(SHOW SHARDBEATER STATUS, Num_OK, 2)

--sleep 11

--let $test_db_ok1_a = query_get_value(SHOW SHARDBEATER STATUS, Num_OK, 1)
--let $test_db_ok2_a = query_get_value(SHOW SHARDBEATER STATUS, Num_OK, 2)

--let $assert_text = the number of shardbeats should be 5
--let $assert_cond = ($test_db_ok1_a - $test_db_ok1_b) >= 5
--source include/assert.inc

--let $assert_text = the number of shardbeats should be 5
--let $assert_cond = ($test_db_ok2_a - $test_db_ok2_b) >= 5
--source include/assert.inc

# =============================================================================
# Cleanup
# =============================================================================

--echo Cleanup

connection server_1;
stop shardbeater;
set global shardbeat_interval_ms = default;
set global shardbeat_vlog_level = default;
set global shardbeat_query_comment_format= default;
set global shardbeat_user= default;
set global shardbeat_table= default;
DROP DATABASE db1;
DROP DATABASE db2;
DROP USER IF EXISTS 'dba_scripts:sys.database'@'%';

let $sync_slave_connection= server_2;
source include/sync_slave_sql_with_master.inc;
let $sync_slave_connection= server_3;
source include/sync_slave_sql_with_master.inc;

source include/rpl_end.inc;
6 changes: 6 additions & 0 deletions mysql-test/suite/sys_vars/r/all_vars.result
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ enable_acl_db_cache
enable_acl_db_cache
enable_hlc_bound
enable_hlc_bound
enable_shardbeater
enable_shardbeater
force_pk_for_equality_preds_on_pk
force_pk_for_equality_preds_on_pk
generated_random_password_length
Expand Down Expand Up @@ -109,6 +111,10 @@ regexp_stack_limit
regexp_time_limit
regexp_time_limit
resultset_metadata
shardbeat_interval_ms
shardbeat_interval_ms
shardbeat_vlog_level
shardbeat_vlog_level
sql_require_primary_key
sql_require_primary_key
temptable_use_mmap
Expand Down
Loading

0 comments on commit f7c9e2f

Please sign in to comment.