From a24795709e952b09c47ca406a3aba81dc4f73899 Mon Sep 17 00:00:00 2001 From: Przemyslaw Skibinski Date: Thu, 30 Jul 2020 16:01:48 +0200 Subject: [PATCH] Add a new option to make MySQL server truncate binlogs during crash recovery. (#1061) Summary: This diff adds a new option to trim binlog during recovery. The following happens if this option is set: 1. All prepared trasnactions in the engine are rolled back during engine recovery 2. The binlog file (which was marked 'IN USE') will be truncated to the position as reported by engine as the last commited gtid position in the binlog Reference Patch: https://github.com/facebook/mysql-5.6/commit/96308865f3c Reference Patch: https://github.com/facebook/mysql-5.6/commit/6288eda9d6e Reference Patch: https://github.com/facebook/mysql-5.6/commit/12937e8de06 Reference Patch: https://github.com/facebook/mysql-5.6/commit/fe3826a6743 Originally Reviewed By: abhinav04sharma Pull Request resolved: https://github.com/facebook/mysql-5.6/pull/1061 Test Plan: Build mysql Reviewed By: luqun Differential Revision: D18812324 Pulled By: bhatvinay --- mysql-test/include/have_rocksdb.inc | 11 + mysql-test/r/mysqld--help-notwin.result | 5 + .../r/binlog_rotate_block_on_trxs_rbr.result | 49 +++++ .../r/binlog_rotate_block_on_trxs_sbr.result | 45 ++++ .../binlog/r/binlog_trim_option_only.result | 14 ++ .../r/binlog_truncate_across_flush_rbr.result | 91 ++++++++ .../r/binlog_truncate_across_flush_sbr.result | 84 ++++++++ .../r/binlog_truncate_crash_recovery.result | 38 ++++ .../binlog/t/binlog_rotate_block_on_trxs.inc | 71 ++++++ ...binlog_rotate_block_on_trxs_rbr-master.opt | 3 + .../t/binlog_rotate_block_on_trxs_rbr.test | 9 + ...binlog_rotate_block_on_trxs_sbr-master.opt | 3 + .../t/binlog_rotate_block_on_trxs_sbr.test | 9 + .../t/binlog_trim_option_only-master.opt | 2 + .../t/binlog_trim_option_only-slave.opt | 2 + .../binlog/t/binlog_trim_option_only.test | 23 ++ .../binlog/t/binlog_truncate_across_flush.inc | 123 +++++++++++ 
...inlog_truncate_across_flush_rbr-master.opt | 2 + .../t/binlog_truncate_across_flush_rbr.test | 22 ++ ...inlog_truncate_across_flush_sbr-master.opt | 2 + .../t/binlog_truncate_across_flush_sbr.test | 22 ++ .../binlog_truncate_crash_recovery-master.opt | 1 + .../t/binlog_truncate_crash_recovery.test | 72 +++++++ .../innodb/r/mysqldump_max_recordsize.result | 1 + .../innodb/t/mysqldump_max_recordsize.test | 1 + mysql-test/suite/perfschema/r/relaylog.result | 6 + .../rocksdb/r/binlog_truncate_backup.result | 28 +++ .../t/binlog_truncate_backup-master.opt | 2 + .../rocksdb/t/binlog_truncate_backup.test | 75 +++++++ sql/binlog.cc | 203 +++++++++++++++++- sql/binlog.h | 22 +- sql/mysqld.cc | 29 ++- sql/mysqld.h | 5 +- sql/options_mysqld.h | 3 +- sql/rpl_rli.cc | 5 +- sql/sql_class.h | 1 + sql/xa.cc | 7 +- 37 files changed, 1070 insertions(+), 21 deletions(-) create mode 100644 mysql-test/include/have_rocksdb.inc create mode 100644 mysql-test/suite/binlog/r/binlog_rotate_block_on_trxs_rbr.result create mode 100644 mysql-test/suite/binlog/r/binlog_rotate_block_on_trxs_sbr.result create mode 100644 mysql-test/suite/binlog/r/binlog_trim_option_only.result create mode 100644 mysql-test/suite/binlog/r/binlog_truncate_across_flush_rbr.result create mode 100644 mysql-test/suite/binlog/r/binlog_truncate_across_flush_sbr.result create mode 100644 mysql-test/suite/binlog/r/binlog_truncate_crash_recovery.result create mode 100644 mysql-test/suite/binlog/t/binlog_rotate_block_on_trxs.inc create mode 100644 mysql-test/suite/binlog/t/binlog_rotate_block_on_trxs_rbr-master.opt create mode 100644 mysql-test/suite/binlog/t/binlog_rotate_block_on_trxs_rbr.test create mode 100644 mysql-test/suite/binlog/t/binlog_rotate_block_on_trxs_sbr-master.opt create mode 100644 mysql-test/suite/binlog/t/binlog_rotate_block_on_trxs_sbr.test create mode 100644 mysql-test/suite/binlog/t/binlog_trim_option_only-master.opt create mode 100644 mysql-test/suite/binlog/t/binlog_trim_option_only-slave.opt 
create mode 100644 mysql-test/suite/binlog/t/binlog_trim_option_only.test create mode 100644 mysql-test/suite/binlog/t/binlog_truncate_across_flush.inc create mode 100644 mysql-test/suite/binlog/t/binlog_truncate_across_flush_rbr-master.opt create mode 100644 mysql-test/suite/binlog/t/binlog_truncate_across_flush_rbr.test create mode 100644 mysql-test/suite/binlog/t/binlog_truncate_across_flush_sbr-master.opt create mode 100644 mysql-test/suite/binlog/t/binlog_truncate_across_flush_sbr.test create mode 100644 mysql-test/suite/binlog/t/binlog_truncate_crash_recovery-master.opt create mode 100644 mysql-test/suite/binlog/t/binlog_truncate_crash_recovery.test create mode 100644 mysql-test/suite/rocksdb/r/binlog_truncate_backup.result create mode 100644 mysql-test/suite/rocksdb/t/binlog_truncate_backup-master.opt create mode 100644 mysql-test/suite/rocksdb/t/binlog_truncate_backup.test diff --git a/mysql-test/include/have_rocksdb.inc b/mysql-test/include/have_rocksdb.inc new file mode 100644 index 000000000000..78e4dc54e61e --- /dev/null +++ b/mysql-test/include/have_rocksdb.inc @@ -0,0 +1,11 @@ +if (`SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'rocksdb' AND support IN ('DEFAULT')`) +{ + --skip Test requires default engine RocksDB +} + +--disable_query_log +# Table statistics can vary depending on when the memtables are flushed, so +# flush them at the beginning of the test to ensure the test runs consistently. +set global rocksdb_force_flush_memtable_now = true; +--enable_query_log + diff --git a/mysql-test/r/mysqld--help-notwin.result b/mysql-test/r/mysqld--help-notwin.result index 77ff281ec937..e73e284a6b53 100644 --- a/mysql-test/r/mysqld--help-notwin.result +++ b/mysql-test/r/mysqld--help-notwin.result @@ -1584,6 +1584,10 @@ The following options may be given as the first argument: This option is used to let the server know when to extract the write set which will be used for various purposes. 
+ --trim-binlog-to-recover + Trim the last binlog (if required) to the position until + which the engine has successfully committed all + transactions. --updatable-views-with-limit=name YES = Don't issue an error message (warning only) if a VIEW without presence of a key of the underlying table is @@ -2031,6 +2035,7 @@ transaction-isolation REPEATABLE-READ transaction-prealloc-size 4096 transaction-read-only FALSE transaction-write-set-extraction XXHASH64 +trim-binlog-to-recover FALSE updatable-views-with-limit YES upgrade AUTO validate-config FALSE diff --git a/mysql-test/suite/binlog/r/binlog_rotate_block_on_trxs_rbr.result b/mysql-test/suite/binlog/r/binlog_rotate_block_on_trxs_rbr.result new file mode 100644 index 000000000000..90c70fea0ec1 --- /dev/null +++ b/mysql-test/suite/binlog/r/binlog_rotate_block_on_trxs_rbr.result @@ -0,0 +1,49 @@ +CALL mtr.add_suppression("Timeout waiting for reply of binlog *"); +SET @save.rpl_semi_sync_master_timeout = @@global.rpl_semi_sync_master_timeout; +SET @save.rpl_semi_sync_master_enabled = @@global.rpl_semi_sync_master_enabled; +[connection default] +CREATE TABLE t1(c1 INT PRIMARY KEY); +CREATE TABLE blackhole(c1 INT PRIMARY KEY) ENGINE=blackhole; +INSERT INTO t1 VALUES(1); +COMMIT; +# Create a 20 sec semisync timeout +SET global rpl_semi_sync_master_timeout=20000; +SET global rpl_semi_sync_master_enabled=1; +[connection conn1] +INSERT INTO blackhole VALUES (1);; +[connection conn2] +FLUSH LOGS; +[connection conn2] +[connection conn1] +[connection default] +INSERT INTO t1 VALUES (2); +# The first binlog file should contain the trx on blackhole table since +# the flush was blocked by the trx. 
The insert (of value 2) into t1 +# should be in the rotated file (second binlog file) +include/show_binlog_events.inc +Log_name Pos Event_type Server_id End_log_pos Info +master-bin.000001 # Query # # BEGIN +master-bin.000001 # Table_map # # table_id: # (mtr.test_suppressions) +master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F +master-bin.000001 # Xid # # COMMIT /* XID */ +master-bin.000001 # Query # # use `test`; CREATE TABLE t1(c1 INT PRIMARY KEY) +master-bin.000001 # Query # # use `test`; CREATE TABLE blackhole(c1 INT PRIMARY KEY) ENGINE=blackhole +master-bin.000001 # Query # # BEGIN +master-bin.000001 # Table_map # # table_id: # (test.t1) +master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F +master-bin.000001 # Xid # # COMMIT /* XID */ +master-bin.000001 # Query # # BEGIN +master-bin.000001 # Table_map # # table_id: # (test.blackhole) +master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F +master-bin.000001 # Query # # COMMIT +master-bin.000001 # Rotate # # master-bin.000002;pos=POS +include/show_binlog_events.inc +Log_name Pos Event_type Server_id End_log_pos Info +master-bin.000002 # Query # # BEGIN +master-bin.000002 # Table_map # # table_id: # (test.t1) +master-bin.000002 # Write_rows # # table_id: # flags: STMT_END_F +master-bin.000002 # Xid # # COMMIT /* XID */ +DROP TABLE t1; +DROP TABLE blackhole; +SET @@global.rpl_semi_sync_master_timeout = @save.rpl_semi_sync_master_timeout; +SET @@global.rpl_semi_sync_master_enabled = @save.rpl_semi_sync_master_enabled; diff --git a/mysql-test/suite/binlog/r/binlog_rotate_block_on_trxs_sbr.result b/mysql-test/suite/binlog/r/binlog_rotate_block_on_trxs_sbr.result new file mode 100644 index 000000000000..eb861568ac25 --- /dev/null +++ b/mysql-test/suite/binlog/r/binlog_rotate_block_on_trxs_sbr.result @@ -0,0 +1,45 @@ +CALL mtr.add_suppression("Timeout waiting for reply of binlog *"); +SET @save.rpl_semi_sync_master_timeout = @@global.rpl_semi_sync_master_timeout; +SET 
@save.rpl_semi_sync_master_enabled = @@global.rpl_semi_sync_master_enabled; +[connection default] +CREATE TABLE t1(c1 INT PRIMARY KEY); +CREATE TABLE blackhole(c1 INT PRIMARY KEY) ENGINE=blackhole; +INSERT INTO t1 VALUES(1); +COMMIT; +# Create a 20 sec semisync timeout +SET global rpl_semi_sync_master_timeout=20000; +SET global rpl_semi_sync_master_enabled=1; +[connection conn1] +INSERT INTO blackhole VALUES (1);; +[connection conn2] +FLUSH LOGS; +[connection conn2] +[connection conn1] +[connection default] +INSERT INTO t1 VALUES (2); +# The first binlog file should contain the trx on blackhole table since +# the flush was blocked by the trx. The insert (of value 2) into t1 +# should be in the rotated file (second binlog file) +include/show_binlog_events.inc +Log_name Pos Event_type Server_id End_log_pos Info +master-bin.000001 # Query # # BEGIN +master-bin.000001 # Query # # use `mtr`; INSERT INTO test_suppressions (pattern) VALUES ( NAME_CONST('pattern',_utf8mb4'Timeout waiting for reply of binlog *' COLLATE 'utf8mb4_0900_ai_ci')) +master-bin.000001 # Xid # # COMMIT /* XID */ +master-bin.000001 # Query # # use `test`; CREATE TABLE t1(c1 INT PRIMARY KEY) +master-bin.000001 # Query # # use `test`; CREATE TABLE blackhole(c1 INT PRIMARY KEY) ENGINE=blackhole +master-bin.000001 # Query # # BEGIN +master-bin.000001 # Query # # use `test`; INSERT INTO t1 VALUES(1) +master-bin.000001 # Xid # # COMMIT /* XID */ +master-bin.000001 # Query # # BEGIN +master-bin.000001 # Query # # use `test`; INSERT INTO blackhole VALUES (1) +master-bin.000001 # Query # # COMMIT +master-bin.000001 # Rotate # # master-bin.000002;pos=POS +include/show_binlog_events.inc +Log_name Pos Event_type Server_id End_log_pos Info +master-bin.000002 # Query # # BEGIN +master-bin.000002 # Query # # use `test`; INSERT INTO t1 VALUES (2) +master-bin.000002 # Xid # # COMMIT /* XID */ +DROP TABLE t1; +DROP TABLE blackhole; +SET @@global.rpl_semi_sync_master_timeout = @save.rpl_semi_sync_master_timeout; +SET 
@@global.rpl_semi_sync_master_enabled = @save.rpl_semi_sync_master_enabled; diff --git a/mysql-test/suite/binlog/r/binlog_trim_option_only.result b/mysql-test/suite/binlog/r/binlog_trim_option_only.result new file mode 100644 index 000000000000..21d3b29d7ea2 --- /dev/null +++ b/mysql-test/suite/binlog/r/binlog_trim_option_only.result @@ -0,0 +1,14 @@ +include/master-slave.inc +Warnings: +Note #### Sending passwords in plain text without SSL/TLS is extremely insecure. +Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information. +[connection master] +CREATE TABLE t1 (pk int primary key); +INSERT INTO t1 VALUES (1); +INSERT INTO t1 VALUES (2); +INSERT INTO t1 VALUES (3); +INSERT INTO t1 VALUES (4); +INSERT INTO t1 VALUES (5); +DROP TABLE t1; +include/sync_slave_sql_with_master.inc +include/rpl_end.inc diff --git a/mysql-test/suite/binlog/r/binlog_truncate_across_flush_rbr.result b/mysql-test/suite/binlog/r/binlog_truncate_across_flush_rbr.result new file mode 100644 index 000000000000..8f8e30b380d2 --- /dev/null +++ b/mysql-test/suite/binlog/r/binlog_truncate_across_flush_rbr.result @@ -0,0 +1,91 @@ +CALL mtr.add_suppression("Taking backup from .*"); +CREATE TABLE t1(c1 INT); +CREATE TABLE blackhole (c1 INT PRIMARY KEY) ENGINE=BLACKHOLE; +INSERT INTO t1 VALUES(1); +INSERT INTO blackhole VALUES(1); +COMMIT; +INSERT INTO t1 VALUES(2); +COMMIT; +FLUSH LOGS; +# Crash right after flushing binary log +SET SESSION DEBUG="+d,crash_after_flush_binlog"; +BEGIN; +INSERT INTO t1 VALUES(3); +COMMIT; +ERROR HY000: Lost connection to MySQL server during query +# Restart the master server +# +# Verify that a transaction cannot be recovered during server +# recovery from a crash, which happened after flushing it +# to binary log. 
This is because the transaction is still marked +# as prepared in engine and will be rollbacked when +# trim-binlog-to-recover is set +# +include/assert.inc [There should be 2 rows in table t1] +INSERT INTO t1 VALUES(4); +COMMIT; +FLUSH ENGINE LOGS; +# +# verify that the latest binlog file is trimmed to the starting position +# of the first gtid event +# +include/show_binlog_events.inc +Log_name Pos Event_type Server_id End_log_pos Info +master-bin.000001 # Query # # BEGIN +master-bin.000001 # Table_map # # table_id: # (mtr.test_suppressions) +master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F +master-bin.000001 # Xid # # COMMIT /* XID */ +master-bin.000001 # Query # # use `test`; CREATE TABLE t1(c1 INT) +master-bin.000001 # Query # # use `test`; CREATE TABLE blackhole (c1 INT PRIMARY KEY) ENGINE=BLACKHOLE +master-bin.000001 # Query # # BEGIN +master-bin.000001 # Table_map # # table_id: # (test.t1) +master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F +master-bin.000001 # Xid # # COMMIT /* XID */ +master-bin.000001 # Query # # BEGIN +master-bin.000001 # Table_map # # table_id: # (test.blackhole) +master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F +master-bin.000001 # Query # # COMMIT +master-bin.000001 # Query # # BEGIN +master-bin.000001 # Table_map # # table_id: # (test.t1) +master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F +master-bin.000001 # Xid # # COMMIT /* XID */ +master-bin.000001 # Rotate # # master-bin.000002;pos=POS +include/show_binlog_events.inc +Log_name Pos Event_type Server_id End_log_pos Info +include/show_binlog_events.inc +Log_name Pos Event_type Server_id End_log_pos Info +master-bin.000003 # Query # # BEGIN +master-bin.000003 # Table_map # # table_id: # (test.t1) +master-bin.000003 # Write_rows # # table_id: # flags: STMT_END_F +master-bin.000003 # Xid # # COMMIT /* XID */ +master-bin.000003 # Query # # use `test`; FLUSH ENGINE LOGS +# Crash right after flushing binary log +SET SESSION 
DEBUG="+d,crash_after_flush_binlog"; +BEGIN; +INSERT INTO blackhole VALUES(2); +COMMIT; +ERROR HY000: Lost connection to MySQL server during query +# Restart the master server +INSERT INTO t1 VALUES(5); +COMMIT; +FLUSH ENGINE LOGS; +include/assert.inc [There should be 4 rows in table t1] +# +# verify that the latest binlog file is trimmed to the starting position +# of the first gtid event +# +include/show_binlog_events.inc +Log_name Pos Event_type Server_id End_log_pos Info +master-bin.000003 # Query # # BEGIN +master-bin.000003 # Table_map # # table_id: # (test.t1) +master-bin.000003 # Write_rows # # table_id: # flags: STMT_END_F +master-bin.000003 # Xid # # COMMIT /* XID */ +include/show_binlog_events.inc +Log_name Pos Event_type Server_id End_log_pos Info +master-bin.000004 # Query # # BEGIN +master-bin.000004 # Table_map # # table_id: # (test.t1) +master-bin.000004 # Write_rows # # table_id: # flags: STMT_END_F +master-bin.000004 # Xid # # COMMIT /* XID */ +master-bin.000004 # Query # # use `test`; FLUSH ENGINE LOGS +DROP TABLE t1; +DROP TABLE blackhole; diff --git a/mysql-test/suite/binlog/r/binlog_truncate_across_flush_sbr.result b/mysql-test/suite/binlog/r/binlog_truncate_across_flush_sbr.result new file mode 100644 index 000000000000..1410832aff4e --- /dev/null +++ b/mysql-test/suite/binlog/r/binlog_truncate_across_flush_sbr.result @@ -0,0 +1,84 @@ +CALL mtr.add_suppression("Taking backup from .*"); +CREATE TABLE t1(c1 INT); +CREATE TABLE blackhole (c1 INT PRIMARY KEY) ENGINE=BLACKHOLE; +INSERT INTO t1 VALUES(1); +INSERT INTO blackhole VALUES(1); +COMMIT; +INSERT INTO t1 VALUES(2); +COMMIT; +FLUSH LOGS; +# Crash right after flushing binary log +SET SESSION DEBUG="+d,crash_after_flush_binlog"; +BEGIN; +INSERT INTO t1 VALUES(3); +COMMIT; +ERROR HY000: Lost connection to MySQL server during query +# Restart the master server +# +# Verify that a transaction cannot be recovered during server +# recovery from a crash, which happened after flushing it +# to 
binary log. This is because the transaction is still marked +# as prepared in engine and will be rollbacked when +# trim-binlog-to-recover is set +# +include/assert.inc [There should be 2 rows in table t1] +INSERT INTO t1 VALUES(4); +COMMIT; +FLUSH ENGINE LOGS; +# +# verify that the latest binlog file is trimmed to the starting position +# of the first gtid event +# +include/show_binlog_events.inc +Log_name Pos Event_type Server_id End_log_pos Info +master-bin.000001 # Query # # BEGIN +master-bin.000001 # Query # # use `mtr`; INSERT INTO test_suppressions (pattern) VALUES ( NAME_CONST('pattern',_utf8mb4'Taking backup from .*' COLLATE 'utf8mb4_0900_ai_ci')) +master-bin.000001 # Xid # # COMMIT /* XID */ +master-bin.000001 # Query # # use `test`; CREATE TABLE t1(c1 INT) +master-bin.000001 # Query # # use `test`; CREATE TABLE blackhole (c1 INT PRIMARY KEY) ENGINE=BLACKHOLE +master-bin.000001 # Query # # BEGIN +master-bin.000001 # Query # # use `test`; INSERT INTO t1 VALUES(1) +master-bin.000001 # Xid # # COMMIT /* XID */ +master-bin.000001 # Query # # BEGIN +master-bin.000001 # Query # # use `test`; INSERT INTO blackhole VALUES(1) +master-bin.000001 # Query # # COMMIT +master-bin.000001 # Query # # BEGIN +master-bin.000001 # Query # # use `test`; INSERT INTO t1 VALUES(2) +master-bin.000001 # Xid # # COMMIT /* XID */ +master-bin.000001 # Rotate # # master-bin.000002;pos=POS +include/show_binlog_events.inc +Log_name Pos Event_type Server_id End_log_pos Info +include/show_binlog_events.inc +Log_name Pos Event_type Server_id End_log_pos Info +master-bin.000003 # Query # # BEGIN +master-bin.000003 # Query # # use `test`; INSERT INTO t1 VALUES(4) +master-bin.000003 # Xid # # COMMIT /* XID */ +master-bin.000003 # Query # # use `test`; FLUSH ENGINE LOGS +# Crash right after flushing binary log +SET SESSION DEBUG="+d,crash_after_flush_binlog"; +BEGIN; +INSERT INTO blackhole VALUES(2); +COMMIT; +ERROR HY000: Lost connection to MySQL server during query +# Restart the master 
server +INSERT INTO t1 VALUES(5); +COMMIT; +FLUSH ENGINE LOGS; +include/assert.inc [There should be 4 rows in table t1] +# +# verify that the latest binlog file is trimmed to the starting position +# of the first gtid event +# +include/show_binlog_events.inc +Log_name Pos Event_type Server_id End_log_pos Info +master-bin.000003 # Query # # BEGIN +master-bin.000003 # Query # # use `test`; INSERT INTO t1 VALUES(4) +master-bin.000003 # Xid # # COMMIT /* XID */ +include/show_binlog_events.inc +Log_name Pos Event_type Server_id End_log_pos Info +master-bin.000004 # Query # # BEGIN +master-bin.000004 # Query # # use `test`; INSERT INTO t1 VALUES(5) +master-bin.000004 # Xid # # COMMIT /* XID */ +master-bin.000004 # Query # # use `test`; FLUSH ENGINE LOGS +DROP TABLE t1; +DROP TABLE blackhole; diff --git a/mysql-test/suite/binlog/r/binlog_truncate_crash_recovery.result b/mysql-test/suite/binlog/r/binlog_truncate_crash_recovery.result new file mode 100644 index 000000000000..a4e964cd479c --- /dev/null +++ b/mysql-test/suite/binlog/r/binlog_truncate_crash_recovery.result @@ -0,0 +1,38 @@ +CALL mtr.add_suppression("Taking backup from .*"); +CREATE TABLE t1(c1 INT); +INSERT INTO t1 VALUES(100); +COMMIT; +FLUSH LOGS; +# Crash right after flushing engine log +SET SESSION DEBUG="+d,crash_after_flush_engine_log"; +BEGIN; +INSERT INTO t1 VALUES(1); +COMMIT; +ERROR HY000: Lost connection to MySQL server during query +# Restart the master server +# +# Verify that a transaction can not be recovered during server +# recovery from a crash, which happened after flushing it to +# engine log and before flushing it to binary log. 
+# +SELECT count(*) FROM t1 WHERE c1=1; +count(*) +0 +# Crash right after flushing binary log +SET SESSION DEBUG="+d,crash_after_flush_binlog"; +BEGIN; +INSERT INTO t1 VALUES(2); +COMMIT; +ERROR HY000: Lost connection to MySQL server during query +# Restart the master server +# +# Verify that a transaction cannot be recovered during server +# recovery from a crash, which happened after flushing it +# to binary log. This is because the transaction is still marked +# as prepared in engine and will be rollbacked when +# trim-binlog-to-recover is set +# +SELECT count(*) FROM t1 WHERE c1=2; +count(*) +0 +DROP TABLE t1; diff --git a/mysql-test/suite/binlog/t/binlog_rotate_block_on_trxs.inc b/mysql-test/suite/binlog/t/binlog_rotate_block_on_trxs.inc new file mode 100644 index 000000000000..cf92bf19942b --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_rotate_block_on_trxs.inc @@ -0,0 +1,71 @@ +--source include/count_sessions.inc + +CALL mtr.add_suppression("Timeout waiting for reply of binlog *"); + +SET @save.rpl_semi_sync_master_timeout = @@global.rpl_semi_sync_master_timeout; +SET @save.rpl_semi_sync_master_enabled = @@global.rpl_semi_sync_master_enabled; + +--connect(conn1,localhost,root,,test) +--connect(conn2,localhost,root,,test) + +--let $rpl_connection_name= default +--source include/rpl_connection.inc +CREATE TABLE t1(c1 INT PRIMARY KEY); +CREATE TABLE blackhole(c1 INT PRIMARY KEY) ENGINE=blackhole; + +INSERT INTO t1 VALUES(1); +COMMIT; + + +--let $first_binlog_file= query_get_value(SHOW MASTER STATUS, File, 1) + +-- echo # Create a 20 sec semisync timeout +SET global rpl_semi_sync_master_timeout=20000; +SET global rpl_semi_sync_master_enabled=1; + +--let $rpl_connection_name= conn1 +--source include/rpl_connection.inc +--send INSERT INTO blackhole VALUES (1); + +--let $rpl_connection_name= conn2 +--source include/rpl_connection.inc + +# Rotate the binary log +--send FLUSH LOGS + +# Wait until the server reaches the debug sync point while rotating the +# 
binary log +--let $rpl_connection_name= conn2 +--source include/rpl_connection.inc +--reap + +--let $rpl_connection_name= conn1 +--source include/rpl_connection.inc +--reap + +--let $rpl_connection_name= default +--source include/rpl_connection.inc +INSERT INTO t1 VALUES (2); + +--let $second_binlog_file= query_get_value(SHOW MASTER STATUS, File, 1) + +--echo # The first binlog file should contain the trx on blackhole table since +--echo # the flush was blocked by the trx. The insert (of value 2) into t1 +--echo # should be in the rotated file (second binlog file) +--let $binlog_file= $first_binlog_file +--source include/show_binlog_events.inc + +--let $binlog_file= $second_binlog_file +--source include/show_binlog_events.inc + +# Cleanup +DROP TABLE t1; +DROP TABLE blackhole; + +SET @@global.rpl_semi_sync_master_timeout = @save.rpl_semi_sync_master_timeout; +SET @@global.rpl_semi_sync_master_enabled = @save.rpl_semi_sync_master_enabled; + +# Disconnect the additional connections +--disconnect conn1 +--disconnect conn2 +--source include/wait_until_count_sessions.inc diff --git a/mysql-test/suite/binlog/t/binlog_rotate_block_on_trxs_rbr-master.opt b/mysql-test/suite/binlog/t/binlog_rotate_block_on_trxs_rbr-master.opt new file mode 100644 index 000000000000..04022f1bbab1 --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_rotate_block_on_trxs_rbr-master.opt @@ -0,0 +1,3 @@ +$SEMISYNC_MASTER_PLUGIN_OPT $SEMISYNC_MASTER_PLUGIN_LOAD +--trim-binlog-to-recover --gtid_mode=ON --enforce_gtid_consistency --log_bin=master-bin +--log_slave_updates diff --git a/mysql-test/suite/binlog/t/binlog_rotate_block_on_trxs_rbr.test b/mysql-test/suite/binlog/t/binlog_rotate_block_on_trxs_rbr.test new file mode 100644 index 000000000000..5bf47c7cfdfe --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_rotate_block_on_trxs_rbr.test @@ -0,0 +1,9 @@ +# Verifies that trxs that do not generate xid (such as trxs on blackhole +# engines) block rotation of binlog files until the trx finished
committing (when +# node fence is enabled through rpl_semisync_timeout) +--source include/not_valgrind.inc +--source include/have_log_bin.inc +--source include/have_debug.inc +--source include/have_binlog_format_row.inc + +--source binlog_rotate_block_on_trxs.inc diff --git a/mysql-test/suite/binlog/t/binlog_rotate_block_on_trxs_sbr-master.opt b/mysql-test/suite/binlog/t/binlog_rotate_block_on_trxs_sbr-master.opt new file mode 100644 index 000000000000..04022f1bbab1 --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_rotate_block_on_trxs_sbr-master.opt @@ -0,0 +1,3 @@ +$SEMISYNC_MASTER_PLUGIN_OPT $SEMISYNC_MASTER_PLUGIN_LOAD +--trim-binlog-to-recover --gtid_mode=ON --enforce_gtid_consistency --log_bin=master-bin +--log_slave_updates diff --git a/mysql-test/suite/binlog/t/binlog_rotate_block_on_trxs_sbr.test b/mysql-test/suite/binlog/t/binlog_rotate_block_on_trxs_sbr.test new file mode 100644 index 000000000000..eb363d1c51cf --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_rotate_block_on_trxs_sbr.test @@ -0,0 +1,9 @@ +# Verifies that trxs that do not generate xid (such as trxs on blackhole +# engines) block rotation of binlog files until the trx finished committing (when +# node fence is enabled through rpl_semisync_timeout) +--source include/not_valgrind.inc +--source include/have_log_bin.inc +--source include/have_debug.inc +--source include/have_binlog_format_mixed_or_statement.inc + +--source binlog_rotate_block_on_trxs.inc diff --git a/mysql-test/suite/binlog/t/binlog_trim_option_only-master.opt b/mysql-test/suite/binlog/t/binlog_trim_option_only-master.opt new file mode 100644 index 000000000000..cac45b616ac1 --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_trim_option_only-master.opt @@ -0,0 +1,2 @@ +--trim-binlog-to-recover --gtid_mode=ON --enforce_gtid_consistency --log_bin +--log_slave_updates --binlog_checksum=CRC32 diff --git a/mysql-test/suite/binlog/t/binlog_trim_option_only-slave.opt
b/mysql-test/suite/binlog/t/binlog_trim_option_only-slave.opt new file mode 100644 index 000000000000..cac45b616ac1 --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_trim_option_only-slave.opt @@ -0,0 +1,2 @@ +--trim-binlog-to-recover --gtid_mode=ON --enforce_gtid_consistency --log_bin +--log_slave_updates --binlog_checksum=CRC32 diff --git a/mysql-test/suite/binlog/t/binlog_trim_option_only.test b/mysql-test/suite/binlog/t/binlog_trim_option_only.test new file mode 100644 index 000000000000..c7686ad52e9a --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_trim_option_only.test @@ -0,0 +1,23 @@ +# This test verifies that in the relay log code path we have initialized all +# member variables. We've seen an issue with non_xid_trxs not being initialized +# and causing relay log rotation to get stuck. This test doesn't do anything +# fancy (such as restart) and can be run in valgrind. +# Note that mysql_bin_log doesn't have uninitialized variable problem as +# all members are initialized to 0 so it's deterministic. +--source include/have_log_bin.inc +--source include/master-slave.inc + +# Exercise the inc/dec non_xid_trxs path in group commit +CREATE TABLE t1 (pk int primary key); + +INSERT INTO t1 VALUES (1); +INSERT INTO t1 VALUES (2); +INSERT INTO t1 VALUES (3); +INSERT INTO t1 VALUES (4); +INSERT INTO t1 VALUES (5); + +DROP TABLE t1; + +--source include/sync_slave_sql_with_master.inc +--source include/rpl_end.inc + diff --git a/mysql-test/suite/binlog/t/binlog_truncate_across_flush.inc b/mysql-test/suite/binlog/t/binlog_truncate_across_flush.inc new file mode 100644 index 000000000000..40d73ae082aa --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_truncate_across_flush.inc @@ -0,0 +1,123 @@ +# +# verify that binlog truncation to match engine position works correctly across +# binlog file rotation. On binlog file rotation which is followed by a crash +# before committing new trx to engine, we should trim everything from the latest +# binlog file.
Without this, gtid_executed could show additional trxs that were +# not committed to engine +# +# For example: +# 1. To start with master-bin.000001 is the binlog file which gets rotated +# 2. New trxs are started and mysqld crashes after flushing the trxs to binlog, +# but before committing to engine +# 3. On recovery the engine will say that the binlog file it last saw was +# master-bin.000001. The current binlog file being recovered is +# master-bin.000002 which should be truncated to the beginning of the first gtid +# event in this file +# +--source include/not_valgrind.inc +--source include/have_log_bin.inc +--source include/have_debug.inc + +CALL mtr.add_suppression("Taking backup from .*"); + +CREATE TABLE t1(c1 INT); +CREATE TABLE blackhole (c1 INT PRIMARY KEY) ENGINE=BLACKHOLE; + +INSERT INTO t1 VALUES(1); +INSERT INTO blackhole VALUES(1); +COMMIT; + +INSERT INTO t1 VALUES(2); +COMMIT; + +--let $first_binlog_file= query_get_value(SHOW MASTER STATUS, File, 1) + +# Flush logs to rotate binlog and make trx durable in engine +FLUSH LOGS; + +--exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--echo # Crash right after flushing binary log + +--let $second_binlog_file= query_get_value(SHOW MASTER STATUS, File, 1) +SET SESSION DEBUG="+d,crash_after_flush_binlog"; +BEGIN; +INSERT INTO t1 VALUES(3); +--error CR_SERVER_LOST +COMMIT; +--source include/wait_until_disconnected.inc + +--enable_reconnect +--echo # Restart the master server +--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--source include/wait_until_connected_again.inc +--disable_reconnect + +--echo # +--echo # Verify that a transaction cannot be recovered during server +--echo # recovery from a crash, which happened after flushing it +--echo # to binary log.
This is because the transaction is still marked +--echo # as prepared in engine and will be rollbacked when +--echo # trim-binlog-to-recover is set +--echo # +--let $assert_text= There should be 2 rows in table t1 +--let $assert_cond= [SELECT COUNT(*) FROM t1] = 2 +--source include/assert.inc + +--let $third_binlog_file= query_get_value(SHOW MASTER STATUS, File, 1) + +INSERT INTO t1 VALUES(4); +COMMIT; +FLUSH ENGINE LOGS; + +--echo # +--echo # verify that the latest binlog file is trimmed to the starting position +--echo # of the first gtid event +--echo # +--let $binlog_file= $first_binlog_file +--source include/show_binlog_events.inc + +--let $binlog_file= $second_binlog_file +--source include/show_binlog_events.inc + +--let $binlog_file= $third_binlog_file +--source include/show_binlog_events.inc + +--exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--echo # Crash right after flushing binary log + +# Now write to blackhole table and simulate a crash +SET SESSION DEBUG="+d,crash_after_flush_binlog"; +BEGIN; +INSERT INTO blackhole VALUES(2); +--error CR_SERVER_LOST +COMMIT; +--source include/wait_until_disconnected.inc + +--enable_reconnect +--echo # Restart the master server +--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--source include/wait_until_connected_again.inc +--disable_reconnect + +INSERT INTO t1 VALUES(5); +COMMIT; +FLUSH ENGINE LOGS; + +--let $assert_text= There should be 4 rows in table t1 +--let $assert_cond= [SELECT COUNT(*) FROM t1] = 4 +--source include/assert.inc + +--echo # +--echo # verify that the latest binlog file is trimmed to the starting position +--echo # of the first gtid event +--echo # +--let $binlog_file= $third_binlog_file +--source include/show_binlog_events.inc + +--let $fourth_binlog_file= query_get_value(SHOW MASTER STATUS, File, 1) +--let $binlog_file= $fourth_binlog_file +--source include/show_binlog_events.inc + +# Cleanup +DROP TABLE t1; +DROP TABLE blackhole; diff --git 
a/mysql-test/suite/binlog/t/binlog_truncate_across_flush_rbr-master.opt b/mysql-test/suite/binlog/t/binlog_truncate_across_flush_rbr-master.opt new file mode 100644 index 000000000000..2f2f85a367ab --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_truncate_across_flush_rbr-master.opt @@ -0,0 +1,2 @@ +--trim-binlog-to-recover --gtid_mode=ON --enforce_gtid_consistency --log_bin=master-bin +--log_slave_updates --binlog_checksum=CRC32 diff --git a/mysql-test/suite/binlog/t/binlog_truncate_across_flush_rbr.test b/mysql-test/suite/binlog/t/binlog_truncate_across_flush_rbr.test new file mode 100644 index 000000000000..e9bd8ab20988 --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_truncate_across_flush_rbr.test @@ -0,0 +1,22 @@ +# +# verify that binlog truncation to match engine position works correctly across +# binlog file rotation. On binlog file rotation which is followed by a crash +# before committing new trx to engine, we should trim everything from the latest +# binlog file. Without this, gtid_executed could show additional trxs that were +# not committed to engine +# +# For example: +# 1. To start with master-bin.000001 is the binlog file which gets rotated +# 2. New trxs are started and mysqld crashes after flushing the trxs to binlog, +# but before committing to engine +# 3. On recovery engine will say that the binlog file it last saw was +# master-bin.000001.
The current binlog file being recovered is +# master-bin.000002 which should be truncated to the beginning of the first gtid +# event in this file +# +--source include/not_valgrind.inc +--source include/have_log_bin.inc +--source include/have_debug.inc +--source include/have_binlog_format_row.inc + +--source binlog_truncate_across_flush.inc diff --git a/mysql-test/suite/binlog/t/binlog_truncate_across_flush_sbr-master.opt b/mysql-test/suite/binlog/t/binlog_truncate_across_flush_sbr-master.opt new file mode 100644 index 000000000000..80cfa5de0c45 --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_truncate_across_flush_sbr-master.opt @@ -0,0 +1,2 @@ +--trim-binlog-to-recover --gtid_mode=ON --enforce_gtid_consistency +--log_bin=master-bin --log_slave_updates --binlog_checksum=CRC32 diff --git a/mysql-test/suite/binlog/t/binlog_truncate_across_flush_sbr.test b/mysql-test/suite/binlog/t/binlog_truncate_across_flush_sbr.test new file mode 100644 index 000000000000..4c8a354abb88 --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_truncate_across_flush_sbr.test @@ -0,0 +1,22 @@ +# +# verify that binlog truncation to match engine position works correctly across +# binlog file rotation. On binlog file rotation which is followed by a crash +# before committing new trx to engine, we should trim everything from the latest +# binlog file. Without this, gtid_executed could show additional trxs that were +# not committed to engine +# +# For example: +# 1. To start with master-bin.000001 is the binlog file which gets rotated +# 2. New trxs are started and mysqld crashes after flushing the trxs to binlog, +# but before committing to engine +# 3. On recovery engine will say that the binlog file it last saw was +# master-bin.000001.
The current binlog file being recovered is +# master-bin.000002 which should be truncated to the beginning of the first gtid +# event in this file +# +--source include/not_valgrind.inc +--source include/have_log_bin.inc +--source include/have_debug.inc +--source include/have_binlog_format_mixed_or_statement.inc + +--source binlog_truncate_across_flush.inc diff --git a/mysql-test/suite/binlog/t/binlog_truncate_crash_recovery-master.opt b/mysql-test/suite/binlog/t/binlog_truncate_crash_recovery-master.opt new file mode 100644 index 000000000000..2cf4ce6d1ead --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_truncate_crash_recovery-master.opt @@ -0,0 +1 @@ +--sync-binlog=1 --innodb-flush-log-at-trx-commit=1 --trim-binlog-to-recover diff --git a/mysql-test/suite/binlog/t/binlog_truncate_crash_recovery.test b/mysql-test/suite/binlog/t/binlog_truncate_crash_recovery.test new file mode 100644 index 000000000000..42e32e757da5 --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_truncate_crash_recovery.test @@ -0,0 +1,72 @@ +# +# Verify that a transaction can not be recovered during server +# recovery from a crash, which happened after flushing it to +# engine log and before flushing it to binary log. +# +# +# Verify that a transaction cannot be recovered during server recovery from +# a crash, which happened after flushing it to binary log.
This is because +# we set the trim-binlog-to-recover startup option, which rolls back all prepared +# transactions in the engine (even if they are marked as committed in the binlog) +# and truncates the binlog during recovery +# + +--source include/not_valgrind.inc +--source include/have_log_bin.inc +--source include/have_debug.inc + +CALL mtr.add_suppression("Taking backup from .*"); + +CREATE TABLE t1(c1 INT); +INSERT INTO t1 VALUES(100); +COMMIT; +FLUSH LOGS; + +--exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--echo # Crash right after flushing engine log +SET SESSION DEBUG="+d,crash_after_flush_engine_log"; +BEGIN; +INSERT INTO t1 VALUES(1); +--error CR_SERVER_LOST +COMMIT; +--source include/wait_until_disconnected.inc + +--enable_reconnect +--echo # Restart the master server +--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--source include/wait_until_connected_again.inc +--disable_reconnect + +--echo # +--echo # Verify that a transaction can not be recovered during server +--echo # recovery from a crash, which happened after flushing it to +--echo # engine log and before flushing it to binary log. +--echo # +SELECT count(*) FROM t1 WHERE c1=1; + +--exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--echo # Crash right after flushing binary log +SET SESSION DEBUG="+d,crash_after_flush_binlog"; +BEGIN; +INSERT INTO t1 VALUES(2); +--error CR_SERVER_LOST +COMMIT; +--source include/wait_until_disconnected.inc + +--enable_reconnect +--echo # Restart the master server +--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--source include/wait_until_connected_again.inc +--disable_reconnect + +--echo # +--echo # Verify that a transaction cannot be recovered during server +--echo # recovery from a crash, which happened after flushing it +--echo # to binary log.
This is because the transaction is still marked +--echo # as prepared in engine and will be rollbacked when +--echo # trim-binlog-to-recover is set +--echo # +SELECT count(*) FROM t1 WHERE c1=2; + +# Cleanup +DROP TABLE t1; diff --git a/mysql-test/suite/innodb/r/mysqldump_max_recordsize.result b/mysql-test/suite/innodb/r/mysqldump_max_recordsize.result index a3156181bf95..017ed04b48f7 100644 --- a/mysql-test/suite/innodb/r/mysqldump_max_recordsize.result +++ b/mysql-test/suite/innodb/r/mysqldump_max_recordsize.result @@ -1,4 +1,5 @@ call mtr.add_suppression("ERROR 1118 \\(42000\\) at line 106: Row size too large \\(> 8126\\)"); +call mtr.add_suppression("Error reading GTIDs from binary log"); # Stop DB server which was created by MTR default # create bootstrap file # Run the bootstrap command with page size 32k diff --git a/mysql-test/suite/innodb/t/mysqldump_max_recordsize.test b/mysql-test/suite/innodb/t/mysqldump_max_recordsize.test index 352f42c101ed..facb9c5d0dc7 100644 --- a/mysql-test/suite/innodb/t/mysqldump_max_recordsize.test +++ b/mysql-test/suite/innodb/t/mysqldump_max_recordsize.test @@ -23,6 +23,7 @@ let $MYSQLD_BASEDIR= `select @@basedir`; let $MYSQLD_DATADIR1 = $MYSQL_TMP_DIR/datadir1/data; call mtr.add_suppression("ERROR 1118 \\(42000\\) at line 106: Row size too large \\(> 8126\\)"); +call mtr.add_suppression("Error reading GTIDs from binary log"); # Stop the MTR default datadir, as it was started with default 16k page size --echo # Stop DB server which was created by MTR default diff --git a/mysql-test/suite/perfschema/r/relaylog.result b/mysql-test/suite/perfschema/r/relaylog.result index e4f287880942..6eee1cd0a115 100644 --- a/mysql-test/suite/perfschema/r/relaylog.result +++ b/mysql-test/suite/perfschema/r/relaylog.result @@ -62,6 +62,7 @@ where event_name like "%MYSQL_BIN_LOG%" order by event_name; EVENT_NAME COUNT_STAR wait/synch/cond/sql/MYSQL_BIN_LOG::COND_done NONE +wait/synch/cond/sql/MYSQL_BIN_LOG::non_xid_trxs_cond NONE 
wait/synch/cond/sql/MYSQL_BIN_LOG::prep_xids_cond NONE wait/synch/mutex/sql/MYSQL_BIN_LOG::LOCK_binlog_end_pos MANY wait/synch/mutex/sql/MYSQL_BIN_LOG::LOCK_commit MANY @@ -70,6 +71,7 @@ wait/synch/mutex/sql/MYSQL_BIN_LOG::LOCK_done MANY wait/synch/mutex/sql/MYSQL_BIN_LOG::LOCK_flush_queue MANY wait/synch/mutex/sql/MYSQL_BIN_LOG::LOCK_index MANY wait/synch/mutex/sql/MYSQL_BIN_LOG::LOCK_log MANY +wait/synch/mutex/sql/MYSQL_BIN_LOG::LOCK_non_xid_trxs MANY wait/synch/mutex/sql/MYSQL_BIN_LOG::LOCK_sync MANY wait/synch/mutex/sql/MYSQL_BIN_LOG::LOCK_sync_queue MANY wait/synch/mutex/sql/MYSQL_BIN_LOG::LOCK_xids NONE @@ -90,6 +92,7 @@ where event_name like "%MYSQL_RELAY_LOG%" order by event_name; EVENT_NAME COUNT_STAR SUM_TIMER_WAIT MIN_TIMER_WAIT AVG_TIMER_WAIT MAX_TIMER_WAIT wait/synch/cond/sql/MYSQL_RELAY_LOG::COND_done 0 0 0 0 0 +wait/synch/cond/sql/MYSQL_RELAY_LOG::non_xid_trxs_cond 0 0 0 0 0 wait/synch/cond/sql/MYSQL_RELAY_LOG::prep_xids_cond 0 0 0 0 0 wait/synch/mutex/sql/MYSQL_RELAY_LOG::LOCK_commit 0 0 0 0 0 wait/synch/mutex/sql/MYSQL_RELAY_LOG::LOCK_commit_queue 0 0 0 0 0 @@ -160,6 +163,7 @@ where event_name like "%MYSQL_BIN_LOG%" order by event_name; EVENT_NAME COUNT_STAR wait/synch/cond/sql/MYSQL_BIN_LOG::COND_done NONE +wait/synch/cond/sql/MYSQL_BIN_LOG::non_xid_trxs_cond NONE wait/synch/cond/sql/MYSQL_BIN_LOG::prep_xids_cond NONE wait/synch/mutex/sql/MYSQL_BIN_LOG::LOCK_binlog_end_pos MANY wait/synch/mutex/sql/MYSQL_BIN_LOG::LOCK_commit MANY @@ -168,6 +172,7 @@ wait/synch/mutex/sql/MYSQL_BIN_LOG::LOCK_done MANY wait/synch/mutex/sql/MYSQL_BIN_LOG::LOCK_flush_queue MANY wait/synch/mutex/sql/MYSQL_BIN_LOG::LOCK_index MANY wait/synch/mutex/sql/MYSQL_BIN_LOG::LOCK_log MANY +wait/synch/mutex/sql/MYSQL_BIN_LOG::LOCK_non_xid_trxs MANY wait/synch/mutex/sql/MYSQL_BIN_LOG::LOCK_sync MANY wait/synch/mutex/sql/MYSQL_BIN_LOG::LOCK_sync_queue MANY wait/synch/mutex/sql/MYSQL_BIN_LOG::LOCK_xids MANY @@ -210,6 +215,7 @@ where event_name like "%MYSQL_RELAY_LOG%" order by 
event_name; EVENT_NAME COUNT_STAR wait/synch/cond/sql/MYSQL_RELAY_LOG::COND_done NONE +wait/synch/cond/sql/MYSQL_RELAY_LOG::non_xid_trxs_cond NONE wait/synch/cond/sql/MYSQL_RELAY_LOG::prep_xids_cond NONE wait/synch/mutex/sql/MYSQL_RELAY_LOG::LOCK_commit NONE wait/synch/mutex/sql/MYSQL_RELAY_LOG::LOCK_commit_queue NONE diff --git a/mysql-test/suite/rocksdb/r/binlog_truncate_backup.result b/mysql-test/suite/rocksdb/r/binlog_truncate_backup.result new file mode 100644 index 000000000000..c2d0fd2b2ef4 --- /dev/null +++ b/mysql-test/suite/rocksdb/r/binlog_truncate_backup.result @@ -0,0 +1,28 @@ +CALL mtr.add_suppression("Taking backup from .*"); +CALL mtr.add_suppression("Error reading GTIDs from binary log"); +CREATE TABLE t1(c1 INT) ENGINE=rocksdb; +INSERT INTO t1 VALUES(1); +COMMIT; +# Crash right after flushing binary log +SET SESSION DEBUG="+d,crash_after_flush_binlog"; +INSERT INTO t1 VALUES(2); +ERROR HY000: Lost connection to MySQL server during query +# Restart the master server +# Verify that the trx was rolled back +include/assert.inc [t1 should have 1 row] + +# Verify that binlog backup was taken before truncating +1 +INSERT INTO t1 VALUES(10); +COMMIT; +# Crash right after flushing binary log +SET SESSION DEBUG="+d,crash_after_flush_binlog"; +INSERT INTO t1 VALUES(2); +ERROR HY000: Lost connection to MySQL server during query +# Restart the master server +# Verify that the trx was rolled back +include/assert.inc [t1 should have 2 rows] + +# Verify that binlog backup was taken before truncating +1 +DROP TABLE t1; diff --git a/mysql-test/suite/rocksdb/t/binlog_truncate_backup-master.opt b/mysql-test/suite/rocksdb/t/binlog_truncate_backup-master.opt new file mode 100644 index 000000000000..8a6aa50732f8 --- /dev/null +++ b/mysql-test/suite/rocksdb/t/binlog_truncate_backup-master.opt @@ -0,0 +1,2 @@ +--trim-binlog-to-recover +--binlog_format=row diff --git a/mysql-test/suite/rocksdb/t/binlog_truncate_backup.test 
b/mysql-test/suite/rocksdb/t/binlog_truncate_backup.test new file mode 100644 index 000000000000..4a086c176655 --- /dev/null +++ b/mysql-test/suite/rocksdb/t/binlog_truncate_backup.test @@ -0,0 +1,75 @@ +# +# verify that when a binlog gets truncated, a backup gets stored in the tmpdir +# +--source include/not_valgrind.inc +--source include/have_log_bin.inc +--source include/have_debug.inc +--source include/have_rocksdb.inc + +CALL mtr.add_suppression("Taking backup from .*"); +CALL mtr.add_suppression("Error reading GTIDs from binary log"); + +CREATE TABLE t1(c1 INT) ENGINE=rocksdb; +INSERT INTO t1 VALUES(1); +COMMIT; + +# Simulate a crash after writing to binlog, but before committing to engine +# On restart the trx will be rolled back. binlog will be truncated and a backup +# will be taken before truncating the binlog +--exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--echo # Crash right after flushing binary log +SET SESSION DEBUG="+d,crash_after_flush_binlog"; +# 2013 - CR_SERVER_LOST +--error CR_SERVER_LOST +INSERT INTO t1 VALUES(2); +--source include/wait_until_disconnected.inc + +--enable_reconnect +--echo # Restart the master server +--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--source include/wait_until_connected_again.inc +--disable_reconnect + +--echo # Verify that the trx was rolled back +--let $assert_text = t1 should have 1 row +--let $assert_cond = [SELECT COUNT(*) from t1] = 1 +--source include/assert.inc + +--echo +--echo # Verify that binlog backup was taken before truncating +--let $MY_TMPDIR= `select @@tmpdir` +--exec ls -l $MY_TMPDIR/binlog_backup.trunc | wc -l + +INSERT INTO t1 VALUES(10); +COMMIT; +# Simulate a crash after writing to binlog, but before committing to engine +# On restart the trx will be rolled back.
binlog will be truncated and a backup +# will be taken before truncating the binlog +# Repeat the crash again to verify that the backup file is overwritten +--exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--echo # Crash right after flushing binary log +SET SESSION DEBUG="+d,crash_after_flush_binlog"; +# 2013 - CR_SERVER_LOST +--error CR_SERVER_LOST +INSERT INTO t1 VALUES(2); +--source include/wait_until_disconnected.inc + +--enable_reconnect +--echo # Restart the master server +--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--source include/wait_until_connected_again.inc +--disable_reconnect + +--echo # Verify that the trx was rolled back +--let $assert_text = t1 should have 2 rows +--let $assert_cond = [SELECT COUNT(*) from t1] = 2 +--source include/assert.inc + +--echo +--echo # Verify that binlog backup was taken before truncating +--let $MY_TMPDIR= `select @@tmpdir` +--exec ls -l $MY_TMPDIR/binlog_backup.trunc | wc -l + + +# Cleanup +DROP TABLE t1; diff --git a/sql/binlog.cc b/sql/binlog.cc index 4dea85841e2b..656a49b55978 100644 --- a/sql/binlog.cc +++ b/sql/binlog.cc @@ -33,6 +33,7 @@ #include #include #include +#include #include "lex_string.h" #include "map_helpers.h" @@ -193,7 +194,8 @@ static void exec_binlog_error_action_abort(const char *err_string); static bool binlog_recover(Binlog_file_reader *binlog_file_reader, my_off_t *valid_pos, Gtid *binlog_max_gtid, char *engine_binlog_file, - my_off_t *engine_binlog_pos); + my_off_t *engine_binlog_pos, + const std::string &cur_binlog_file); static void binlog_prepare_row_images(const THD *thd, TABLE *table, bool is_update); static bool is_loggable_xa_prepare(THD *thd); @@ -3671,6 +3673,7 @@ MYSQL_BIN_LOG::MYSQL_BIN_LOG(uint *sync_period, bool relay_log) file_id(1), sync_period_ptr(sync_period), sync_counter(0), + non_xid_trxs(0), is_relay_log(relay_log), checksum_alg_reset(binary_log::BINLOG_CHECKSUM_ALG_UNDEF), relay_log_checksum_alg(binary_log::BINLOG_CHECKSUM_ALG_UNDEF), @@ 
-3704,8 +3707,10 @@ void MYSQL_BIN_LOG::cleanup() { mysql_mutex_destroy(&LOCK_sync); mysql_mutex_destroy(&LOCK_binlog_end_pos); mysql_mutex_destroy(&LOCK_xids); + mysql_mutex_destroy(&LOCK_non_xid_trxs); mysql_cond_destroy(&update_cond); mysql_cond_destroy(&m_prep_xids_cond); + mysql_cond_destroy(&non_xid_trxs_cond); if (!is_relay_log) { Commit_stage_manager::get_instance().deinit(); } @@ -3723,11 +3728,14 @@ void MYSQL_BIN_LOG::init_pthread_objects() { mysql_mutex_init(m_key_LOCK_index, &LOCK_index, MY_MUTEX_INIT_SLOW); mysql_mutex_init(m_key_LOCK_commit, &LOCK_commit, MY_MUTEX_INIT_FAST); mysql_mutex_init(m_key_LOCK_sync, &LOCK_sync, MY_MUTEX_INIT_FAST); + mysql_mutex_init(m_key_LOCK_non_xid_trxs, &LOCK_non_xid_trxs, + MY_MUTEX_INIT_FAST); mysql_mutex_init(m_key_LOCK_binlog_end_pos, &LOCK_binlog_end_pos, MY_MUTEX_INIT_FAST); mysql_mutex_init(m_key_LOCK_xids, &LOCK_xids, MY_MUTEX_INIT_FAST); mysql_cond_init(m_key_update_cond, &update_cond); mysql_cond_init(m_key_prep_xids_cond, &m_prep_xids_cond); + mysql_cond_init(m_key_non_xid_trxs_cond, &non_xid_trxs_cond); if (!is_relay_log) { Commit_stage_manager::get_instance().init( m_key_LOCK_flush_queue, m_key_LOCK_sync_queue, m_key_LOCK_commit_queue, @@ -6789,6 +6797,35 @@ void MYSQL_BIN_LOG::dec_prep_xids(THD *thd) { } } +void MYSQL_BIN_LOG::inc_non_xid_trxs(THD *thd) { + DBUG_ENTER("MYSQL_BIN_LOG::inc_non_xid_trxs"); + mysql_mutex_lock(&LOCK_non_xid_trxs); + ++non_xid_trxs; + thd->non_xid_trx = true; + mysql_mutex_unlock(&LOCK_non_xid_trxs); + DBUG_VOID_RETURN; +} + +void MYSQL_BIN_LOG::dec_non_xid_trxs(THD *thd) { + DBUG_ENTER("MYSQL_BIN_LOG::dec_non_xid_trxs"); + + mysql_mutex_lock(&LOCK_non_xid_trxs); + DBUG_ASSERT(non_xid_trxs > 0); + + if (non_xid_trxs > 0) --non_xid_trxs; + + thd->non_xid_trx = false; + + DBUG_PRINT("debug", ("non_xid_trxs: %d", non_xid_trxs)); + + /* Signal the threads that could be blocked in binlog rotation if the + * non_xid_trxs is zero*/ + if (non_xid_trxs == 0) 
mysql_cond_signal(&non_xid_trxs_cond); + + mysql_mutex_unlock(&LOCK_non_xid_trxs); + DBUG_VOID_RETURN; +} + /* Wrappers around new_file_impl to avoid using argument to control locking. The argument 1) less readable 2) breaks @@ -6865,6 +6902,14 @@ int MYSQL_BIN_LOG::new_file_impl( } mysql_mutex_unlock(&LOCK_xids); + if (opt_trim_binlog) { + /* Wait for all non-xid trxs to finish */ + mysql_mutex_lock(&LOCK_non_xid_trxs); + while (get_non_xid_trxs() > 0) + mysql_cond_wait(&non_xid_trxs_cond, &LOCK_non_xid_trxs); + mysql_mutex_unlock(&LOCK_non_xid_trxs); + } + mysql_mutex_lock(&LOCK_index); mysql_mutex_assert_owner(&LOCK_log); @@ -8256,9 +8301,11 @@ int MYSQL_BIN_LOG::open_binlog(const char *opt_name) { LogErr(INFORMATION_LEVEL, ER_BINLOG_RECOVERING_AFTER_CRASH_USING, opt_name); valid_pos = binlog_file_reader.position(); + // Get the raw filename without dirname + const std::string cur_log_file = log_name + dirname_length(log_name); error = binlog_recover(&binlog_file_reader, &valid_pos, &engine_binlog_max_gtid, engine_binlog_file, - &engine_binlog_pos); + &engine_binlog_pos, cur_log_file); binlog_size = binlog_file_reader.ifile()->length(); } else error = 0; @@ -8280,6 +8327,25 @@ int MYSQL_BIN_LOG::open_binlog(const char *opt_name) { /* Change binlog file size to valid_pos */ if (valid_pos < binlog_size) { + if (opt_trim_binlog) { + char backup_file[FN_REFLEN]; + myf opt = MY_REPLACE_DIR | MY_UNPACK_FILENAME | MY_APPEND_EXT; + fn_format(backup_file, "binlog_backup", opt_mysql_tmpdir, ".trunc", + opt); + + // NO_LINT_DEBUG + sql_print_error("Taking backup from %s to %s\n", log_name, + backup_file); + /* MY_HOLD_ORIGINAL_MODES prevents attempts to chown the file */ + if (my_copy(log_name, backup_file, + MYF(MY_WME | MY_HOLD_ORIGINAL_MODES))) { + // NO_LINT_DEBUG + sql_print_error( + "Could not take backup of the truncated binlog file %s", + log_name); + } + } + if (ofile->truncate(valid_pos)) { LogErr(ERROR_LEVEL, ER_BINLOG_CANT_TRIM_CRASHED_BINLOG); return -1; @@ 
-8697,7 +8763,10 @@ std::pair MYSQL_BIN_LOG::flush_thread_caches(THD *thd) { this function documentation for more info. */ thd->set_trans_pos(log_file_name, m_binlog_file->position()); - if (wrote_xid) inc_prep_xids(thd); + if (wrote_xid) + inc_prep_xids(thd); + else + inc_non_xid_trxs(thd); } DBUG_PRINT("debug", ("bytes: %llu", bytes)); return std::make_pair(error, bytes); @@ -8913,7 +8982,10 @@ void MYSQL_BIN_LOG::process_commit_stage_queue(THD *thd, THD *first) { flush error or session attach error for avoiding 3-way deadlock among user thread, rotate thread and dump thread. */ - if (head->get_transaction()->m_flags.xid_written) dec_prep_xids(head); + if (head->get_transaction()->m_flags.xid_written) + dec_prep_xids(head); + else if (head->non_xid_trx) + dec_non_xid_trxs(head); } } @@ -9078,7 +9150,10 @@ int MYSQL_BIN_LOG::finish_commit(THD *thd) { /* Decrement the prepared XID counter after storage engine commit */ - if (thd->get_transaction()->m_flags.xid_written) dec_prep_xids(thd); + if (thd->get_transaction()->m_flags.xid_written) + dec_prep_xids(thd); + else if (thd->non_xid_trx) + dec_non_xid_trxs(thd); /* If commit succeeded, we call the after_commit hook @@ -9093,6 +9168,8 @@ int MYSQL_BIN_LOG::finish_commit(THD *thd) { } } else if (thd->get_transaction()->m_flags.xid_written) dec_prep_xids(thd); + else if (thd->non_xid_trx) + dec_non_xid_trxs(thd); /* If the ordered commit didn't updated the GTIDs for this thd yet @@ -9501,6 +9578,89 @@ int MYSQL_BIN_LOG::ordered_commit(THD *thd, bool all, bool skip_commit) { return thd->commit_error == THD::CE_COMMIT_ERROR; } +// Given a file name of the form 'binlog-file-name.index', it extracts the +// file-name prefix and the numeric index and returns them as a pair +// Example: +// master-bin-3306.0001 ==> Returns (master-bin-3306, 1) +// master-bin-3306.9999 ==> Returns (master-bin-3306, 9999) +static std::pair extract_file_index( + const std::string &file_name) { + char *end; + size_t pos = file_name.find_last_of('.'); + if (pos == string::npos) {
+ DBUG_ASSERT(0); // should never happen + return std::make_pair(file_name, 1); + } + std::string prefix = file_name.substr(0, pos); + uint index = std::strtoul(file_name.substr(pos + 1).c_str(), &end, 10); + + return std::make_pair(std::move(prefix), index); +} + +/* + * Sets the valid position in the binlog file based on engine position (i.e. + * engine binlog filename and file position) + * + * @param - valid_pos[out] - Valid position to set + * @param - cur_binlog_file - The current binlog file that is being recovered + * @param - first_gtid_start - The starting position of the first gtid event in + * cur_binlog_file + * @param - engine_binlog_file - The engine binlog file name + * @param - engine_binlog_pos - The engine binlog file position + */ +static void set_valid_pos(my_off_t *valid_pos, + const std::string &cur_binlog_file, + my_off_t first_gtid_start, char *engine_binlog_file, + my_off_t engine_binlog_pos) { + std::string position = "Engine pos: " + std::to_string(engine_binlog_pos) + + ", Current binlog pos: " + std::to_string(*valid_pos) + + ", Engine binlog file: " + engine_binlog_file + + ", Current binlog file: " + cur_binlog_file; + // NO_LINT_DEBUG + sql_print_information("%s", position.c_str()); + + if (cur_binlog_file.compare(engine_binlog_file) == 0) { + // Case 1: Engine binlog file and current binlog files are the same. + // Compare based only on file position + if (*valid_pos > engine_binlog_pos) { + // Binlog will be truncated to this position + *valid_pos = engine_binlog_pos; + } else if (*valid_pos < engine_binlog_pos) { + // Engine is found to be ahead of the current binlog + // NO_LINT_DEBUG + sql_print_information( + "Engine is ahead of binlog. " + "Binlog will not be truncated to match engine."); + } + } else { + // Case 2: Engine and binlog file names are different. Compare based on file + // indexes.
+ const auto engine_file_pair = extract_file_index(engine_binlog_file); + const auto cur_file_pair = extract_file_index(cur_binlog_file); + + if (engine_file_pair.first.compare(cur_file_pair.first) != 0) { + // The file prefix stored in engine is different than the current file + // prefix. We cannot trim. So give up. Note that server will fail to start + // in this case + // NO_LINT_DEBUG + sql_print_information( + "The file prefix in engine does not match " + "the file prefix of the recovering binlog. There " + "will be no special trimming of the file"); + } else if (engine_file_pair.second < cur_file_pair.second) { + // Engine file is lower than current binlog file. Truncate to the begin + // position of the first gtid in the current binlog file + *valid_pos = first_gtid_start; + } else { + // Engine is found to be ahead of the current binlog + // NO_LINT_DEBUG + sql_print_information( + "Engine is ahead of binlog. " + "Binlog will not be truncated to match engine."); + } + } +} + /** MYSQLD server recovers from last crashed binlog. @@ -9508,6 +9668,7 @@ int MYSQL_BIN_LOG::ordered_commit(THD *thd, bool all, bool skip_commit) { @param[out] valid_pos The position of the last valid transaction or event(non-transaction) of the crashed binlog. valid_pos must be non-NULL. 
+ @param[in] cur_binlog_file The current binlog file that is being recovered After a crash, storage engines may contain transactions that are prepared but not committed (in theory any engine, in practice @@ -9537,7 +9698,8 @@ int MYSQL_BIN_LOG::ordered_commit(THD *thd, bool all, bool skip_commit) { static bool binlog_recover(Binlog_file_reader *binlog_file_reader, my_off_t *valid_pos, Gtid *binlog_max_gtid, char *engine_binlog_file, - my_off_t *engine_binlog_pos) { + my_off_t *engine_binlog_pos, + const std::string &cur_binlog_file) { bool res = false; binlog::tools::Iterator it(binlog_file_reader); it.set_copy_event_buffer(); @@ -9549,6 +9711,7 @@ static bool binlog_recover(Binlog_file_reader *binlog_file_reader, */ Gtid current_gtid; current_gtid.clear(); + my_off_t first_gtid_start = 0; /* The flag is used for handling the case that a transaction is partially written to the binlog. @@ -9610,6 +9773,9 @@ static bool binlog_recover(Binlog_file_reader *binlog_file_reader, current_gtid.set(gev->get_sidno(true), gev->get_gno()); else current_gtid.clear(); + if (first_gtid_start == 0) + first_gtid_start = + ev->common_header->log_pos - ev->common_header->data_written; } default: { break; @@ -9670,11 +9836,30 @@ static bool binlog_recover(Binlog_file_reader *binlog_file_reader, will result in an assert. (Production builds would be safe since ha_recover returns right away if total_ha_2pc <= opt_log_bin.) */ - res = res || (total_ha_2pc > 1 && - ha_recover(&xids, binlog_max_gtid, engine_binlog_file, - engine_binlog_pos)); + if (!res && total_ha_2pc > 1) { + res = ha_recover(&xids, binlog_max_gtid, engine_binlog_file, + engine_binlog_pos); + if (res) goto fin1; + /* + If trim binlog on recover option is set, then we essentially trim + binlog to the position that the engine thinks it has committed. Note + that if opt_trim_binlog option is set, then engine recovery (called + through ha_recover() above) ensures that all prepared txns are rolled + back. 
There are a few things which need to be kept in mind: + 1. txns never span across two binlogs, hence it is safe to recover only + the latest binlog file. + 2. A binlog rotation ensures that the previous binlogs and engine's + transaction logs are flushed and made durable. Hence all previous + transactions are made durable. + */ + if (opt_trim_binlog) { + set_valid_pos(valid_pos, cur_binlog_file, first_gtid_start, + engine_binlog_file, *engine_binlog_pos); + } + } } +fin1: if (res) LogErr(ERROR_LEVEL, ER_BINLOG_CRASH_RECOVERY_FAILED); return res; } diff --git a/sql/binlog.h b/sql/binlog.h index f933b5df0c2c..01f77c84583c 100644 --- a/sql/binlog.h +++ b/sql/binlog.h @@ -266,10 +266,13 @@ class MYSQL_BIN_LOG : public TC_LOG { PSI_mutex_key m_key_LOCK_sync; /** The instrumentation key to use for @ LOCK_xids. */ PSI_mutex_key m_key_LOCK_xids; + PSI_mutex_key m_key_LOCK_non_xid_trxs; /** The instrumentation key to use for @ update_cond. */ PSI_cond_key m_key_update_cond; /** The instrumentation key to use for @ prep_xids_cond. */ PSI_cond_key m_key_prep_xids_cond; + /** The instrumentation key to use for @ non_xid_trxs_cond. */ + PSI_cond_key m_key_non_xid_trxs_cond; /** The instrumentation key to use for opening the log file. */ PSI_file_key m_key_file_log; /** The instrumentation key to use for opening the log index file. 
*/ @@ -285,6 +288,7 @@ class MYSQL_BIN_LOG : public TC_LOG { mysql_mutex_t LOCK_sync; mysql_mutex_t LOCK_binlog_end_pos; mysql_mutex_t LOCK_xids; + mysql_mutex_t LOCK_non_xid_trxs; mysql_cond_t update_cond; std::atomic atomic_binlog_end_pos; @@ -371,6 +375,17 @@ class MYSQL_BIN_LOG : public TC_LOG { int32 get_prep_xids() { return m_atomic_prep_xids; } + uint32_t non_xid_trxs; + mysql_cond_t non_xid_trxs_cond; + + void inc_non_xid_trxs(THD *thd); + void dec_non_xid_trxs(THD *thd); + + int32 get_non_xid_trxs() { + mysql_mutex_assert_owner(&LOCK_non_xid_trxs); + return non_xid_trxs; + } + inline uint get_sync_period() { return *sync_period_ptr; } public: @@ -453,8 +468,9 @@ class MYSQL_BIN_LOG : public TC_LOG { PSI_mutex_key key_LOCK_flush_queue, PSI_mutex_key key_LOCK_log, PSI_mutex_key key_LOCK_binlog_end_pos, PSI_mutex_key key_LOCK_sync, PSI_mutex_key key_LOCK_sync_queue, PSI_mutex_key key_LOCK_xids, - PSI_cond_key key_COND_done, PSI_cond_key key_update_cond, - PSI_cond_key key_prep_xids_cond, PSI_file_key key_file_log, + PSI_mutex_key key_LOCK_non_xid_trxs, PSI_cond_key key_COND_done, + PSI_cond_key key_update_cond, PSI_cond_key key_prep_xids_cond, + PSI_cond_key key_non_xid_trxs_cond, PSI_file_key key_file_log, PSI_file_key key_file_log_index, PSI_file_key key_file_log_cache, PSI_file_key key_file_log_index_cache) { m_key_COND_done = key_COND_done; @@ -470,8 +486,10 @@ class MYSQL_BIN_LOG : public TC_LOG { m_key_LOCK_commit = key_LOCK_commit; m_key_LOCK_sync = key_LOCK_sync; m_key_LOCK_xids = key_LOCK_xids; + m_key_LOCK_non_xid_trxs = key_LOCK_non_xid_trxs; m_key_update_cond = key_update_cond; m_key_prep_xids_cond = key_prep_xids_cond; + m_key_non_xid_trxs_cond = key_non_xid_trxs_cond; m_key_file_log = key_file_log; m_key_file_log_index = key_file_log_index; m_key_file_log_cache = key_file_log_cache; diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 1441ed486ba5..f50728ba0a33 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -922,6 +922,7 @@ static PSI_mutex_key 
key_BINLOG_LOCK_binlog_end_pos; static PSI_mutex_key key_BINLOG_LOCK_sync; static PSI_mutex_key key_BINLOG_LOCK_sync_queue; static PSI_mutex_key key_BINLOG_LOCK_xids; +static PSI_mutex_key key_BINLOG_LOCK_non_xid_trxs; static PSI_rwlock_key key_rwlock_global_sid_lock; static PSI_rwlock_key key_rwlock_gtid_mode_lock; static PSI_rwlock_key key_rwlock_LOCK_system_variables_hash; @@ -1004,7 +1005,7 @@ LEX_STRING opt_init_connect, opt_init_slave; LEX_STRING opt_mandatory_roles; bool opt_mandatory_roles_cache = false; bool opt_always_activate_granted_roles = false; -bool opt_bin_log; +bool opt_bin_log, opt_trim_binlog; bool opt_general_log, opt_slow_log, opt_general_log_raw; ulonglong log_output_options; bool opt_log_queries_not_using_indexes = false; @@ -4499,10 +4500,11 @@ int init_common_variables() { key_BINLOG_LOCK_commit_queue, key_BINLOG_LOCK_done, key_BINLOG_LOCK_flush_queue, key_BINLOG_LOCK_log, key_BINLOG_LOCK_binlog_end_pos, key_BINLOG_LOCK_sync, - key_BINLOG_LOCK_sync_queue, key_BINLOG_LOCK_xids, key_BINLOG_COND_done, - key_BINLOG_update_cond, key_BINLOG_prep_xids_cond, key_file_binlog, - key_file_binlog_index, key_file_binlog_cache, - key_file_binlog_index_cache); + key_BINLOG_LOCK_sync_queue, key_BINLOG_LOCK_xids, + key_BINLOG_LOCK_non_xid_trxs, key_BINLOG_COND_done, + key_BINLOG_update_cond, key_BINLOG_prep_xids_cond, + key_BINLOG_non_xid_trxs_cond, key_file_binlog, key_file_binlog_index, + key_file_binlog_cache, key_file_binlog_index_cache); #endif /* @@ -8434,6 +8436,12 @@ struct my_option my_long_options[] = { &opt_upgrade_mode, &opt_upgrade_mode, &upgrade_mode_typelib, GET_ENUM, REQUIRED_ARG, UPGRADE_AUTO, 0, 0, nullptr, 0, nullptr}, + {"trim-binlog-to-recover", OPT_TRIM_BINLOG_TO_RECOVER, + "Trim the last binlog (if required) to the position until which the " + "engine has successfully committed all transactions.", + &opt_trim_binlog, &opt_trim_binlog, 0, GET_BOOL, NO_ARG, 0, 0, 0, nullptr, + 0, nullptr}, + {nullptr, 0, nullptr, nullptr, nullptr, 
nullptr, GET_NO_ARG, NO_ARG, 0, 0, 0, nullptr, 0, nullptr}}; @@ -9639,6 +9647,7 @@ static int mysql_init_variables() { pidfile_name[0] = 0; binlog_file_basedir[0] = binlog_index_basedir[0] = 0; myisam_test_invalid_symlink = test_if_data_home_dir; + opt_trim_binlog = false; opt_general_log = opt_slow_log = false; opt_disable_networking = opt_skip_show_db = false; opt_skip_name_resolve = false; @@ -10258,6 +10267,9 @@ bool mysqld_get_one_option(int optid, case OPT_SHOW_OLD_TEMPORALS: push_deprecated_warn_no_replacement(nullptr, "show_old_temporals"); break; + case OPT_TRIM_BINLOG_TO_RECOVER: + /* Nothing to do: the option is GET_BOOL bound to opt_trim_binlog, so the parsed value is already stored; forcing true here would also enable it for --skip-trim-binlog-to-recover. */ + break; case 'p': if (argument) { char *start = argument; @@ -11154,6 +11166,7 @@ PSI_mutex_key key_RELAYLOG_LOCK_log_end_pos; PSI_mutex_key key_RELAYLOG_LOCK_sync; PSI_mutex_key key_RELAYLOG_LOCK_sync_queue; PSI_mutex_key key_RELAYLOG_LOCK_xids; +PSI_mutex_key key_RELAYLOG_LOCK_non_xid_trxs; PSI_mutex_key key_gtid_ensure_index_mutex; PSI_mutex_key key_object_cache_mutex; // TODO need to initialize PSI_cond_key key_object_loading_cond; // TODO need to initialize @@ -11177,6 +11190,7 @@ static PSI_mutex_info all_server_mutexes[]= { &key_BINLOG_LOCK_sync, "MYSQL_BIN_LOG::LOCK_sync", 0, 0, PSI_DOCUMENT_ME}, { &key_BINLOG_LOCK_sync_queue, "MYSQL_BIN_LOG::LOCK_sync_queue", 0, 0, PSI_DOCUMENT_ME}, { &key_BINLOG_LOCK_xids, "MYSQL_BIN_LOG::LOCK_xids", 0, 0, PSI_DOCUMENT_ME}, + { &key_BINLOG_LOCK_non_xid_trxs, "MYSQL_BIN_LOG::LOCK_non_xid_trxs", 0, 0, PSI_DOCUMENT_ME}, { &key_RELAYLOG_LOCK_commit, "MYSQL_RELAY_LOG::LOCK_commit", 0, 0, PSI_DOCUMENT_ME}, { &key_RELAYLOG_LOCK_commit_queue, "MYSQL_RELAY_LOG::LOCK_commit_queue", 0, 0, PSI_DOCUMENT_ME}, { &key_RELAYLOG_LOCK_done, "MYSQL_RELAY_LOG::LOCK_done", 0, 0, PSI_DOCUMENT_ME}, @@ -11187,6 +11201,7 @@ static PSI_mutex_info all_server_mutexes[]= { &key_RELAYLOG_LOCK_sync, "MYSQL_RELAY_LOG::LOCK_sync", 0, 0, PSI_DOCUMENT_ME}, { &key_RELAYLOG_LOCK_sync_queue, "MYSQL_RELAY_LOG::LOCK_sync_queue", 0, 0,
PSI_DOCUMENT_ME}, { &key_RELAYLOG_LOCK_xids, "MYSQL_RELAY_LOG::LOCK_xids", 0, 0, PSI_DOCUMENT_ME}, + { &key_RELAYLOG_LOCK_non_xid_trxs, "MYSQL_RELAY_LOG::LOCK_non_xid_trxs", 0, 0, PSI_DOCUMENT_ME}, { &key_hash_filo_lock, "hash_filo::lock", 0, 0, PSI_DOCUMENT_ME}, { &Gtid_set::key_gtid_executed_free_intervals_mutex, "Gtid_set::gtid_executed::free_intervals_mutex", 0, 0, PSI_DOCUMENT_ME}, { &key_LOCK_crypt, "LOCK_crypt", PSI_FLAG_SINGLETON, 0, PSI_DOCUMENT_ME}, @@ -11318,6 +11333,8 @@ PSI_cond_key key_cond_mts_gaq; PSI_cond_key key_RELAYLOG_update_cond; PSI_cond_key key_RELAYLOG_COND_done; PSI_cond_key key_RELAYLOG_prep_xids_cond; +PSI_cond_key key_BINLOG_non_xid_trxs_cond; +PSI_cond_key key_RELAYLOG_non_xid_trxs_cond; PSI_cond_key key_gtid_ensure_index_cond; PSI_cond_key key_COND_thr_lock; PSI_cond_key key_commit_order_manager_cond; @@ -11332,9 +11349,11 @@ static PSI_cond_info all_server_conds[]= { &key_BINLOG_COND_done, "MYSQL_BIN_LOG::COND_done", 0, 0, PSI_DOCUMENT_ME}, { &key_BINLOG_update_cond, "MYSQL_BIN_LOG::update_cond", 0, 0, PSI_DOCUMENT_ME}, { &key_BINLOG_prep_xids_cond, "MYSQL_BIN_LOG::prep_xids_cond", 0, 0, PSI_DOCUMENT_ME}, + { &key_BINLOG_non_xid_trxs_cond, "MYSQL_BIN_LOG::non_xid_trxs_cond", 0, 0, PSI_DOCUMENT_ME}, { &key_RELAYLOG_COND_done, "MYSQL_RELAY_LOG::COND_done", 0, 0, PSI_DOCUMENT_ME}, { &key_RELAYLOG_update_cond, "MYSQL_RELAY_LOG::update_cond", 0, 0, PSI_DOCUMENT_ME}, { &key_RELAYLOG_prep_xids_cond, "MYSQL_RELAY_LOG::prep_xids_cond", 0, 0, PSI_DOCUMENT_ME}, + { &key_RELAYLOG_non_xid_trxs_cond, "MYSQL_RELAY_LOG::non_xid_trxs_cond", 0, 0, PSI_DOCUMENT_ME}, #if defined(_WIN32) { &key_COND_handler_count, "COND_handler_count", PSI_FLAG_SINGLETON, 0, PSI_DOCUMENT_ME}, #endif diff --git a/sql/mysqld.h b/sql/mysqld.h index c8587ac0215d..97b0078fadb4 100644 --- a/sql/mysqld.h +++ b/sql/mysqld.h @@ -157,7 +157,7 @@ enum_server_operational_state get_server_state(); extern bool opt_improved_dup_key_error; extern bool opt_large_files, server_id_supplied;
-extern bool opt_bin_log; +extern bool opt_bin_log, opt_trim_binlog; extern bool opt_binlog_trx_meta_data; extern bool opt_log_slave_updates; extern bool opt_log_unsafe_statements; @@ -557,6 +557,7 @@ extern PSI_mutex_key key_RELAYLOG_LOCK_log; extern PSI_mutex_key key_RELAYLOG_LOCK_log_end_pos; extern PSI_mutex_key key_RELAYLOG_LOCK_sync; extern PSI_mutex_key key_RELAYLOG_LOCK_sync_queue; +extern PSI_mutex_key key_RELAYLOG_LOCK_non_xid_trxs; extern PSI_mutex_key key_RELAYLOG_LOCK_xids; extern PSI_mutex_key key_gtid_ensure_index_mutex; extern PSI_mutex_key key_mts_temp_table_LOCK; @@ -595,6 +596,8 @@ extern PSI_cond_key key_cond_mts_gaq; extern PSI_cond_key key_RELAYLOG_COND_done; extern PSI_cond_key key_RELAYLOG_update_cond; extern PSI_cond_key key_RELAYLOG_prep_xids_cond; +extern PSI_cond_key key_BINLOG_non_xid_trxs_cond; +extern PSI_cond_key key_RELAYLOG_non_xid_trxs_cond; extern PSI_cond_key key_gtid_ensure_index_cond; extern PSI_cond_key key_COND_thr_lock; extern PSI_cond_key key_cond_slave_worker_hash; diff --git a/sql/options_mysqld.h b/sql/options_mysqld.h index 3552140e80d2..2642eab08b67 100644 --- a/sql/options_mysqld.h +++ b/sql/options_mysqld.h @@ -109,7 +109,8 @@ enum options_mysqld { OPT_RELAY_LOG_INFO_FILE, OPT_MASTER_INFO_FILE, OPT_LOG_BIN_USE_V1_ROW_EVENTS, - OPT_SLAVE_ROWS_SEARCH_ALGORITHMS + OPT_SLAVE_ROWS_SEARCH_ALGORITHMS, + OPT_TRIM_BINLOG_TO_RECOVER, }; #endif // OPTIONS_MYSQLD_INCLUDED diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc index 2e3960936605..2058194f9b2e 100644 --- a/sql/rpl_rli.cc +++ b/sql/rpl_rli.cc @@ -205,8 +205,9 @@ Relay_log_info::Relay_log_info(bool is_slave_recovery, key_RELAYLOG_LOCK_flush_queue, key_RELAYLOG_LOCK_log, key_RELAYLOG_LOCK_log_end_pos, key_RELAYLOG_LOCK_sync, key_RELAYLOG_LOCK_sync_queue, key_RELAYLOG_LOCK_xids, - key_RELAYLOG_COND_done, key_RELAYLOG_update_cond, - key_RELAYLOG_prep_xids_cond, key_file_relaylog, + key_RELAYLOG_LOCK_non_xid_trxs, key_RELAYLOG_COND_done, + key_RELAYLOG_update_cond, 
key_RELAYLOG_prep_xids_cond, + key_RELAYLOG_non_xid_trxs_cond, key_file_relaylog, key_file_relaylog_index, key_file_relaylog_cache, key_file_relaylog_index_cache); #endif diff --git a/sql/sql_class.h b/sql/sql_class.h index af665e1c9f65..5b35954ca325 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -2533,6 +2533,7 @@ class THD : public MDL_context_owner, sp_cache *sp_func_cache; + bool non_xid_trx = false; /* NOTE(review): presumably marks this THD as counted in MYSQL_BIN_LOG::non_xid_trxs -- confirm in binlog.cc */ /** number of name_const() substitutions, see sp_head.cc:subst_spvars() */ uint query_name_consts; /* diff --git a/sql/xa.cc b/sql/xa.cc index b3f541c1cad4..642ab09439a7 100644 --- a/sql/xa.cc +++ b/sql/xa.cc @@ -374,7 +374,12 @@ static bool xarecover_handlerton(THD *, plugin_ref plugin, void *arg) { tc_heuristic_recover == TC_HEURISTIC_RECOVER_COMMIT; } // recovery mode - if (recovery_mode_condition) { + // We roll-forward the txn only if the current prepared txn is present + // in the binlog's commit list and we have not been explicitly asked + // to trim binlog during recovery. If we are asked to trim binlogs + // during recovery (i.e. if opt_trim_binlog is set), then we have to + // roll back all prepared txns in the engine. + if (recovery_mode_condition && !opt_trim_binlog) { // case: check if this prepared transaction's gtid is greater than // what we recovered before if (current_gtid != nullptr &&