From ca00a4ed886ff09042bbef18917a165dd60b29d1 Mon Sep 17 00:00:00 2001 From: Manuel Ung <mung@meta.com> Date: Thu, 1 Jun 2023 02:50:44 -0700 Subject: [PATCH] Fix handling of consecutive sentinel values Summary: For partial indexes, it is possible to have consecutive sentinel values, and the code to skip over sentinel values during iteration does not handle this correctly. eg. If we have have the following data: ``` a b 1 1 (sentinel) 1 2 (sentinel) 1 2 1 1 2 2 ``` `get_prefix_from_start` returns `(1, 2)` as the group we expect to see (since `(1, 1)` group is empty on the primary key). A seek to `a >=1` on the secondary key would land on `(1, 1)`, but we skip this key since we detect that it's a sentinel value. The cursor is then on `(1, 2)` which matches the expected prefix, and we start reading from the SK from that position. However, this is also a sentinel value, and will not decode correctly. This leads to these errors: ``` ERROR 1296 (HY000): Got error 505 'Found data corruption.' from ROCKSDB ``` The fix is to use a while loop to skip over sentinel values. There are two other places where we skip over sentinel values, but they are not problematic since they are used for iterating over a non-empty groups (so consecutive sentinel values are not possible). I decided not to add while loops defensively in the other places, so that we fail loudly if something unexpected happens. Ideally, we should just seek directly to the first group that we got from the PK (which guarantees a non-empty group, similar to other cases), but this is a more involved change for later. Differential Revision: D46352747 --- mysql-test/suite/rocksdb/r/partial_index.result | 1 + mysql-test/suite/rocksdb/t/partial_index.test | 9 +++++++++ mysql-test/suite/rocksdb/t/partial_index_link.inc | 4 ++++ storage/rocksdb/rdb_iterator.cc | 5 ++++- 4 files changed, 18 insertions(+), 1 deletion(-) diff --git a/mysql-test/suite/rocksdb/r/partial_index.result b/mysql-test/suite/rocksdb/r/partial_index.result index 62013c092717..56df856168c8 100644 --- a/mysql-test/suite/rocksdb/r/partial_index.result +++ b/mysql-test/suite/rocksdb/r/partial_index.result @@ -231,5 +231,6 @@ rocksdb_partial_index_rows_sorted 1 include/assert.inc [Check that materialized groups are zero.] include/assert.inc [Check that materialized rows are zero.] DROP TABLE t1, t2; +DELETE FROM sentinel where c2 <= 2; DROP TABLE sentinel; set optimizer_force_index_for_range = off; diff --git a/mysql-test/suite/rocksdb/t/partial_index.test b/mysql-test/suite/rocksdb/t/partial_index.test index e57a585cae09..aaeee2810f5e 100644 --- a/mysql-test/suite/rocksdb/t/partial_index.test +++ b/mysql-test/suite/rocksdb/t/partial_index.test @@ -77,6 +77,15 @@ SELECT variable_name, t2.variable_value - t1.variable_value AS diff FROM t1 JOIN DROP TABLE t1, t2; +# +# Test consecutive sentinel values +# +DELETE FROM sentinel where c2 <= 2; + +--let $query1= SELECT c1, c2 FROM sentinel FORCE INDEX(c1); +--let $query2= SELECT c1, c2 FROM sentinel FORCE INDEX(PRIMARY); +--source include/diff_queries.inc + DROP TABLE sentinel; set optimizer_force_index_for_range = off; diff --git a/mysql-test/suite/rocksdb/t/partial_index_link.inc b/mysql-test/suite/rocksdb/t/partial_index_link.inc index dd1399b780a3..1b2f121f7640 100644 --- a/mysql-test/suite/rocksdb/t/partial_index_link.inc +++ b/mysql-test/suite/rocksdb/t/partial_index_link.inc @@ -47,6 +47,10 @@ while ($i < 10) { inc $i; } +--let $query1= SELECT DISTINCT link_type FROM link_table FORCE INDEX (id1_type) +--let $query2= SELECT DISTINCT link_type FROM link_table FORCE INDEX (id1_type2) +--source include/diff_queries.inc + # Rebuild the table so that nothing is materialized anymore. ALTER TABLE link_table ENGINE=ROCKSDB; diff --git a/storage/rocksdb/rdb_iterator.cc b/storage/rocksdb/rdb_iterator.cc index 45befb2839ee..4336e812bb0a 100644 --- a/storage/rocksdb/rdb_iterator.cc +++ b/storage/rocksdb/rdb_iterator.cc @@ -854,7 +854,10 @@ int Rdb_iterator_partial::seek(enum ha_rkey_function find_flag, rc = Rdb_iterator_base::seek(find_flag, start_key, true, end_key, read_current); - if (rc == 0 && Rdb_iterator_base::key().size() == m_cur_prefix_key_len) { + while (rc == 0 && Rdb_iterator_base::key().size() == m_cur_prefix_key_len) { + if (thd_killed(m_thd)) { + return HA_ERR_QUERY_INTERRUPTED; + } rc = direction ? Rdb_iterator_base::next() : Rdb_iterator_base::prev(); }