forked from facebook/mysql-5.6
-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[vector] Add optimizer support for COUNT(*) queries
Summary: When COUNT(*) style aggregation appears in queries, the optimizer realizes that a full scan needs to be done. The index doesn't matter - any index will do as long as it satisfies the other query predicates present. There are two heuristics at play. If there is a choice of multiple indexes, the optimizer will pick the index that has the shortest length since that means less disk scan time, therefore faster query (heuristic #1). In the case of vector indexes, the current design can have the vector field present in the clustered PK as well as the vector index (e.g. the flat index). Even though there's no clear advantage, the heuristic is based on INNODB style index layout, and assumes that a secondary index will always be a better choice as long as it does not cover all table columns (heuristic #2). In reality, in both INNODB and MyRocks, the secondary key does contain the PK. But this fact is ignored. What ends up happening is that by only considering the # of explicitly defined key parts, the vector index ends up getting picked up. In the case of the IVF index, the vector index may be shorter than the primary clustered index. Thus secondary index will have a shorter key length than the primary, and will end up getting picked up (i.e. if the key part heuristic doesn't already pick the vector index again). There can be many other index combinations. We want the optimizer to pick another available index. If no other index is available, then pick the primary clustered index. Differential Revision: D55003211 fbshipit-source-id: 5ce6b34
- Loading branch information
Showing
3 changed files
with
448 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,263 @@ | ||
CREATE TABLE t1 ( | ||
id BIGINT NOT NULL PRIMARY KEY, | ||
a int, | ||
b int, | ||
vector1 JSON, | ||
INDEX vector_key_1(vector1) FB_VECTOR_INDEX_TYPE 'flat' fb_vector_dimension 3 | ||
); | ||
insert into t1 values (1, 2, 2, '[1,2,3]'), (2, 1, 2, '[2,22,31]'); | ||
insert into t1 values (3, 1, 1, '[1,2,4]'), (4, 2, 1, '[2,22,33]'); | ||
insert into t1 values (5, 3, 1, '[11,22,4]'), (6, 2, 3, '[7,22,33]'); | ||
insert into t1 values (7, 3, 2, '[10,20,40]'), (8, 2, 1, '[20,22,41]'); | ||
insert into t1 values (9, 1, 1, '[20,10,30]'), (10, 1, 2, '[25,25,41]'); | ||
|
||
1. Verify basic COUNT(*) on the table | ||
|
||
explain select COUNT(*) from t1; | ||
id select_type table partitions type possible_keys key key_len ref rows filtered Extra | ||
1 SIMPLE t1 NULL index NULL PRIMARY 8 NULL ROWS FILTERED Using index | ||
|
||
SELECT COUNT(*) from t1; | ||
COUNT(*) | ||
10 | ||
|
||
2. Verify COUNT(*) with WHERE clause with REF | ||
|
||
explain select COUNT(*) from t1 WHERE id = 2; | ||
id select_type table partitions type possible_keys key key_len ref rows filtered Extra | ||
1 SIMPLE t1 NULL const PRIMARY PRIMARY 8 const ROWS FILTERED Using index | ||
|
||
SELECT COUNT(*) from t1 WHERE id = 2; | ||
COUNT(*) | ||
1 | ||
|
||
3. Verify COUNT(*) with WHERE clause with RANGE | ||
|
||
explain select COUNT(*) from t1 WHERE id > 2 and id < 10; | ||
id select_type table partitions type possible_keys key key_len ref rows filtered Extra | ||
1 SIMPLE t1 NULL index PRIMARY PRIMARY 8 NULL ROWS FILTERED Using where; Using index | ||
|
||
SELECT COUNT(*) from t1 WHERE id > 2 and id < 10; | ||
COUNT(*) | ||
7 | ||
|
||
4. Verify COUNT(*) with WHERE clause - other columns | ||
|
||
explain select COUNT(*) from t1 WHERE a > 1 and b < 3 and id > 4; | ||
id select_type table partitions type possible_keys key key_len ref rows filtered Extra | ||
1 SIMPLE t1 NULL range PRIMARY PRIMARY 8 NULL ROWS FILTERED Using where | ||
|
||
SELECT COUNT(*) from t1 WHERE a > 1 and b < 3 and id > 4; | ||
COUNT(*) | ||
3 | ||
|
||
5. Verify COUNT(*) with GROUP BY on non-key column | ||
|
||
explain format=tree select a, COUNT(*) from t1 group by a; | ||
EXPLAIN | ||
-> Table scan on <temporary> | ||
-> Aggregate using temporary table | ||
-> Table scan on t1 (rows=1) | ||
|
||
|
||
SELECT a, COUNT(*) from t1 GROUP BY a; | ||
a COUNT(*) | ||
2 4 | ||
1 4 | ||
3 2 | ||
|
||
6. Verify COUNT(*) with GROUP BY on multiple non-key columns | ||
|
||
explain format=tree select a, b, COUNT(*) from t1 group by a, b; | ||
EXPLAIN | ||
-> Table scan on <temporary> | ||
-> Aggregate using temporary table | ||
-> Table scan on t1 (rows=1) | ||
|
||
|
||
SELECT a, b, COUNT(*) FROM t1 GROUP BY a, b; | ||
a b COUNT(*) | ||
2 2 1 | ||
1 2 2 | ||
1 1 2 | ||
2 1 2 | ||
3 1 1 | ||
2 3 1 | ||
3 2 1 | ||
|
||
7. Verify COUNT(*) with GROUP BY on non-key column, PK | ||
|
||
EXPLAIN FORMAT=TREE SELECT a, id, COUNT(*) FROM t1 GROUP BY a, id; | ||
EXPLAIN | ||
-> Table scan on <temporary> | ||
-> Aggregate using temporary table | ||
-> Table scan on t1 (rows=1) | ||
|
||
|
||
SELECT a, id, COUNT(*) FROM t1 GROUP BY a, id; | ||
a id COUNT(*) | ||
2 1 1 | ||
1 2 1 | ||
1 3 1 | ||
2 4 1 | ||
3 5 1 | ||
2 6 1 | ||
3 7 1 | ||
2 8 1 | ||
1 9 1 | ||
1 10 1 | ||
|
||
8. Verify COUNT(*) with GROUP BY on PK, non-key column | ||
|
||
EXPLAIN FORMAT=TREE SELECT id, a, COUNT(*) from t1 GROUP BY id, a; | ||
EXPLAIN | ||
-> Table scan on <temporary> | ||
-> Aggregate using temporary table | ||
-> Table scan on t1 (rows=1) | ||
|
||
|
||
SELECT id, a, COUNT(*) from t1 GROUP BY id, a; | ||
id a COUNT(*) | ||
1 2 1 | ||
2 1 1 | ||
3 1 1 | ||
4 2 1 | ||
5 3 1 | ||
6 2 1 | ||
7 3 1 | ||
8 2 1 | ||
9 1 1 | ||
10 1 1 | ||
|
||
9. Verify COUNT(*) with GROUP BY on non-key column, vector column | ||
|
||
EXPLAIN FORMAT=TREE SELECT a, vector1, COUNT(*) FROM t1 GROUP BY a, vector1; | ||
EXPLAIN | ||
-> Table scan on <temporary> | ||
-> Aggregate using temporary table | ||
-> Table scan on t1 (rows=1) | ||
|
||
|
||
SELECT a, vector1, COUNT(*) FROM t1 GROUP BY a, vector1; | ||
a vector1 COUNT(*) | ||
2 [1, 2, 3] 1 | ||
1 [2, 22, 31] 1 | ||
1 [1, 2, 4] 1 | ||
2 [2, 22, 33] 1 | ||
3 [11, 22, 4] 1 | ||
2 [7, 22, 33] 1 | ||
3 [10, 20, 40] 1 | ||
2 [20, 22, 41] 1 | ||
1 [20, 10, 30] 1 | ||
1 [25, 25, 41] 1 | ||
|
||
10. Verify COUNT(*) with GROUP BY on vector column, non-key column | ||
|
||
EXPLAIN FORMAT=TREE SELECT vector1, a, COUNT(*) FROM t1 GROUP BY vector1, a; | ||
EXPLAIN | ||
-> Table scan on <temporary> | ||
-> Aggregate using temporary table | ||
-> Table scan on t1 (rows=1) | ||
|
||
|
||
SELECT vector1, a, COUNT(*) FROM t1 GROUP BY vector1, a; | ||
vector1 a COUNT(*) | ||
[1, 2, 3] 2 1 | ||
[2, 22, 31] 1 1 | ||
[1, 2, 4] 1 1 | ||
[2, 22, 33] 2 1 | ||
[11, 22, 4] 3 1 | ||
[7, 22, 33] 2 1 | ||
[10, 20, 40] 3 1 | ||
[20, 22, 41] 2 1 | ||
[20, 10, 30] 1 1 | ||
[25, 25, 41] 1 1 | ||
|
||
11. Verify COUNT(*) with GROUP BY on vector column, PK | ||
|
||
EXPLAIN FORMAT=TREE SELECT vector1, id, COUNT(*) FROM t1 GROUP BY vector1, id; | ||
EXPLAIN | ||
-> Table scan on <temporary> | ||
-> Aggregate using temporary table | ||
-> Table scan on t1 (rows=1) | ||
|
||
|
||
SELECT vector1, id, COUNT(*) FROM t1 GROUP BY vector1, id; | ||
vector1 id COUNT(*) | ||
[1, 2, 3] 1 1 | ||
[2, 22, 31] 2 1 | ||
[1, 2, 4] 3 1 | ||
[2, 22, 33] 4 1 | ||
[11, 22, 4] 5 1 | ||
[7, 22, 33] 6 1 | ||
[10, 20, 40] 7 1 | ||
[20, 22, 41] 8 1 | ||
[20, 10, 30] 9 1 | ||
[25, 25, 41] 10 1 | ||
|
||
12. Verify COUNT(*) with GROUP BY on PK, vector column | ||
|
||
EXPLAIN FORMAT=TREE SELECT id, vector1, COUNT(*) FROM t1 GROUP BY id, vector1; | ||
EXPLAIN | ||
-> Table scan on <temporary> | ||
-> Aggregate using temporary table | ||
-> Table scan on t1 (rows=1) | ||
|
||
|
||
SELECT id, vector1, COUNT(*) FROM t1 GROUP BY id, vector1; | ||
id vector1 COUNT(*) | ||
1 [1, 2, 3] 1 | ||
2 [2, 22, 31] 1 | ||
3 [1, 2, 4] 1 | ||
4 [2, 22, 33] 1 | ||
5 [11, 22, 4] 1 | ||
6 [7, 22, 33] 1 | ||
7 [10, 20, 40] 1 | ||
8 [20, 22, 41] 1 | ||
9 [20, 10, 30] 1 | ||
10 [25, 25, 41] 1 | ||
|
||
13. Verify COUNT(*) with GROUP BY on vector column, PK, non-key column | ||
|
||
EXPLAIN FORMAT=TREE SELECT vector1, id, a, COUNT(*) FROM t1 GROUP BY vector1, id, a; | ||
EXPLAIN | ||
-> Table scan on <temporary> | ||
-> Aggregate using temporary table | ||
-> Table scan on t1 (rows=1) | ||
|
||
|
||
SELECT vector1, id, a, COUNT(*) FROM t1 GROUP BY vector1, id, a; | ||
vector1 id a COUNT(*) | ||
[1, 2, 3] 1 2 1 | ||
[2, 22, 31] 2 1 1 | ||
[1, 2, 4] 3 1 1 | ||
[2, 22, 33] 4 2 1 | ||
[11, 22, 4] 5 3 1 | ||
[7, 22, 33] 6 2 1 | ||
[10, 20, 40] 7 3 1 | ||
[20, 22, 41] 8 2 1 | ||
[20, 10, 30] 9 1 1 | ||
[25, 25, 41] 10 1 1 | ||
|
||
14. Verify COUNT(*) with GROUP BY on PK, vector column, non-key column | ||
|
||
EXPLAIN FORMAT=TREE SELECT id, vector1, a, COUNT(*) FROM t1 GROUP BY id, vector1, a; | ||
EXPLAIN | ||
-> Table scan on <temporary> | ||
-> Aggregate using temporary table | ||
-> Table scan on t1 (rows=1) | ||
|
||
|
||
SELECT id, vector1, a, COUNT(*) FROM t1 GROUP BY id, vector1, a; | ||
id vector1 a COUNT(*) | ||
1 [1, 2, 3] 2 1 | ||
2 [2, 22, 31] 1 1 | ||
3 [1, 2, 4] 1 1 | ||
4 [2, 22, 33] 2 1 | ||
5 [11, 22, 4] 3 1 | ||
6 [7, 22, 33] 2 1 | ||
7 [10, 20, 40] 3 1 | ||
8 [20, 22, 41] 2 1 | ||
9 [20, 10, 30] 1 1 | ||
10 [25, 25, 41] 1 1 | ||
drop table t1; |
Oops, something went wrong.