Skip to content

Commit

Permalink
sql: generate stats for multi-column inverted index columns
Browse files Browse the repository at this point in the history
This commit allows stats to be created for columns in multi-column
inverted indexes.

Also, it fixes a bug that caused histograms to not be collected for
non-inverted columns referenced in partial inverted index predicates.

Release note (bug fix): Statistics are now correctly generated for
columns in multi-column inverted indexes and columns referenced in
partial inverted index predicates.
  • Loading branch information
mgartner committed Feb 2, 2021
1 parent 8abb0e3 commit 26db058
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 5 deletions.
10 changes: 8 additions & 2 deletions pkg/sql/create_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -377,9 +377,10 @@ func createStatsDefaultColumns(

// Add column stats for each secondary index.
for _, idx := range desc.PublicNonPrimaryIndexes() {
isInverted := idx.GetType() == descpb.IndexDescriptor_INVERTED
for j, n := 0, idx.NumColumns(); j < n; j++ {
// Only the last column of an inverted index is an inverted column.
isInverted := idx.GetType() == descpb.IndexDescriptor_INVERTED && j == n-1

for j := 0; j < idx.NumColumns(); j++ {
// Generate stats for each indexed column.
addIndexColumnStatsIfNotExists(idx.GetColumnID(j), isInverted)

Expand Down Expand Up @@ -420,6 +421,11 @@ func createStatsDefaultColumns(

// Generate stats for each column individually.
for _, colID := range colIDs.Ordered() {
col, err := desc.FindColumnByID(colID)
if err != nil {
return nil, err
}
isInverted := colinfo.ColumnTypeIsInvertedIndexable(col.Type)
addIndexColumnStatsIfNotExists(colID, isInverted)
}
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/distsql_plan_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ func (dsp *DistSQLPlanner) createStatsPlan(
// with different configurations). See #50655.
col := s.columns[0]
for _, index := range desc.PublicNonPrimaryIndexes() {
if index.GetType() == descpb.IndexDescriptor_INVERTED && index.GetColumnID(0) == col {
if index.GetType() == descpb.IndexDescriptor_INVERTED && index.InvertedColumnID() == col {
spec.Index = index.IndexDesc()
break
}
Expand Down
39 changes: 37 additions & 2 deletions pkg/sql/logictest/testdata/logic_test/distsql_stats
Original file line number Diff line number Diff line change
Expand Up @@ -798,8 +798,16 @@ upper_bound range_rows distinct_range_rows equal_rows
# Test that columns referenced in partial index predicates always have
# histograms collected for them, with up to 200 buckets.
statement ok
CREATE TABLE partial (a INT, b INT, c INT, INDEX (a) WHERE b > 0 OR c > 0);
INSERT INTO partial VALUES (1, 1, 1), (2, 2, 2), (3, 3, 3);
CREATE TABLE partial (
a INT,
b INT,
c INT,
d INT,
j JSON,
INDEX (a) WHERE b > 0 OR c > 0,
INVERTED INDEX (j) WHERE d = 10
);
INSERT INTO partial VALUES (1, 1, 1, 1, '{"a": "b"}'), (2, 2, 2, 10, '{"c": "d"}'), (3, 3, 3, 1, '{"e": "f"}');
CREATE STATISTICS s FROM partial

query TTIIB colnames,rowsort
Expand All @@ -818,6 +826,8 @@ statistics_name column_names row_count null_count has_histogram
s {a} 3 0 true
s {b} 3 0 true
s {c} 3 0 true
s {d} 3 0 true
s {j} 3 0 true
s {rowid} 3 0 true

let $hist_id_1
Expand Down Expand Up @@ -960,6 +970,31 @@ upper_bound
'\x42fd4700000000000000000000000000000000bcc00000000000003ffbecde5da115a83ff661bdc396bcdc' 0 0 1
'\x42fd5ad4000000000000000000000000000000bcc00000000000003ffbecde5da115a83ff661bdc396bcdc' 0 0 1

# Stats for multi-column inverted indexes.
statement ok
CREATE TABLE multi_col (
id INT PRIMARY KEY,
s STRING,
j JSON,
INVERTED INDEX (s, j)
);
INSERT INTO multi_col VALUES (1, 'foo', '{"a": "b"}');
CREATE STATISTICS s FROM multi_col;

query TB colnames
SELECT
column_names,
histogram_id IS NOT NULL AS has_histogram
FROM
[SHOW STATISTICS FOR TABLE multi_col]
ORDER BY
column_names::STRING, created
----
column_names has_histogram
{id} true
{j} true
{s} true

# Regression test for #56356. Histograms on all-null columns should not cause
# an error.
statement ok
Expand Down

0 comments on commit 26db058

Please sign in to comment.