Properly detect key columns for expression indexes
Previously, an INSERT into a hypertable with a unique expression index
would decompress the whole chunk because no batch filtering was applied
in the compression DML path. This patch adds batch filtering for
columns that are part of a unique expression index but are not
referenced through an expression.
svenklemm committed Dec 22, 2023
1 parent 8ecce0d commit 781cbe3
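
For context, a rough sketch of the scenario this change targets. The table, index, and compression settings below are illustrative assumptions (loosely modeled on the compress_unique shared test), not the test's exact definitions:

```sql
-- Hypothetical hypertable whose unique index mixes plain key columns with
-- an expression. Only the plain columns (offset_timestamp, meter_id,
-- meter_channel_id) can be used to filter compressed batches; "timestamp"
-- is only referenced through date_trunc(), so it cannot.
CREATE TABLE compress_unique_demo (
    offset_timestamp timestamptz NOT NULL,
    meter_id text,
    meter_channel_id text,
    "timestamp" timestamptz
);
SELECT create_hypertable('compress_unique_demo', 'offset_timestamp');

CREATE UNIQUE INDEX uniq_expr ON compress_unique_demo
    (offset_timestamp, meter_id, meter_channel_id,
     date_trunc('day', "timestamp" AT TIME ZONE 'UTC'));

ALTER TABLE compress_unique_demo SET (timescaledb.compress,
    timescaledb.compress_segmentby = 'meter_id, meter_channel_id');

INSERT INTO compress_unique_demo VALUES ('2000-01-01', 'm1', 'c2', '2000-01-01');
SELECT compress_chunk(show_chunks('compress_unique_demo'));

-- Before this patch the INSERT below decompressed the whole chunk; with
-- batch filtering on the plain key columns only the matching batch is
-- decompressed for the uniqueness check.
INSERT INTO compress_unique_demo VALUES ('2000-01-01', 'm1', 'c2', '2000-01-02');
```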
Showing 4 changed files with 98 additions and 5 deletions.
68 changes: 67 additions & 1 deletion tsl/src/compression/compression.c
@@ -2101,6 +2101,72 @@ create_segment_filter_scankey(RowDecompressor *decompressor, char *segment_filte
return num_scankeys;
}

/*
 * For inserts into compressed chunks with a unique index, determine the
 * columns which are safe to use for batch filtering.
 *
 * This is based on RelationGetIndexAttrBitmap from postgres with changes
 * to also track unique expression indexes.
 */
static Bitmapset *
compressed_insert_key_columns(Relation relation)
{
	Bitmapset *indexattrs = NULL; /* indexed columns */
	ListCell *l;

	/* Fast path if definitely no indexes */
	if (!RelationGetForm(relation)->relhasindex)
		return NULL;

	List *indexoidlist = RelationGetIndexList(relation);

	/* Fall out if no indexes (but relhasindex was set) */
	if (indexoidlist == NIL)
		return NULL;

	/*
	 * For each index, add referenced attributes to indexattrs.
	 *
	 * Note: we consider all indexes returned by RelationGetIndexList, even if
	 * they are not indisready or indisvalid. This is important because an
	 * index for which CREATE INDEX CONCURRENTLY has just started must be
	 * included in HOT-safety decisions (see README.HOT). If a DROP INDEX
	 * CONCURRENTLY is far enough along that we should ignore the index, it
	 * won't be returned at all by RelationGetIndexList.
	 */
	foreach (l, indexoidlist)
	{
		Oid indexOid = lfirst_oid(l);
		Relation indexDesc = index_open(indexOid, AccessShareLock);

		if (!indexDesc->rd_index->indisunique)
		{
			index_close(indexDesc, AccessShareLock);
			continue;
		}

		/* Collect simple attribute references.
		 * For covering indexes we only need to collect the key attributes.
		 * Unlike RelationGetIndexAttrBitmap we allow expression indexes,
		 * but we do not extract attributes from the expressions: an
		 * expression can transform the attribute value, so filtering
		 * batches on the raw attribute would not be a safe filter.
		 */
		for (int i = 0; i < indexDesc->rd_index->indnkeyatts; i++)
		{
			int attrnum = indexDesc->rd_index->indkey.values[i];
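			/* An attrnum of 0 in indkey marks an expression column; those are
			 * skipped here since only plain column references are collected. */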
			if (attrnum != 0)
			{
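				/* Offset by FirstLowInvalidHeapAttributeNumber so system columns
				 * can be represented in the Bitmapset, as RelationGetIndexAttrBitmap does. */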
				indexattrs =
					bms_add_member(indexattrs, attrnum - FirstLowInvalidHeapAttributeNumber);
			}
		}
		index_close(indexDesc, AccessShareLock);
	}

	return indexattrs;
}

void
decompress_batches_for_insert(ChunkInsertState *cis, Chunk *chunk, TupleTableSlot *slot)
{
@@ -2133,7 +2199,7 @@ decompress_batches_for_insert(ChunkInsertState *cis, Chunk *chunk, TupleTableSlo
	Relation in_rel = relation_open(comp->table_id, RowExclusiveLock);

	RowDecompressor decompressor = build_decompressor(in_rel, out_rel);
	Bitmapset *key_columns = RelationGetIndexAttrBitmap(out_rel, INDEX_ATTR_BITMAP_KEY);
	Bitmapset *key_columns = compressed_insert_key_columns(out_rel);
	Bitmapset *null_columns = NULL;

	int num_scankeys;
24 changes: 23 additions & 1 deletion tsl/test/shared/expected/compress_unique_index.out
@@ -23,11 +23,33 @@ SELECT compress_chunk(show_chunks('compress_unique')) IS NOT NULL AS compress;
INSERT INTO compress_unique VALUES ('2000-01-01','m1','c2','2000-01-01');
ERROR: duplicate key value violates unique constraint "_hyper_X_X_chunk_uniq_expr"
\set ON_ERROR_STOP 1
-- should only decompress 1 batch
EXPLAIN (analyze,costs off,summary off,timing off) INSERT INTO compress_unique VALUES ('2000-01-01','m1','c2','2000-01-02');
QUERY PLAN
Custom Scan (HypertableModify) (actual rows=0 loops=1)
Batches decompressed: 1
Tuples decompressed: 1
-> Insert on compress_unique (actual rows=0 loops=1)
-> Custom Scan (ChunkDispatch) (actual rows=1 loops=1)
-> Result (actual rows=1 loops=1)
(6 rows)

-- should decompress no batches
EXPLAIN (analyze,costs off,summary off,timing off) INSERT INTO compress_unique VALUES ('2000-01-01','m1','c3','2000-01-02');
QUERY PLAN
Custom Scan (HypertableModify) (actual rows=0 loops=1)
-> Insert on compress_unique (actual rows=0 loops=1)
-> Custom Scan (ChunkDispatch) (actual rows=1 loops=1)
-> Result (actual rows=1 loops=1)
(4 rows)

SELECT * FROM compress_unique ORDER BY compress_unique;
offset_timestamp | meter_id | meter_channel_id | timestamp
------------------------------+----------+------------------+------------------------------
Sat Jan 01 00:00:00 2000 PST | m1 | c1 | Sat Jan 01 00:00:00 2000 PST
Sat Jan 01 00:00:00 2000 PST | m1 | c2 | Sat Jan 01 00:00:00 2000 PST
(2 rows)
Sat Jan 01 00:00:00 2000 PST | m1 | c2 | Sun Jan 02 00:00:00 2000 PST
Sat Jan 01 00:00:00 2000 PST | m1 | c3 | Sun Jan 02 00:00:00 2000 PST
(4 rows)

DROP TABLE compress_unique;
5 changes: 2 additions & 3 deletions tsl/test/shared/sql/CMakeLists.txt
@@ -2,7 +2,6 @@ set(TEST_FILES_SHARED
cagg_compression.sql
classify_relation.sql
compat.sql
compress_unique_index.sql
constify_timestamptz_op_interval.sql
constraint_aware_append.sql
constraint_exclusion_prepared.sql
@@ -17,8 +16,8 @@ set(TEST_TEMPLATES_SHARED
space_constraint.sql.in)

if((${PG_VERSION_MAJOR} GREATER_EQUAL "14"))
list(APPEND TEST_FILES_SHARED compression_dml.sql decompress_tracking.sql
memoize.sql)
list(APPEND TEST_FILES_SHARED compress_unique_index.sql compression_dml.sql
decompress_tracking.sql memoize.sql)
endif()

# this test was changing the contents of tables in shared_setup.sql thus causing
6 changes: 6 additions & 0 deletions tsl/test/shared/sql/compress_unique_index.sql
@@ -20,6 +20,12 @@ SELECT compress_chunk(show_chunks('compress_unique')) IS NOT NULL AS compress;
INSERT INTO compress_unique VALUES ('2000-01-01','m1','c2','2000-01-01');
\set ON_ERROR_STOP 1

-- should only decompress 1 batch
EXPLAIN (analyze,costs off,summary off,timing off) INSERT INTO compress_unique VALUES ('2000-01-01','m1','c2','2000-01-02');

-- should decompress no batches
EXPLAIN (analyze,costs off,summary off,timing off) INSERT INTO compress_unique VALUES ('2000-01-01','m1','c3','2000-01-02');

SELECT * FROM compress_unique ORDER BY compress_unique;

DROP TABLE compress_unique;
