Skip to content

Commit

Permalink
Use numrows_pre_compression in approx row count
Browse files Browse the repository at this point in the history
The approximate_row_count function was taking the reltuples value from
compressed chunks and multiplying it by 1000, which is the default
batch size. This led to a huge skew between the actual row count and
the approximate one. We now use the numrows_pre_compression value
from the TimescaleDB catalog, which accurately represents the
number of rows before the actual compression.
  • Loading branch information
nikkhils committed Dec 1, 2023
1 parent 1843104 commit e25a779
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 17 deletions.
1 change: 1 addition & 0 deletions .unreleased/fix_6365
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fixes: #6365 Use numrows_pre_compression in approximate row count
35 changes: 19 additions & 16 deletions sql/size_utils.sql
Original file line number Diff line number Diff line change
Expand Up @@ -453,14 +453,15 @@ BEGIN
IF local_compressed_hypertable_id IS NOT NULL THEN
uncompressed_row_count = _timescaledb_functions.get_approx_row_count(relation);

WITH compressed_hypertable AS (SELECT table_name, schema_name FROM _timescaledb_catalog.hypertable ht
WHERE ht.id = local_compressed_hypertable_id)
SELECT c.oid INTO compressed_hypertable_oid FROM pg_class c
INNER JOIN compressed_hypertable h ON (c.relname = h.table_name)
INNER JOIN pg_namespace n ON (n.nspname = h.schema_name);

compressed_row_count = _timescaledb_functions.get_approx_row_count(compressed_hypertable_oid);
RETURN (uncompressed_row_count + (compressed_row_count * max_compressed_row_count));
-- use the compression_chunk_size stats to fetch precompressed num rows
SELECT COALESCE(SUM(numrows_pre_compression), 0) FROM _timescaledb_catalog.chunk srcch,
_timescaledb_catalog.compression_chunk_size map, _timescaledb_catalog.hypertable srcht
INTO compressed_row_count
WHERE map.chunk_id = srcch.id
AND srcht.id = srcch.hypertable_id AND srcht.table_name = local_table_name
AND srcht.schema_name = local_schema_name;

RETURN (uncompressed_row_count + compressed_row_count);
ELSE
uncompressed_row_count = _timescaledb_functions.get_approx_row_count(relation);
RETURN uncompressed_row_count;
Expand All @@ -477,22 +478,24 @@ BEGIN
-- 'input is chunk #1';
IF is_compressed_chunk IS NULL AND local_compressed_chunk_id IS NOT NULL THEN
-- 'Include both uncompressed and compressed chunk #2';
WITH compressed_ns_oid AS ( SELECT table_name, oid FROM _timescaledb_catalog.chunk ch INNER JOIN pg_namespace ns ON
(ch.id = local_compressed_chunk_id and ch.schema_name = ns.nspname))
SELECT c.oid FROM pg_class c INNER JOIN compressed_ns_oid
ON ( c.relnamespace = compressed_ns_oid.oid AND c.relname = compressed_ns_oid.table_name)
INTO local_compressed_chunk_oid;
-- use the compression_chunk_size stats to fetch precompressed num rows
SELECT COALESCE(numrows_pre_compression, 0) FROM _timescaledb_catalog.compression_chunk_size
INTO compressed_row_count
WHERE compressed_chunk_id = local_compressed_chunk_id;

uncompressed_row_count = _timescaledb_functions.get_approx_row_count(relation);
compressed_row_count = _timescaledb_functions.get_approx_row_count(local_compressed_chunk_oid);
RETURN uncompressed_row_count + (compressed_row_count * max_compressed_row_count);
RETURN (uncompressed_row_count + compressed_row_count);
ELSIF is_compressed_chunk IS NULL AND local_compressed_chunk_id IS NULL THEN
-- 'input relation is uncompressed chunk #3';
uncompressed_row_count = _timescaledb_functions.get_approx_row_count(relation);
RETURN uncompressed_row_count;
ELSE
-- 'compressed chunk only #4';
compressed_row_count = _timescaledb_functions.get_approx_row_count(relation) * max_compressed_row_count;
-- use the compression_chunk_size stats to fetch precompressed num rows
SELECT COALESCE(SUM(numrows_pre_compression), 0) FROM _timescaledb_catalog.chunk srcch,
_timescaledb_catalog.compression_chunk_size map INTO compressed_row_count
WHERE map.compressed_chunk_id = srcch.id
AND srcch.table_name = local_table_name AND srcch.schema_name = local_schema_name;
RETURN compressed_row_count;
END IF;
END IF;
Expand Down
2 changes: 1 addition & 1 deletion tsl/test/expected/compression.out
Original file line number Diff line number Diff line change
Expand Up @@ -1274,7 +1274,7 @@ SELECT compress_chunk(c) FROM show_chunks('stattest') c;
SELECT approximate_row_count('stattest');
approximate_row_count
-----------------------
0
26
(1 row)

-- Uncompressed chunk table is empty since we just compressed the chunk and moved everything to compressed chunk table.
Expand Down

0 comments on commit e25a779

Please sign in to comment.