From 01c742e2627dbaaaabc378dae29fe816e6f937a8 Mon Sep 17 00:00:00 2001 From: Nikhil Sontakke Date: Thu, 30 Nov 2023 19:52:08 +0530 Subject: [PATCH] Use numrows_pre_compression in approx row count The approximate_row_count function was using the reltuples from compressed chunks and multiplying that by 1000, which is the default batch size. This was leading to a huge skew between the actual row count and the approximate one. We now use the numrows_pre_compression value from the TimescaleDB catalog, which accurately represents the number of rows before the actual compression. Fixes #1422 --- .unreleased/fix_6365 | 1 + sql/size_utils.sql | 35 +++++++++++++++++++---------------- 2 files changed, 20 insertions(+), 16 deletions(-) create mode 100644 .unreleased/fix_6365 diff --git a/.unreleased/fix_6365 b/.unreleased/fix_6365 new file mode 100644 index 00000000000..c261f5a59d0 --- /dev/null +++ b/.unreleased/fix_6365 @@ -0,0 +1 @@ +Fixes: #6365 Use numrows_pre_compression in approximate row count diff --git a/sql/size_utils.sql b/sql/size_utils.sql index 19645534094..a362ffb861b 100644 --- a/sql/size_utils.sql +++ b/sql/size_utils.sql @@ -453,14 +453,15 @@ BEGIN IF local_compressed_hypertable_id IS NOT NULL THEN uncompressed_row_count = _timescaledb_functions.get_approx_row_count(relation); - WITH compressed_hypertable AS (SELECT table_name, schema_name FROM _timescaledb_catalog.hypertable ht - WHERE ht.id = local_compressed_hypertable_id) - SELECT c.oid INTO compressed_hypertable_oid FROM pg_class c - INNER JOIN compressed_hypertable h ON (c.relname = h.table_name) - INNER JOIN pg_namespace n ON (n.nspname = h.schema_name); - - compressed_row_count = _timescaledb_functions.get_approx_row_count(compressed_hypertable_oid); - RETURN (uncompressed_row_count + (compressed_row_count * max_compressed_row_count)); + -- use the compression_chunk_size stats to fetch precompressed num rows + SELECT COALESCE(SUM(numrows_pre_compression), 0) FROM _timescaledb_catalog.chunk srcch, + 
_timescaledb_catalog.compression_chunk_size map, _timescaledb_catalog.hypertable srcht + INTO compressed_row_count + WHERE map.chunk_id = srcch.id + AND srcht.id = srcch.hypertable_id AND srcht.table_name = local_table_name + AND srcht.schema_name = local_schema_name; + + RETURN (uncompressed_row_count + compressed_row_count); ELSE uncompressed_row_count = _timescaledb_functions.get_approx_row_count(relation); RETURN uncompressed_row_count; @@ -477,22 +478,24 @@ BEGIN -- 'input is chunk #1'; IF is_compressed_chunk IS NULL AND local_compressed_chunk_id IS NOT NULL THEN -- 'Include both uncompressed and compressed chunk #2'; - WITH compressed_ns_oid AS ( SELECT table_name, oid FROM _timescaledb_catalog.chunk ch INNER JOIN pg_namespace ns ON - (ch.id = local_compressed_chunk_id and ch.schema_name = ns.nspname)) - SELECT c.oid FROM pg_class c INNER JOIN compressed_ns_oid - ON ( c.relnamespace = compressed_ns_oid.oid AND c.relname = compressed_ns_oid.table_name) - INTO local_compressed_chunk_oid; + -- use the compression_chunk_size stats to fetch precompressed num rows + SELECT COALESCE(numrows_pre_compression, 0) FROM _timescaledb_catalog.compression_chunk_size + INTO compressed_row_count + WHERE compressed_chunk_id = local_compressed_chunk_id; uncompressed_row_count = _timescaledb_functions.get_approx_row_count(relation); - compressed_row_count = _timescaledb_functions.get_approx_row_count(local_compressed_chunk_oid); - RETURN uncompressed_row_count + (compressed_row_count * max_compressed_row_count); + RETURN (uncompressed_row_count + compressed_row_count); ELSIF is_compressed_chunk IS NULL AND local_compressed_chunk_id IS NULL THEN -- 'input relation is uncompressed chunk #3'; uncompressed_row_count = _timescaledb_functions.get_approx_row_count(relation); RETURN uncompressed_row_count; ELSE -- 'compressed chunk only #4'; - compressed_row_count = _timescaledb_functions.get_approx_row_count(relation) * max_compressed_row_count; + -- use the compression_chunk_size 
stats to fetch precompressed num rows + SELECT COALESCE(SUM(numrows_pre_compression), 0) FROM _timescaledb_catalog.chunk srcch, + _timescaledb_catalog.compression_chunk_size map INTO compressed_row_count + WHERE map.compressed_chunk_id = srcch.id + AND srcch.table_name = local_table_name AND srcch.schema_name = local_schema_name; RETURN compressed_row_count; END IF; END IF;