Skip to content

Commit

Permalink
Refactor batch segment metadata builder for bloom filters (#7597)
Browse files Browse the repository at this point in the history
We plan to implement bloom filter sparse indexes for compressed data, so
the compressed batch metadata builder requires some cosmetic changes to
support other types of metadata.
  • Loading branch information
akuzm authored Jan 22, 2025
1 parent f0996a4 commit f3df13a
Show file tree
Hide file tree
Showing 11 changed files with 292 additions and 241 deletions.
4 changes: 2 additions & 2 deletions tsl/src/compression/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
set(SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/api.c
${CMAKE_CURRENT_SOURCE_DIR}/batch_metadata_builder_minmax.c
${CMAKE_CURRENT_SOURCE_DIR}/compression.c
${CMAKE_CURRENT_SOURCE_DIR}/compression_dml.c
${CMAKE_CURRENT_SOURCE_DIR}/compression_scankey.c
${CMAKE_CURRENT_SOURCE_DIR}/compression_storage.c
${CMAKE_CURRENT_SOURCE_DIR}/create.c
${CMAKE_CURRENT_SOURCE_DIR}/recompress.c
${CMAKE_CURRENT_SOURCE_DIR}/segment_meta.c)
${CMAKE_CURRENT_SOURCE_DIR}/recompress.c)
target_sources(${TSL_LIBRARY_NAME} PRIVATE ${SOURCES})

add_subdirectory(algorithms)
22 changes: 22 additions & 0 deletions tsl/src/compression/batch_metadata_builder.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/*
* This file and its contents are licensed under the Timescale License.
* Please see the included NOTICE for copyright information and
* LICENSE-TIMESCALE for a copy of the license.
*/
#pragma once

/* Forward declaration; this header only passes RowCompressor by pointer. */
typedef struct RowCompressor RowCompressor;

/*
 * Table of callbacks implemented by a per-column batch metadata builder.
 *
 * Each callback receives the builder as an untyped pointer. In the min/max
 * implementation the table is the first member of the concrete builder struct
 * and the callbacks cast the pointer back to that struct, so callers are
 * presumably expected to pass the pointer returned by the matching
 * *_create() function -- TODO confirm against the call sites.
 */
typedef struct BatchMetadataBuilder
{
/* Account for one non-null value of the column. */
void (*update_val)(void *builder, Datum val);
/* Account for one null value of the column. */
void (*update_null)(void *builder);

/* Store the accumulated metadata into the compressor's output row. */
void (*insert_to_compressed_row)(void *builder, RowCompressor *compressor);

/*
 * Discard the accumulated state. The min/max implementation also marks its
 * metadata columns in the compressor's output row as null.
 */
void (*reset)(void *builder, RowCompressor *compressor);
} BatchMetadataBuilder;

/*
 * Create a metadata builder that tracks the minimum and maximum value of a
 * column of the given type, compared with the type's less-than operator under
 * the given collation.
 *
 * min_attr_offset and max_attr_offset are the indexes of the min and max
 * metadata columns in the RowCompressor's compressed_values and
 * compressed_is_null arrays.
 *
 * The builder is allocated with palloc(). Raises an ERROR if no less-than
 * operator can be found for the type.
 */
BatchMetadataBuilder *batch_metadata_builder_minmax_create(Oid type, Oid collation,
int min_attr_offset,
int max_attr_offset);
184 changes: 184 additions & 0 deletions tsl/src/compression/batch_metadata_builder_minmax.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
/*
* This file and its contents are licensed under the Timescale License.
* Please see the included NOTICE for copyright information and
* LICENSE-TIMESCALE for a copy of the license.
*/
#include <postgres.h>
#include <libpq/pqformat.h>
#include <utils/builtins.h>
#include <utils/datum.h>
#include <utils/sortsupport.h>
#include <utils/typcache.h>

#include "batch_metadata_builder_minmax.h"

#include "compression.h"

/*
 * File-local callbacks that batch_metadata_builder_minmax_create() installs
 * into the BatchMetadataBuilder function table. They are declared up front
 * because the constructor that references them precedes their definitions.
 */
static void minmax_update_val(void *builder_, Datum val);
static void minmax_update_null(void *builder_);
static void minmax_insert_to_compressed_row(void *builder_, RowCompressor *compressor);
static void minmax_reset(void *builder_, RowCompressor *compressor);

/*
 * Allocate and initialize a min/max metadata builder for one column.
 *
 * type_oid/collation select the ordering used for comparisons: the type's
 * less-than operator is looked up in the type cache and turned into a
 * SortSupport comparator. min_attr_offset/max_attr_offset are remembered as
 * the positions of the min and max metadata columns in the compressed row.
 *
 * Returns a pointer to the embedded callback table. Raises an ERROR when the
 * type has no less-than operator.
 */
BatchMetadataBuilder *
batch_metadata_builder_minmax_create(Oid type_oid, Oid collation, int min_attr_offset,
									 int max_attr_offset)
{
	BatchMetadataBuilderMinMax *minmax = palloc(sizeof(BatchMetadataBuilderMinMax));
	TypeCacheEntry *tce = lookup_type_cache(type_oid, TYPECACHE_LT_OPR);
	SortSupport sort_support;

	if (!OidIsValid(tce->lt_opr))
	{
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_FUNCTION),
				 errmsg("could not identify an less-than operator for type %s",
						format_type_be(type_oid))));
	}

	/* Start from an all-zero struct, then fill in the non-zero members. */
	*minmax = (BatchMetadataBuilderMinMax){ 0 };

	/* Callback table handed back to the caller. */
	minmax->functions.update_val = minmax_update_val;
	minmax->functions.update_null = minmax_update_null;
	minmax->functions.insert_to_compressed_row = minmax_insert_to_compressed_row;
	minmax->functions.reset = minmax_reset;

	/* Type description and initial (empty) state. */
	minmax->type_oid = type_oid;
	minmax->empty = true;
	minmax->has_null = false;
	minmax->type_by_val = tce->typbyval;
	minmax->type_len = tce->typlen;
	minmax->min_metadata_attr_offset = min_attr_offset;
	minmax->max_metadata_attr_offset = max_attr_offset;

	/* Comparator derived from the type's less-than operator. */
	sort_support = &minmax->ssup;
	sort_support->ssup_cxt = CurrentMemoryContext;
	sort_support->ssup_collation = collation;
	sort_support->ssup_nulls_first = false;
	PrepareSortSupportFromOrderingOp(tce->lt_opr, sort_support);

	return &minmax->functions;
}

/*
 * Fold one non-null value into the running minimum and maximum.
 *
 * The first value seen while the builder is empty becomes both the min and
 * the max. Values are stored as private copies made with datumCopy(); for
 * by-reference types the previously stored copy is freed when it is replaced.
 *
 * Defined `static` to match its forward declaration and the other callbacks
 * in this file.
 */
static void
minmax_update_val(void *builder_, Datum val)
{
	BatchMetadataBuilderMinMax *builder = (BatchMetadataBuilderMinMax *) builder_;
	int cmp;

	if (builder->empty)
	{
		/* Two independent copies, because min and max are freed separately. */
		builder->min = datumCopy(val, builder->type_by_val, builder->type_len);
		builder->max = datumCopy(val, builder->type_by_val, builder->type_len);
		builder->empty = false;
		return;
	}

	/* Current min sorts after val: val is the new minimum. */
	cmp = ApplySortComparator(builder->min, false, val, false, &builder->ssup);
	if (cmp > 0)
	{
		if (!builder->type_by_val)
			pfree(DatumGetPointer(builder->min));
		builder->min = datumCopy(val, builder->type_by_val, builder->type_len);
	}

	/* Current max sorts before val: val is the new maximum. */
	cmp = ApplySortComparator(builder->max, false, val, false, &builder->ssup);
	if (cmp < 0)
	{
		if (!builder->type_by_val)
			pfree(DatumGetPointer(builder->max));
		builder->max = datumCopy(val, builder->type_by_val, builder->type_len);
	}
}

/*
 * Record that the batch contains at least one null value.
 *
 * Nulls do not affect the tracked min/max; only the has_null flag is set.
 *
 * Defined `static` to match its forward declaration and the other callbacks
 * in this file.
 */
static void
minmax_update_null(void *builder_)
{
	BatchMetadataBuilderMinMax *builder = (BatchMetadataBuilderMinMax *) builder_;

	builder->has_null = true;
}

/*
 * Return the builder to its empty state and blank out its two metadata
 * columns in the compressor's output row.
 *
 * If values were accumulated, the stored min/max copies are released (for
 * by-reference types) and zeroed before the flags are cleared.
 */
static void
minmax_reset(void *builder_, RowCompressor *compressor)
{
	BatchMetadataBuilderMinMax *minmax = (BatchMetadataBuilderMinMax *) builder_;
	const int16 min_off = minmax->min_metadata_attr_offset;
	const int16 max_off = minmax->max_metadata_attr_offset;

	if (!minmax->empty)
	{
		const bool owns_copies = !minmax->type_by_val;

		if (owns_copies)
		{
			pfree(DatumGetPointer(minmax->min));
			pfree(DatumGetPointer(minmax->max));
		}

		minmax->min = 0;
		minmax->max = 0;
	}

	minmax->empty = true;
	minmax->has_null = false;

	/* The metadata columns read as NULL until the next insert. */
	compressor->compressed_is_null[max_off] = true;
	compressor->compressed_is_null[min_off] = true;
	compressor->compressed_values[min_off] = 0;
	compressor->compressed_values[max_off] = 0;
}

Datum
batch_metadata_builder_minmax_min(void *builder_)
{
BatchMetadataBuilderMinMax *builder = (BatchMetadataBuilderMinMax *) builder_;
if (builder->empty)
elog(ERROR, "trying to get min from an empty builder");
if (builder->type_len == -1)
{
Datum unpacked = PointerGetDatum(PG_DETOAST_DATUM_PACKED(builder->min));
if (builder->min != unpacked)
pfree(DatumGetPointer(builder->min));
builder->min = unpacked;
}
return builder->min;
}

Datum
batch_metadata_builder_minmax_max(void *builder_)
{
BatchMetadataBuilderMinMax *builder = (BatchMetadataBuilderMinMax *) builder_;
if (builder->empty)
elog(ERROR, "trying to get max from an empty builder");
if (builder->type_len == -1)
{
Datum unpacked = PointerGetDatum(PG_DETOAST_DATUM_PACKED(builder->max));
if (builder->max != unpacked)
pfree(DatumGetPointer(builder->max));
builder->max = unpacked;
}
return builder->max;
}

/*
 * Report whether the builder holds no values, i.e. no non-null value has
 * been added since it was created or last reset.
 */
bool
batch_metadata_builder_minmax_empty(void *builder_)
{
	return ((BatchMetadataBuilderMinMax *) builder_)->empty;
}

/*
 * Write the accumulated min and max into the compressor's output row.
 *
 * When no value has been accumulated, both metadata columns are marked null
 * and their values are left untouched; otherwise both are marked non-null
 * and filled from the builder.
 */
static void
minmax_insert_to_compressed_row(void *builder_, RowCompressor *compressor)
{
	BatchMetadataBuilderMinMax *builder = (BatchMetadataBuilderMinMax *) builder_;
	bool no_values;
	int16 min_off;
	int16 max_off;

	Assert(builder->min_metadata_attr_offset >= 0);
	Assert(builder->max_metadata_attr_offset >= 0);

	min_off = builder->min_metadata_attr_offset;
	max_off = builder->max_metadata_attr_offset;

	/* The null flags of both columns simply mirror the emptiness of the builder. */
	no_values = batch_metadata_builder_minmax_empty(builder);
	compressor->compressed_is_null[min_off] = no_values;
	compressor->compressed_is_null[max_off] = no_values;

	if (no_values)
		return;

	compressor->compressed_values[min_off] = batch_metadata_builder_minmax_min(builder);
	compressor->compressed_values[max_off] = batch_metadata_builder_minmax_max(builder);
}
44 changes: 44 additions & 0 deletions tsl/src/compression/batch_metadata_builder_minmax.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*
* This file and its contents are licensed under the Timescale License.
* Please see the included NOTICE for copyright information and
* LICENSE-TIMESCALE for a copy of the license.
*/
#pragma once

#include <postgres.h>
#include <fmgr.h>
#include <lib/stringinfo.h>
#include <utils/sortsupport.h>

#include "batch_metadata_builder.h"

/*
 * Metadata builder that tracks the minimum and maximum value of one column
 * within a compressed batch.
 *
 * The struct definition below already introduces the typedef name, so no
 * separate `typedef struct BatchMetadataBuilderMinMax ...;` is needed after
 * it. Repeating a typedef is only valid from C11 onward.
 */
typedef struct BatchMetadataBuilderMinMax
{
	/*
	 * Callback table returned to callers by the constructor. The callbacks
	 * cast their untyped builder argument back to this struct, so this
	 * presumably has to remain the first member -- confirm against the call
	 * sites before moving it.
	 */
	BatchMetadataBuilder functions;

	/* Type of the tracked column. */
	Oid type_oid;
	/* True until the first non-null value is added; min/max are unset then. */
	bool empty;
	/* True once a null value has been reported for the batch. */
	bool has_null;

	/* Comparator built from the type's less-than operator. */
	SortSupportData ssup;
	/* Pass-by-value flag and length of the type, used when copying datums. */
	bool type_by_val;
	int16 type_len;
	/* Private copies of the current minimum and maximum. */
	Datum min;
	Datum max;

	/* Positions of the min and max metadata columns in the compressed row. */
	int16 min_metadata_attr_offset;
	int16 max_metadata_attr_offset;
} BatchMetadataBuilderMinMax;

/* Forward declaration of the row compressor type. */
typedef struct RowCompressor RowCompressor;

/*
 * This is exposed only for the old unit tests. Ideally they should be replaced
 * with functional tests inspecting the compressed chunk table, and this
 * test-only interface should be removed.
 *
 * The min and max accessors raise an ERROR when called on an empty builder.
 * For variable-length types they detoast the stored value and keep the
 * detoasted copy in the builder. The empty accessor reports whether the
 * builder currently holds no values.
 */
Datum batch_metadata_builder_minmax_min(void *builder_);
Datum batch_metadata_builder_minmax_max(void *builder_);
bool batch_metadata_builder_minmax_empty(void *builder_);
Loading

0 comments on commit f3df13a

Please sign in to comment.