Skip to content

Commit

Permalink
Merge pull request #34060 from amosbird/optimizetupleorderby
Browse files Browse the repository at this point in the history
Make ORDER BY tuple almost as fast as ORDER BY columns
  • Loading branch information
kitaisreal authored Jan 29, 2022
2 parents 8029d79 + 04cff63 commit f345302
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 100 deletions.
168 changes: 82 additions & 86 deletions src/Interpreters/sortBlock.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <Columns/ColumnConst.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnLowCardinality.h>
#include <Columns/ColumnTuple.h>
#include <Common/typeid_cast.h>
#include <Functions/FunctionHelpers.h>

Expand All @@ -22,6 +23,32 @@ static bool isCollationRequired(const SortColumnDescription & description)
return description.collator != nullptr;
}

/// Column with description for sort
struct ColumnWithSortDescription
{
const IColumn * column = nullptr;
SortColumnDescription description;

/// It means, that this column is ColumnConst
bool column_const = false;
};
using ColumnsWithSortDescriptions = std::vector<ColumnWithSortDescription>;

void flattenTupleColumnRecursively(
ColumnsWithSortDescriptions & res, const ColumnTuple * tuple, const SortColumnDescription & description, bool has_collation)
{
for (const auto & column : tuple->getColumns())
{
if (const auto * subtuple = typeid_cast<const ColumnTuple *>(column.get()))
flattenTupleColumnRecursively(res, subtuple, description, has_collation);
else
{
res.emplace_back(ColumnWithSortDescription{column.get(), description, isColumnConst(*column)});
if (has_collation && !res.back().column->isCollationSupported())
res.back().description.collator = nullptr;
}
}
}

ColumnsWithSortDescriptions getColumnsWithSortDescription(const Block & block, const SortDescription & description)
{
Expand All @@ -35,13 +62,22 @@ ColumnsWithSortDescriptions getColumnsWithSortDescription(const Block & block, c
? block.getByName(description[i].column_name).column.get()
: block.safeGetByPosition(description[i].column_number).column.get();

res.emplace_back(ColumnWithSortDescription{column, description[i], isColumnConst(*column)});
if (isCollationRequired(description[i]))
{
if (!column->isCollationSupported())
throw Exception(
"Collations could be specified only for String, LowCardinality(String), Nullable(String) or for Array or Tuple, "
"containing them.",
ErrorCodes::BAD_COLLATION);
}
if (const auto * tuple = typeid_cast<const ColumnTuple *>(column))
flattenTupleColumnRecursively(res, tuple, description[i], isCollationRequired(description[i]));
else
res.emplace_back(ColumnWithSortDescription{column, description[i], isColumnConst(*column)});
}

return res;
}


struct PartialSortingLess
{
const ColumnsWithSortDescriptions & columns;
Expand Down Expand Up @@ -107,116 +143,76 @@ void sortBlock(Block & block, const SortDescription & description, UInt64 limit)
if (!block)
return;

/// If only one column to sort by
if (description.size() == 1)
ColumnsWithSortDescriptions columns_with_sort_desc = getColumnsWithSortDescription(block, description);
bool all_const = true;
for (const auto & column : columns_with_sort_desc)
{
IColumn::Permutation perm;
bool reverse = description[0].direction == -1;

const IColumn * column = !description[0].column_name.empty()
? block.getByName(description[0].column_name).column.get()
: block.safeGetByPosition(description[0].column_number).column.get();

bool is_column_const = false;
if (isCollationRequired(description[0]))
{
if (!column->isCollationSupported())
throw Exception("Collations could be specified only for String, LowCardinality(String), Nullable(String) or for Array or Tuple, containing them.", ErrorCodes::BAD_COLLATION);

if (isColumnConst(*column))
is_column_const = true;
else
column->getPermutationWithCollation(*description[0].collator, reverse, limit, description[0].nulls_direction, perm);
}
else if (!isColumnConst(*column))
if (!column.column_const)
{
int nan_direction_hint = description[0].nulls_direction;
column->getPermutation(reverse, limit, nan_direction_hint, perm);
all_const = false;
break;
}
else
/// we don't need to do anything with const column
is_column_const = true;
}
if (all_const)
return;

size_t columns = block.columns();
for (size_t i = 0; i < columns; ++i)
IColumn::Permutation perm;
/// If only one column to sort by
if (columns_with_sort_desc.size() == 1)
{
bool reverse = columns_with_sort_desc[0].description.direction == -1;
const IColumn * column = columns_with_sort_desc[0].column;
if (isCollationRequired(description[0]))
column->getPermutationWithCollation(*description[0].collator, reverse, limit, description[0].nulls_direction, perm);
else
{
if (!is_column_const)
block.getByPosition(i).column = block.getByPosition(i).column->permute(perm, limit);
int nan_direction_hint = columns_with_sort_desc[0].description.nulls_direction;
column->getPermutation(reverse, limit, nan_direction_hint, perm);
}
}
else
{
size_t size = block.rows();
IColumn::Permutation perm(size);
perm.resize(size);
for (size_t i = 0; i < size; ++i)
perm[i] = i;

if (limit >= size)
limit = 0;

bool need_collation = false;
ColumnsWithSortDescriptions columns_with_sort_desc = getColumnsWithSortDescription(block, description);

for (size_t i = 0, num_sort_columns = description.size(); i < num_sort_columns; ++i)
EqualRanges ranges;
ranges.emplace_back(0, perm.size());
for (const auto & column : columns_with_sort_desc)
{
const IColumn * column = columns_with_sort_desc[i].column;
if (isCollationRequired(description[i]))
{
if (!column->isCollationSupported())
throw Exception("Collations could be specified only for String, LowCardinality(String), Nullable(String) or for Array or Tuple, containing them.", ErrorCodes::BAD_COLLATION);
while (!ranges.empty() && limit && limit <= ranges.back().first)
ranges.pop_back();

need_collation = true;
}
}
if (ranges.empty())
break;

if (need_collation)
{
EqualRanges ranges;
ranges.emplace_back(0, perm.size());
for (const auto & column : columns_with_sort_desc)
if (column.column_const)
continue;

if (isCollationRequired(column.description))
{
while (!ranges.empty() && limit && limit <= ranges.back().first)
ranges.pop_back();

if (ranges.empty())
break;

if (column.column_const)
continue;

if (isCollationRequired(column.description))
{
column.column->updatePermutationWithCollation(
*column.description.collator, column.description.direction < 0, limit, column.description.nulls_direction, perm, ranges);
}
else
{
column.column->updatePermutation(
column.description.direction < 0, limit, column.description.nulls_direction, perm, ranges);
}
column.column->updatePermutationWithCollation(
*column.description.collator,
column.description.direction < 0,
limit,
column.description.nulls_direction,
perm,
ranges);
}
}
else
{
EqualRanges ranges;
ranges.emplace_back(0, perm.size());
for (const auto & column : columns_with_sort_desc)
else
{
while (!ranges.empty() && limit && limit <= ranges.back().first)
ranges.pop_back();

if (ranges.empty())
break;

column.column->updatePermutation(
column.description.direction < 0, limit, column.description.nulls_direction, perm, ranges);
}
}

size_t columns = block.columns();
for (size_t i = 0; i < columns; ++i)
block.getByPosition(i).column = block.getByPosition(i).column->permute(perm, limit);
}
size_t columns = block.columns();
for (size_t i = 0; i < columns; ++i)
block.getByPosition(i).column = block.getByPosition(i).column->permute(perm, limit);
}


Expand Down
14 changes: 0 additions & 14 deletions src/Interpreters/sortBlock.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,18 +29,4 @@ void stableGetPermutation(const Block & block, const SortDescription & descripti
*/
bool isAlreadySorted(const Block & block, const SortDescription & description);

/// Column with description for sort
struct ColumnWithSortDescription
{
const IColumn * column = nullptr;
SortColumnDescription description;

/// It means, that this column is ColumnConst
bool column_const = false;
};

using ColumnsWithSortDescriptions = std::vector<ColumnWithSortDescription>;

ColumnsWithSortDescriptions getColumnsWithSortDescription(const Block & block, const SortDescription & description);

}
8 changes: 8 additions & 0 deletions tests/performance/order_by_tuple.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<test>
<tags>
<tag>sorting</tag>
<tag>comparison</tag>
</tags>

<query>select * from numbers(300000000) order by (1 - number , number + 1 , number) limit 10;</query>
</test>

0 comments on commit f345302

Please sign in to comment.