Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enhancement/issue 901 support group array sorted #909

Draft
wants to merge 2 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 83 additions & 6 deletions base/base/sort.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,104 @@

#include <pdqsort.h>

#ifndef NDEBUG
#include <pcg_random.hpp>
#include <base/getThreadId.h>
/** Debug-only comparator adaptor. Same as libcxx std::__debug_less, just without a dependency
  * on the private part of the standard library.
  *
  * Every comparison is forwarded to the wrapped comparator. Whenever it reports lhs < rhs,
  * an assert verifies that it does not also report rhs < lhs, which is a necessary condition
  * for the comparator to induce a strict weak ordering.
  *
  * The comparator is stored by reference, so it has to outlive the wrapper.
  */
template <typename Comparator>
class DebugLessComparator
{
public:
    /// Not explicit on purpose: the wrapper is copy-initialized directly from a comparator.
    constexpr DebugLessComparator(Comparator & cmp_)
        : cmp(cmp_)
    {}

    /// Overload for const arguments (also accepts temporaries).
    template <typename LhsType, typename RhsType>
    constexpr bool operator()(const LhsType & lhs, const RhsType & rhs)
    {
        return compareChecked(lhs, rhs);
    }

    /// Overload for mutable lvalues, so that the comparator receives non-const references.
    template <typename LhsType, typename RhsType>
    constexpr bool operator()(LhsType & lhs, RhsType & rhs)
    {
        return compareChecked(lhs, rhs);
    }

private:
    /// Shared implementation: compare, and if lhs < rhs make sure the reverse does not hold as well.
    template <typename LhsType, typename RhsType>
    constexpr bool compareChecked(LhsType & lhs, RhsType & rhs)
    {
        const bool is_less = cmp(lhs, rhs);
        if (is_less)
            assert(!cmp(rhs, lhs));
        return is_less;
    }

    Comparator & cmp;
};
/// Debug flavour of the alias: comparators are wrapped into DebugLessComparator.
template <typename Comparator>
using ComparatorWrapper = DebugLessComparator<Comparator>;
template <typename RandomIt>
void shuffle(RandomIt first, RandomIt last)
{
static thread_local pcg64 rng(getThreadId());
std::shuffle(first, last, rng);
}
#else
/// Non-debug flavour of the alias: the comparator type is used as is, without any wrapper.
template <typename Comparator>
using ComparatorWrapper = Comparator;
#endif
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wold-style-cast"

#include <miniselect/floyd_rivest_select.h>

template <typename RandomIt>
void nth_element(RandomIt first, RandomIt nth, RandomIt last)
template <typename RandomIt, typename Compare>
void nth_element(RandomIt first, RandomIt nth, RandomIt last, Compare compare)
{
::miniselect::floyd_rivest_select(first, nth, last);
#ifndef NDEBUG
::shuffle(first, last);
#endif

ComparatorWrapper<Compare> compare_wrapper = compare;
::miniselect::floyd_rivest_select(first, nth, last, compare_wrapper);

#ifndef NDEBUG
::shuffle(first, nth);

if (nth != last)
::shuffle(nth + 1, last);
#endif
}

/** Overload of nth_element without a comparator: forwards to the comparator-taking overload
  * with std::less over the iterator's value type.
  */
template <typename RandomIt>
void nth_element(RandomIt first, RandomIt nth, RandomIt last)
{
    using value_type = typename std::iterator_traits<RandomIt>::value_type;
    using comparator = std::less<value_type>;

    ::nth_element(first, nth, last, comparator());
}

/** Partially sorts [first, last) around `middle` with ::miniselect::floyd_rivest_partial_sort,
  * ordering the elements with `compare` (passed through ComparatorWrapper).
  *
  * When NDEBUG is not defined, the whole range is shuffled before sorting and the tail
  * [middle, last) is shuffled again afterwards - presumably to keep callers from relying on
  * the order of the elements outside of the sorted prefix.
  */
template <typename RandomIt, typename Compare>
void partial_sort(RandomIt first, RandomIt middle, RandomIt last, Compare compare)
{
#ifndef NDEBUG
    ::shuffle(first, last);
#endif

    ComparatorWrapper<Compare> compare_wrapper = compare;
    ::miniselect::floyd_rivest_partial_sort(first, middle, last, compare_wrapper);

#ifndef NDEBUG
    ::shuffle(middle, last);
#endif
}

/** Overload of partial_sort without a comparator: forwards to the comparator-taking overload
  * with std::less over the iterator's value type.
  */
template <typename RandomIt>
void partial_sort(RandomIt first, RandomIt middle, RandomIt last)
{
    using element_type = typename std::iterator_traits<RandomIt>::value_type;
    ::partial_sort(first, middle, last, std::less<element_type>());
}

#pragma GCC diagnostic pop
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
---
toc_priority: 112
---

# groupArraySorted {#groupArraySorted}

Returns an array with the first N items in ascending order.

``` sql
groupArraySorted(N)(column)
```

**Arguments**

- `N` – The number of elements to return.

- `column` – The values to aggregate (Integer, String, Float, and other generic types).

**Example**

Gets the first 10 numbers:

``` sql
SELECT groupArraySorted(10)(number) FROM numbers(100)
```

``` text
┌─groupArraySorted(10)(number)─┐
│ [0,1,2,3,4,5,6,7,8,9]        │
└──────────────────────────────┘
```


Gets the string representations of all numbers in the column:

``` sql
SELECT groupArraySorted(5)(str) FROM (SELECT toString(number) as str FROM numbers(5));

```

``` text
┌─groupArraySorted(5)(str)─┐
│ ['0','1','2','3','4']    │
└──────────────────────────┘
```
11 changes: 6 additions & 5 deletions src/AggregateFunctions/AggregateFunctionGroupArray.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ inline AggregateFunctionPtr createAggregateFunctionGroupArrayImpl(const DataType
// return std::make_shared<GroupArrayGeneralListImpl<GroupArrayListNodeGeneral, Trait>>(argument_type, std::forward<TArgs>(args)...);
}


template <bool Tlast>
AggregateFunctionPtr createAggregateFunctionGroupArray(
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
Expand Down Expand Up @@ -79,9 +79,9 @@ AggregateFunctionPtr createAggregateFunctionGroupArray(
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

if (!limit_size)
return createAggregateFunctionGroupArrayImpl<GroupArrayTrait<false, Sampler::NONE>>(argument_types[0], parameters);
return createAggregateFunctionGroupArrayImpl<GroupArrayTrait<false, Tlast, Sampler::NONE>>(argument_types[0], parameters);
else
return createAggregateFunctionGroupArrayImpl<GroupArrayTrait<true, Sampler::NONE>>(argument_types[0], parameters, max_elems);
return createAggregateFunctionGroupArrayImpl<GroupArrayTrait<true, Tlast, Sampler::NONE>>(argument_types[0], parameters, max_elems);
}

AggregateFunctionPtr createAggregateFunctionGroupArraySample(
Expand Down Expand Up @@ -114,7 +114,7 @@ AggregateFunctionPtr createAggregateFunctionGroupArraySample(
else
seed = thread_local_rng();

return createAggregateFunctionGroupArrayImpl<GroupArrayTrait<true, Sampler::RNG>>(argument_types[0], parameters, max_elems, seed);
return createAggregateFunctionGroupArrayImpl<GroupArrayTrait<true, /* Tlast= */ false, Sampler::RNG>>(argument_types[0], parameters, max_elems, seed);
}

}
Expand All @@ -124,8 +124,9 @@ void registerAggregateFunctionGroupArray(AggregateFunctionFactory & factory)
{
AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true };

factory.registerFunction("group_array", { createAggregateFunctionGroupArray, properties });
factory.registerFunction("group_array", { createAggregateFunctionGroupArray<false>, properties });
factory.registerFunction("group_array_sample", { createAggregateFunctionGroupArraySample, properties });
factory.registerFunction("group_array_last", { createAggregateFunctionGroupArray<true>, properties });
}

}
Loading