Skip to content

Commit

Permalink
Extract AggregationFuzzerBase from AggregationFuzzer (#7916)
Browse files Browse the repository at this point in the history
Summary:

Extract AggregationFuzzerBase from AggregationFuzzer. This is needed for building WindowFuzzer
that reuses common logic in AggregationFuzzerBase.

This is the first piece of #7754.

Differential Revision: D51692940
  • Loading branch information
kagamiori authored and facebook-github-bot committed Dec 8, 2023
1 parent 5c586d0 commit 044b938
Show file tree
Hide file tree
Showing 11 changed files with 1,413 additions and 1,141 deletions.
205 changes: 3 additions & 202 deletions velox/exec/tests/AggregationFuzzerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

#include <boost/random/uniform_int_distribution.hpp>
#include "velox/exec/tests/utils/AggregationFuzzerRunner.h"
#include "velox/exec/tests/utils/AggregationFuzzerUtils.h"
#include "velox/exec/tests/utils/AssertQueryBuilder.h"
#include "velox/exec/tests/utils/DuckQueryRunner.h"
#include "velox/exec/tests/utils/PlanBuilder.h"
Expand All @@ -46,207 +47,6 @@ DEFINE_string(
namespace facebook::velox::exec::test {
namespace {

class MinMaxInputGenerator : public InputGenerator {
public:
MinMaxInputGenerator(const std::string& name) : indexOfN_{indexOfN(name)} {}

std::vector<VectorPtr> generate(
const std::vector<TypePtr>& types,
VectorFuzzer& fuzzer,
FuzzerGenerator& rng,
memory::MemoryPool* pool) override {
// TODO Generate inputs free of nested nulls.
if (types.size() <= indexOfN_) {
return {};
}

// Make sure to use the same value of 'n' for all batches in a given Fuzzer
// iteration.
if (!n_.has_value()) {
n_ = boost::random::uniform_int_distribution<int64_t>(0, 9'999)(rng);
}

const auto size = fuzzer.getOptions().vectorSize;

std::vector<VectorPtr> inputs;
inputs.reserve(types.size());
for (auto i = 0; i < types.size() - 1; ++i) {
inputs.push_back(fuzzer.fuzz(types[i]));
}

VELOX_CHECK(
types.back()->isBigint(),
"Unexpected type: {}",
types.back()->toString())
inputs.push_back(
BaseVector::createConstant(BIGINT(), n_.value(), size, pool));
return inputs;
}

void reset() override {
n_.reset();
}

private:
// Returns zero-based index of the 'n' argument, 1 for min and max. 2 for
// min_by and max_by.
static int32_t indexOfN(const std::string& name) {
if (name == "min" || name == "max") {
return 1;
}

if (name == "min_by" || name == "max_by") {
return 2;
}

VELOX_FAIL("Unexpected function name: {}", name)
}

// Zero-based index of the 'n' argument.
const int32_t indexOfN_;
std::optional<int64_t> n_;
};

class ApproxDistinctInputGenerator : public InputGenerator {
public:
std::vector<VectorPtr> generate(
const std::vector<TypePtr>& types,
VectorFuzzer& fuzzer,
FuzzerGenerator& rng,
memory::MemoryPool* pool) override {
if (types.size() != 2) {
return {};
}

// Make sure to use the same value of 'e' for all batches in a given Fuzzer
// iteration.
if (!e_.has_value()) {
// Generate value in [0.0040625, 0.26] range.
static constexpr double kMin = 0.0040625;
static constexpr double kMax = 0.26;
e_ = kMin + (kMax - kMin) * boost::random::uniform_01<double>()(rng);
}

const auto size = fuzzer.getOptions().vectorSize;

VELOX_CHECK(
types.back()->isDouble(),
"Unexpected type: {}",
types.back()->toString())
return {
fuzzer.fuzz(types[0]),
BaseVector::createConstant(DOUBLE(), e_.value(), size, pool)};
}

void reset() override {
e_.reset();
}

private:
std::optional<double> e_;
};

/// Input generator for signatures of the form
/// (x, [w], percentile(s), [accuracy]).
///
/// 'x' is fuzzed, the optional weight 'w' is a flat BIGINT vector of random
/// values in [1, 1000], and percentile(s) / accuracy are constant vectors whose
/// values are picked once and reused for every batch until reset() is called.
class ApproxPercentileInputGenerator : public InputGenerator {
 public:
  /// Generates one batch of input vectors.
  ///
  /// @param types Argument types of the signature being fuzzed. Must contain
  /// at least 'x' and the percentile argument.
  /// @param fuzzer Used to fuzz 'x', to read the batch size and for coin
  /// tosses when picking percentiles.
  /// @param rng Source of randomness for weights, percentiles and accuracy.
  /// @param pool Memory pool used to allocate the generated vectors.
  /// @return One vector per entry in 'types'.
  std::vector<VectorPtr> generate(
      const std::vector<TypePtr>& types,
      VectorFuzzer& fuzzer,
      FuzzerGenerator& rng,
      memory::MemoryPool* pool) override {
    // The arguments are: x, [w], percentile(s), [accuracy].
    //
    // First argument is always 'x'. If second argument's type is BIGINT, then
    // it is 'w'. Otherwise, it is percentile(s).

    // Fail loudly instead of indexing past the end of 'types' below.
    VELOX_CHECK(
        types.size() >= 2,
        "Expected at least 2 argument types, got {}",
        types.size());

    const bool hasWeight = types[1]->isBigint();
    const size_t percentileTypeIndex = hasWeight ? 2 : 1;
    VELOX_CHECK(
        types.size() > percentileTypeIndex,
        "Missing percentile argument: got {} argument types",
        types.size());

    const auto size = fuzzer.getOptions().vectorSize;

    std::vector<VectorPtr> inputs;
    inputs.reserve(types.size());
    inputs.push_back(fuzzer.fuzz(types[0]));

    if (hasWeight) {
      velox::test::VectorMaker vectorMaker{pool};
      // Weights are independent of the row number, hence the unnamed
      // parameter.
      auto weight = vectorMaker.flatVector<int64_t>(size, [&](auto /*row*/) {
        return boost::random::uniform_int_distribution<int64_t>(1, 1'000)(rng);
      });

      inputs.push_back(weight);
    }

    const TypePtr& percentileType = types[percentileTypeIndex];
    if (percentileType->isDouble()) {
      // Single percentile: same value for all batches of this iteration.
      if (!percentile_.has_value()) {
        percentile_ = pickPercentile(fuzzer, rng);
      }

      inputs.push_back(BaseVector::createConstant(
          DOUBLE(), percentile_.value(), size, pool));
    } else {
      VELOX_CHECK(percentileType->isArray());
      VELOX_CHECK(percentileType->childAt(0)->isDouble());

      // Array of percentiles: pick three values once per iteration.
      if (percentiles_.empty()) {
        percentiles_.push_back(pickPercentile(fuzzer, rng));
        percentiles_.push_back(pickPercentile(fuzzer, rng));
        percentiles_.push_back(pickPercentile(fuzzer, rng));
      }

      // Build a single-row array vector holding the percentiles, then expose
      // that row as a constant of the requested size.
      auto arrayVector =
          BaseVector::create<ArrayVector>(ARRAY(DOUBLE()), 1, pool);
      auto elementsVector = arrayVector->elements()->asFlatVector<double>();
      elementsVector->resize(percentiles_.size());
      for (size_t i = 0; i < percentiles_.size(); ++i) {
        elementsVector->set(i, percentiles_[i]);
      }
      arrayVector->setOffsetAndSize(0, 0, percentiles_.size());

      inputs.push_back(BaseVector::wrapInConstant(size, 0, arrayVector));
    }

    if (types.size() > percentileTypeIndex + 1) {
      // Last argument is 'accuracy'.
      VELOX_CHECK(types.back()->isDouble());
      if (!accuracy_.has_value()) {
        accuracy_ = boost::random::uniform_01<double>()(rng);
      }

      inputs.push_back(
          BaseVector::createConstant(DOUBLE(), accuracy_.value(), size, pool));
    }

    return inputs;
  }

  /// Forgets the cached percentile(s) and accuracy so that the next
  /// generate() call picks new ones.
  void reset() override {
    percentile_.reset();
    percentiles_.clear();
    accuracy_.reset();
  }

 private:
  // Picks a percentile value, favoring a fixed list of commonly used ones.
  double pickPercentile(VectorFuzzer& fuzzer, FuzzerGenerator& rng) {
    // 10% of the times generate random value in [0, 1] range.
    // 90% of the times use one of the common values.
    if (fuzzer.coinToss(0.1)) {
      return boost::random::uniform_01<double>()(rng);
    }

    static const std::vector<double> kPercentiles = {
        0.1, 0.25, 0.5, 0.75, 0.90, 0.95, 0.99, 0.999, 0.9999};

    const auto index =
        boost::random::uniform_int_distribution<uint32_t>()(rng) %
        kPercentiles.size();

    return kPercentiles[index];
  }

  // Percentile used when the percentile argument is a single DOUBLE.
  std::optional<double> percentile_;
  // Percentiles used when the percentile argument is an ARRAY(DOUBLE).
  std::vector<double> percentiles_;
  // Value of the optional trailing 'accuracy' argument.
  std::optional<double> accuracy_;
};

std::unordered_map<std::string, std::shared_ptr<InputGenerator>>
getCustomInputGenerators() {
return {
Expand Down Expand Up @@ -905,8 +705,9 @@ int main(int argc, char** argv) {
};

using Runner = facebook::velox::exec::test::AggregationFuzzerRunner;
using Options = facebook::velox::exec::test::AggregationFuzzerOptions;

Runner::Options options;
Options options;
options.onlyFunctions = FLAGS_only;
options.skipFunctions = skipFunctions;
options.customVerificationFunctions = customVerificationFunctions;
Expand Down
2 changes: 2 additions & 0 deletions velox/exec/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@ add_executable(velox_aggregation_fuzzer_test AggregationFuzzerTest.cpp)
target_link_libraries(
velox_aggregation_fuzzer_test
velox_aggregation_fuzzer
velox_aggregation_fuzzer_base
velox_aggregates
velox_window
velox_vector_test_lib
Expand All @@ -215,6 +216,7 @@ add_executable(spark_aggregation_fuzzer_test SparkAggregationFuzzerTest.cpp)
target_link_libraries(
spark_aggregation_fuzzer_test
velox_aggregation_fuzzer
velox_aggregation_fuzzer_base
velox_functions_spark_aggregates
velox_window
velox_vector_test_lib
Expand Down
4 changes: 3 additions & 1 deletion velox/exec/tests/SparkAggregationFuzzerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <unordered_set>

#include "velox/exec/tests/utils/AggregationFuzzerRunner.h"
#include "velox/exec/tests/utils/AggregationFuzzerUtils.h"
#include "velox/exec/tests/utils/DuckQueryRunner.h"
#include "velox/functions/sparksql/aggregates/Register.h"

Expand Down Expand Up @@ -78,8 +79,9 @@ int main(int argc, char** argv) {
});

using Runner = facebook::velox::exec::test::AggregationFuzzerRunner;
using Options = facebook::velox::exec::test::AggregationFuzzerOptions;

Runner::Options options;
Options options;
options.onlyFunctions = FLAGS_only;
options.skipFunctions = skipFunctions;
options.customVerificationFunctions = customVerificationFunctions;
Expand Down
Loading

0 comments on commit 044b938

Please sign in to comment.