Skip to content

Commit

Permalink
Prestissimo ApproxMostFrequent JSON (facebookincubator#12189)
Browse files Browse the repository at this point in the history
Summary:

Prestissimo's ApproxMostFrequent aggregate is not implemented for JSON input. This PR adds support for the JSON type.

Differential Revision: D68287956
  • Loading branch information
natashasehgal authored and facebook-github-bot committed Jan 28, 2025
1 parent 9999ae0 commit 246b99a
Show file tree
Hide file tree
Showing 5 changed files with 46 additions and 2 deletions.
3 changes: 2 additions & 1 deletion velox/expression/FunctionSignature.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,8 @@ void validateBaseTypeAndCollectTypeParams(

if (!isPositiveInteger(typeName) &&
!tryMapNameToTypeKind(typeName).has_value() &&
!isDecimalName(typeName) && !isDateName(typeName)) {
!isDecimalName(typeName) && !isDateName(typeName) &&
typeName != "JSON") {
VELOX_USER_CHECK(hasType(typeName), "Type doesn't exist: '{}'", typeName);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -477,7 +477,13 @@ void registerApproxMostFrequentAggregate(
bool overwrite) {
std::vector<std::shared_ptr<exec::AggregateFunctionSignature>> signatures;
for (const auto& valueType :
{"boolean", "tinyint", "smallint", "integer", "bigint", "varchar"}) {
{"boolean",
"tinyint",
"smallint",
"integer",
"bigint",
"varchar",
"json"}) {
signatures.push_back(
exec::AggregateFunctionSignatureBuilder()
.returnType(fmt::format("map({},bigint)", valueType))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -281,5 +281,38 @@ TEST_F(ApproxMostFrequentTestBoolean, basic) {
{input}, {"c0"}, {"approx_most_frequent(3, c5, 31)"}, {expected});
}

/// Test fixture for the approx_most_frequent cases that use JSON-document
/// strings as input values. It adds no state of its own.
class ApproxMostFrequentTestJson : public AggregationTestBase {
 protected:
  /// Delegates to the base fixture's set-up; no extra per-test work is done.
  void SetUp() override {
    AggregationTestBase::SetUp();
  }
};

/// Feeds six JSON-document strings through approx_most_frequent(2, c0, 31)
/// as a global aggregation and expects the two most common documents with
/// their counts.
TEST_F(ApproxMostFrequentTestJson, basic) {
  // The three distinct documents that appear in the input.
  const std::string storeDoc = "{\"type\": \"store\"}";
  const std::string fruitDoc = "{\"type\": \"fruit\"}";
  const std::string bookDoc = "{\"type\": \"book\"}";

  // Input rows: "fruit" occurs 3 times, "store" twice, "book" once.
  const std::vector<std::string> documents = {
      storeDoc, fruitDoc, fruitDoc, bookDoc, storeDoc, fruitDoc};
  const auto numRows = static_cast<vector_size_t>(documents.size());

  // NOTE(review): no explicit type is passed here, so the column presumably
  // gets the default type for StringView rather than JSON. Confirm that the
  // JSON signature is actually exercised by this test.
  auto input = makeFlatVector<StringView>(
      numRows, [&documents](auto row) { return StringView(documents[row]); });

  // A single result row: a map from document to its count for the top two.
  MapVectorPtr expectedCounts = makeMapVector<StringView, int64_t>(
      {{{StringView(fruitDoc), 3}, {StringView(storeDoc), 2}}});
  auto expected = makeRowVector({{expectedCounts}});

  // No grouping keys, so the whole input is aggregated into one row.
  testAggregations(
      {makeRowVector({input})},
      {},
      {"approx_most_frequent(2, c0, 31)"},
      {expected});
}

} // namespace
} // namespace facebook::velox::aggregate::test
2 changes: 2 additions & 0 deletions velox/functions/prestosql/fuzzer/AggregationFuzzerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#include "velox/functions/prestosql/fuzzer/MinMaxByResultVerifier.h"
#include "velox/functions/prestosql/fuzzer/MinMaxInputGenerator.h"
#include "velox/functions/prestosql/registration/RegistrationFunctions.h"
#include "velox/functions/prestosql/types/JsonType.h"
#include "velox/functions/prestosql/window/WindowFunctionsRegistration.h"
#include "velox/vector/fuzzer/VectorFuzzer.h"

Expand Down Expand Up @@ -101,6 +102,7 @@ int main(int argc, char** argv) {
// experience, and initialize glog and gflags.
folly::Init init(&argc, &argv);

facebook::velox::registerJsonType();
// Register only presto supported signatures if we are verifying against
// Presto.
if (FLAGS_presto_url.empty()) {
Expand Down
2 changes: 2 additions & 0 deletions velox/functions/prestosql/fuzzer/WindowFuzzerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "velox/functions/prestosql/fuzzer/MinMaxInputGenerator.h"
#include "velox/functions/prestosql/fuzzer/WindowOffsetInputGenerator.h"
#include "velox/functions/prestosql/registration/RegistrationFunctions.h"
#include "velox/functions/prestosql/types/JsonType.h"
#include "velox/functions/prestosql/window/WindowFunctionsRegistration.h"
#include "velox/vector/fuzzer/VectorFuzzer.h"

Expand Down Expand Up @@ -92,6 +93,7 @@ getCustomInputGenerators() {
} // namespace facebook::velox::exec::test

int main(int argc, char** argv) {
facebook::velox::registerJsonType();
facebook::velox::aggregate::prestosql::registerAllAggregateFunctions(
"", false, true);
facebook::velox::aggregate::prestosql::registerInternalAggregateFunctions("");
Expand Down

0 comments on commit 246b99a

Please sign in to comment.