Skip to content

Commit

Permalink
test: Add serialization backward compatibility test for ApproxMostFre…
Browse files Browse the repository at this point in the history
…quentStreamSummary (#12242)

Summary: Pull Request resolved: #12242

Reviewed By: kagamiori

Differential Revision: D69069936

fbshipit-source-id: 7c14dfc73f32e4609ee3a340d635d3dec8b4ae83
  • Loading branch information
Yuhta authored and facebook-github-bot committed Feb 6, 2025
1 parent 1eb7987 commit 1c32192
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 3 deletions.
6 changes: 5 additions & 1 deletion velox/functions/lib/ApproxMostFrequentStreamSummary.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,11 @@ struct ApproxMostFrequentStreamSummary {
/// Calculate the size needed for serialization.
///
/// @return Minimum size of the buffer that must be passed to serialize().
size_t serializedByteSize() const;

/// Serialize the summary into bytes. The serialization should always be
/// backward compatible, meaning newer code should always be able to read
/// data serialized by an older version. Essentially this means the
/// serialization format should not change.
///
/// @param out Pre-allocated memory at least serializedByteSize() in size
void serialize(char* out) const;

Expand Down
62 changes: 60 additions & 2 deletions velox/functions/lib/tests/ApproxMostFrequentStreamSummaryTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,32 @@
* limitations under the License.
*/

#include <gtest/gtest.h>

#include "velox/functions/lib/ApproxMostFrequentStreamSummary.h"
#include "velox/functions/lib/ZetaDistribution.h"

#include <folly/base64.h>
#include <gtest/gtest.h>

namespace facebook::velox::functions {
namespace {

/// Evaluates k * (k / alpha)^(1 / alpha) in floating point and truncates the
/// result to an int.
///
/// @param k Integer factor; also the numerator of the power's base.
/// @param alpha Divisor of the base and reciprocal of the exponent.
/// @return The formula's value converted to int (fractional part discarded).
int capacity(int k, double alpha) {
  const double base = k / alpha;
  const double exponent = 1 / alpha;
  const double scaled = k * pow(base, exponent);
  // Same double -> int conversion the return statement would apply implicitly.
  return static_cast<int>(scaled);
}

/// Serializes `summary` into a scratch buffer of serializedByteSize() bytes
/// and returns that buffer base64-encoded.
///
/// @param summary Summary to serialize; not modified.
/// @return Base64 text of the serialized bytes.
template <typename T>
std::string encodeBase64(const ApproxMostFrequentStreamSummary<T>& summary) {
  const auto numBytes = summary.serializedByteSize();
  std::string buffer;
  buffer.resize(numBytes, '\0');
  summary.serialize(buffer.data());
  return folly::base64Encode(buffer);
}

/// Decodes base64 text into the raw bytes it represents.
///
/// Uses folly's std::string-returning overload, which reports malformed
/// input by throwing, instead of the (input, char*) overload whose result
/// struct was previously discarded. A decode failure therefore surfaces here
/// rather than as a partially filled buffer handed to the caller.
///
/// @param input Base64-encoded text.
/// @return The decoded bytes.
std::string decodeBase64(std::string_view input) {
  return folly::base64Decode(input);
}

TEST(ApproxMostFrequentStreamSummaryTest, exact) {
const int capacity = 10;
for (int totalCount : {0, 5, 10}) {
Expand Down Expand Up @@ -156,6 +170,50 @@ TEST(ApproxMostFrequentStreamSummaryTest, serializeStringView) {
EXPECT_EQ(summary2.topK(10), topK);
}

template <typename T>
void testSerializationCompatibility() {
SCOPED_TRACE(typeid(T).name());
const char* expectedData;
std::string strings[101];
ApproxMostFrequentStreamSummary<T> expected;
expected.setCapacity(100);
if constexpr (std::is_same_v<T, int>) {
expectedData =
"ZAAAAAEAAAACAAAAAwAAAAQAAAAFAAAABgAAAAcAAAAIAAAACQAAAAoAAAALAAAADAAAAA0AAAAOAAAADwAAABAAAAARAAAAEgAAABMAAAAUAAAAFQAAABYAAAAXAAAAGAAAABkAAAAaAAAAGwAAABwAAAAdAAAAHgAAAB8AAAAgAAAAIQAAACIAAAAjAAAAJAAAACUAAAAmAAAAJwAAACgAAAApAAAAKgAAACsAAAAsAAAALQAAAC4AAAAvAAAAMAAAADEAAAAyAAAAMwAAADQAAAA1AAAANgAAADcAAAA4AAAAOQAAADoAAAA7AAAAPAAAAD0AAAA+AAAAPwAAAEAAAABBAAAAQgAAAEMAAABEAAAARQAAAEYAAABHAAAASAAAAEkAAABKAAAASwAAAEwAAABNAAAATgAAAE8AAABQAAAAUQAAAFIAAABTAAAAVAAAAFUAAABWAAAAVwAAAFgAAABZAAAAWgAAAFsAAABcAAAAXQAAAF4AAABfAAAAYAAAAGEAAABiAAAAYwAAAGQAAAABAAAAAAAAAAIAAAAAAAAAAwAAAAAAAAAEAAAAAAAAAAUAAAAAAAAABgAAAAAAAAAHAAAAAAAAAAgAAAAAAAAACQAAAAAAAAAKAAAAAAAAAAsAAAAAAAAADAAAAAAAAAANAAAAAAAAAA4AAAAAAAAADwAAAAAAAAAQAAAAAAAAABEAAAAAAAAAEgAAAAAAAAATAAAAAAAAABQAAAAAAAAAFQAAAAAAAAAWAAAAAAAAABcAAAAAAAAAGAAAAAAAAAAZAAAAAAAAABoAAAAAAAAAGwAAAAAAAAAcAAAAAAAAAB0AAAAAAAAAHgAAAAAAAAAfAAAAAAAAACAAAAAAAAAAIQAAAAAAAAAiAAAAAAAAACMAAAAAAAAAJAAAAAAAAAAlAAAAAAAAACYAAAAAAAAAJwAAAAAAAAAoAAAAAAAAACkAAAAAAAAAKgAAAAAAAAArAAAAAAAAACwAAAAAAAAALQAAAAAAAAAuAAAAAAAAAC8AAAAAAAAAMAAAAAAAAAAxAAAAAAAAADIAAAAAAAAAMwAAAAAAAAA0AAAAAAAAADUAAAAAAAAANgAAAAAAAAA3AAAAAAAAADgAAAAAAAAAOQAAAAAAAAA6AAAAAAAAADsAAAAAAAAAPAAAAAAAAAA9AAAAAAAAAD4AAAAAAAAAPwAAAAAAAABAAAAAAAAAAEEAAAAAAAAAQgAAAAAAAABDAAAAAAAAAEQAAAAAAAAARQAAAAAAAABGAAAAAAAAAEcAAAAAAAAASAAAAAAAAABJAAAAAAAAAEoAAAAAAAAASwAAAAAAAABMAAAAAAAAAE0AAAAAAAAATgAAAAAAAABPAAAAAAAAAFAAAAAAAAAAUQAAAAAAAABSAAAAAAAAAFMAAAAAAAAAVAAAAAAAAABVAAAAAAAAAFYAAAAAAAAAVwAAAAAAAABYAAAAAAAAAFkAAAAAAAAAWgAAAAAAAABbAAAAAAAAAFwAAAAAAAAAXQAAAAAAAABeAAAAAAAAAF8AAAAAAAAAYAAAAAAAAABhAAAAAAAAAGIAAAAAAAAAYwAAAAAAAABkAAAAAAAAAA==";
for (int i = 1; i <= 100; ++i) {
for (int j = 0; j < i; ++j) {
expected.insert(i);
}
}
} else if constexpr (std::is_same_v<T, StringView>) {
expectedData =
"ZAAAAAEAAAAxAAAAAAAAAAAAAAABAAAAMgAAAAAAAAAAAAAAAQAAADMAAAAAAAAAAAAAAAEAAAA0AAAAAAAAAAAAAAABAAAANQAAAAAAAAAAAAAAAQAAADYAAAAAAAAAAAAAAAEAAAA3AAAAAAAAAAAAAAABAAAAOAAAAAAAAAAAAAAAAQAAADkAAAAAAAAAAAAAAAIAAAAxMAAAAAAAAAAAAAACAAAAMTEAAAAAAAAAAAAAAgAAADEyAAAAAAAAAAAAAAIAAAAxMwAAAAAAAAAAAAACAAAAMTQAAAAAAAAAAAAAAgAAADE1AAAAAAAAAAAAAAIAAAAxNgAAAAAAAAAAAAACAAAAMTcAAAAAAAAAAAAAAgAAADE4AAAAAAAAAAAAAAIAAAAxOQAAAAAAAAAAAAACAAAAMjAAAAAAAAAAAAAAAgAAADIxAAAAAAAAAAAAAAIAAAAyMgAAAAAAAAAAAAACAAAAMjMAAAAAAAAAAAAAAgAAADI0AAAAAAAAAAAAAAIAAAAyNQAAAAAAAAAAAAACAAAAMjYAAAAAAAAAAAAAAgAAADI3AAAAAAAAAAAAAAIAAAAyOAAAAAAAAAAAAAACAAAAMjkAAAAAAAAAAAAAAgAAADMwAAAAAAAAAAAAAAIAAAAzMQAAAAAAAAAAAAACAAAAMzIAAAAAAAAAAAAAAgAAADMzAAAAAAAAAAAAAAIAAAAzNAAAAAAAAAAAAAACAAAAMzUAAAAAAAAAAAAAAgAAADM2AAAAAAAAAAAAAAIAAAAzNwAAAAAAAAAAAAACAAAAMzgAAAAAAAAAAAAAAgAAADM5AAAAAAAAAAAAAAIAAAA0MAAAAAAAAAAAAAACAAAANDEAAAAAAAAAAAAAAgAAADQyAAAAAAAAAAAAAAIAAAA0MwAAAAAAAAAAAAACAAAANDQAAAAAAAAAAAAAAgAAADQ1AAAAAAAAAAAAAAIAAAA0NgAAAAAAAAAAAAACAAAANDcAAAAAAAAAAAAAAgAAADQ4AAAAAAAAAAAAAAIAAAA0OQAAAAAAAAAAAAACAAAANTAAAAAAAAAAAAAAAgAAADUxAAAAAAAAAAAAAAIAAAA1MgAAAAAAAAAAAAACAAAANTMAAAAAAAAAAAAAAgAAADU0AAAAAAAAAAAAAAIAAAA1NQAAAAAAAAAAAAACAAAANTYAAAAAAAAAAAAAAgAAADU3AAAAAAAAAAAAAAIAAAA1OAAAAAAAAAAAAAACAAAANTkAAAAAAAAAAAAAAgAAADYwAAAAAAAAAAAAAAIAAAA2MQAAAAAAAAAAAAACAAAANjIAAAAAAAAAAAAAAgAAADYzAAAAAAAAAAAAAAIAAAA2NAAAAAAAAAAAAAACAAAANjUAAAAAAAAAAAAAAgAAADY2AAAAAAAAAAAAAAIAAAA2NwAAAAAAAAAAAAACAAAANjgAAAAAAAAAAAAAAgAAADY5AAAAAAAAAAAAAAIAAAA3MAAAAAAAAAAAAAACAAAANzEAAAAAAAAAAAAAAgAAADcyAAAAAAAAAAAAAAIAAAA3MwAAAAAAAAAAAAACAAAANzQAAAAAAAAAAAAAAgAAADc1AAAAAAAAAAAAAAIAAAA3NgAAAAAAAAAAAAACAAAANzcAAAAAAAAAAAAAAgAAADc4AAAAAAAAAAAAAAIAAAA3OQAAAAAAAAAAAAACAAAAODAAAAAAAAAAAAAAAgAAADgxAAAAAAAAAAAAAAIAAAA4MgAAAAAAAAAAAAACAAAAODMAAAAAAAAAAAAAAgAAADg0AAAAAAAAAAAAAAIAAAA4NQAAAAAAAAAAAAACAAAAODYAAAAAAAAAAAAAAgAAADg3AAAAAAAAAAAAAAIAAAA4OAAAAAAAAAAAAAACAAAAODkAAAAAAAAAAAAAAgAAADkwAAAAAAAAAAAAAAIAAAA5MQAAAAAAAAAAAAACAAAAOTIAAAAAAAAAAAAAAgAAADkzAAAAAAAAAAAAAAIAAAA5NAA
AAAAAAAAAAAACAAAAOTUAAAAAAAAAAAAAAgAAADk2AAAAAAAAAAAAAAIAAAA5NwAAAAAAAAAAAAACAAAAOTgAAAAAAAAAAAAAAgAAADk5AAAAAAAAAAAAAAMAAAAxMDAAAAAAAAAAAAABAAAAAAAAAAIAAAAAAAAAAwAAAAAAAAAEAAAAAAAAAAUAAAAAAAAABgAAAAAAAAAHAAAAAAAAAAgAAAAAAAAACQAAAAAAAAAKAAAAAAAAAAsAAAAAAAAADAAAAAAAAAANAAAAAAAAAA4AAAAAAAAADwAAAAAAAAAQAAAAAAAAABEAAAAAAAAAEgAAAAAAAAATAAAAAAAAABQAAAAAAAAAFQAAAAAAAAAWAAAAAAAAABcAAAAAAAAAGAAAAAAAAAAZAAAAAAAAABoAAAAAAAAAGwAAAAAAAAAcAAAAAAAAAB0AAAAAAAAAHgAAAAAAAAAfAAAAAAAAACAAAAAAAAAAIQAAAAAAAAAiAAAAAAAAACMAAAAAAAAAJAAAAAAAAAAlAAAAAAAAACYAAAAAAAAAJwAAAAAAAAAoAAAAAAAAACkAAAAAAAAAKgAAAAAAAAArAAAAAAAAACwAAAAAAAAALQAAAAAAAAAuAAAAAAAAAC8AAAAAAAAAMAAAAAAAAAAxAAAAAAAAADIAAAAAAAAAMwAAAAAAAAA0AAAAAAAAADUAAAAAAAAANgAAAAAAAAA3AAAAAAAAADgAAAAAAAAAOQAAAAAAAAA6AAAAAAAAADsAAAAAAAAAPAAAAAAAAAA9AAAAAAAAAD4AAAAAAAAAPwAAAAAAAABAAAAAAAAAAEEAAAAAAAAAQgAAAAAAAABDAAAAAAAAAEQAAAAAAAAARQAAAAAAAABGAAAAAAAAAEcAAAAAAAAASAAAAAAAAABJAAAAAAAAAEoAAAAAAAAASwAAAAAAAABMAAAAAAAAAE0AAAAAAAAATgAAAAAAAABPAAAAAAAAAFAAAAAAAAAAUQAAAAAAAABSAAAAAAAAAFMAAAAAAAAAVAAAAAAAAABVAAAAAAAAAFYAAAAAAAAAVwAAAAAAAABYAAAAAAAAAFkAAAAAAAAAWgAAAAAAAABbAAAAAAAAAFwAAAAAAAAAXQAAAAAAAABeAAAAAAAAAF8AAAAAAAAAYAAAAAAAAABhAAAAAAAAAGIAAAAAAAAAYwAAAAAAAABkAAAAAAAAAA==";
for (int i = 1; i <= 100; ++i) {
strings[i] = std::to_string(i);
for (int j = 0; j < i; ++j) {
expected.insert(StringView(strings[i]));
}
}
} else {
VELOX_UNREACHABLE();
}
ASSERT_EQ(encodeBase64(expected), expectedData);
auto data = decodeBase64(expectedData);
ApproxMostFrequentStreamSummary<T> actual;
actual.setCapacity(100);
actual.mergeSerialized(data.data());
ASSERT_EQ(actual.size(), expected.size());
for (int i = 0; i < actual.size(); ++i) {
ASSERT_EQ(actual.values()[i], expected.values()[i]);
ASSERT_EQ(actual.counts()[i], expected.counts()[i]);
}
}

// Runs the golden-data serialization check for each element type that
// testSerializationCompatibility() has golden data for.
TEST(ApproxMostFrequentStreamSummaryTest, serializationCompatibility) {
// Integer values.
testSerializationCompatibility<int>();
// String values.
testSerializationCompatibility<StringView>();
}

TEST(ApproxMostFrequentStreamSummaryTest, mergeSerialized) {
constexpr int kSummaryCount = 10;
constexpr int kCapacity = 30;
Expand Down

0 comments on commit 1c32192

Please sign in to comment.