From 1c32192ce91c2ac6dd2e1a9b894aae1a10f0f633 Mon Sep 17 00:00:00 2001 From: Jimmy Lu Date: Thu, 6 Feb 2025 07:55:25 -0800 Subject: [PATCH] test: Add serialization backward compatibility test for ApproxMostFrequentStreamSummary (#12242) Summary: Pull Request resolved: https://github.com/facebookincubator/velox/pull/12242 Reviewed By: kagamiori Differential Revision: D69069936 fbshipit-source-id: 7c14dfc73f32e4609ee3a340d635d3dec8b4ae83 --- .../lib/ApproxMostFrequentStreamSummary.h | 6 +- .../ApproxMostFrequentStreamSummaryTest.cpp | 62 ++++++++++++++++++- 2 files changed, 65 insertions(+), 3 deletions(-) diff --git a/velox/functions/lib/ApproxMostFrequentStreamSummary.h b/velox/functions/lib/ApproxMostFrequentStreamSummary.h index bc49cd5b2507..b34197c15c30 100644 --- a/velox/functions/lib/ApproxMostFrequentStreamSummary.h +++ b/velox/functions/lib/ApproxMostFrequentStreamSummary.h @@ -58,7 +58,11 @@ struct ApproxMostFrequentStreamSummary { /// Calculate the size needed for serialization. size_t serializedByteSize() const; - /// Serialize the summary into bytes. + /// Serialize the summary into bytes. The serialization should always be + /// backward compatible, meaning newer code should always be able to read + /// serialization from old version. Essentially this means the serialization + /// format should not change. + /// /// @param out Pre-allocated memory at least serializedByteSize() in size void serialize(char* out) const; diff --git a/velox/functions/lib/tests/ApproxMostFrequentStreamSummaryTest.cpp b/velox/functions/lib/tests/ApproxMostFrequentStreamSummaryTest.cpp index 7b1da8d67e30..533316aa0996 100644 --- a/velox/functions/lib/tests/ApproxMostFrequentStreamSummaryTest.cpp +++ b/velox/functions/lib/tests/ApproxMostFrequentStreamSummaryTest.cpp @@ -14,11 +14,12 @@ * limitations under the License. 
*/ -#include - #include "velox/functions/lib/ApproxMostFrequentStreamSummary.h" #include "velox/functions/lib/ZetaDistribution.h" +#include +#include + namespace facebook::velox::functions { namespace { @@ -26,6 +27,19 @@ int capacity(int k, double alpha) { return k * pow(k / alpha, 1 / alpha); } +template +std::string encodeBase64(const ApproxMostFrequentStreamSummary& summary) { + std::string data(summary.serializedByteSize(), '\0'); + summary.serialize(data.data()); + return folly::base64Encode(data); +} + +std::string decodeBase64(std::string_view input) { + std::string decoded(folly::base64DecodedSize(input), '\0'); + folly::base64Decode(input, decoded.data()); + return decoded; +} + TEST(ApproxMostFrequentStreamSummaryTest, exact) { const int capacity = 10; for (int totalCount : {0, 5, 10}) { @@ -156,6 +170,50 @@ TEST(ApproxMostFrequentStreamSummaryTest, serializeStringView) { EXPECT_EQ(summary2.topK(10), topK); } +template +void testSerializationCompatibility() { + SCOPED_TRACE(typeid(T).name()); + const char* expectedData; + std::string strings[101]; + ApproxMostFrequentStreamSummary expected; + expected.setCapacity(100); + if constexpr (std::is_same_v) { + expectedData = + 
"ZAAAAAEAAAACAAAAAwAAAAQAAAAFAAAABgAAAAcAAAAIAAAACQAAAAoAAAALAAAADAAAAA0AAAAOAAAADwAAABAAAAARAAAAEgAAABMAAAAUAAAAFQAAABYAAAAXAAAAGAAAABkAAAAaAAAAGwAAABwAAAAdAAAAHgAAAB8AAAAgAAAAIQAAACIAAAAjAAAAJAAAACUAAAAmAAAAJwAAACgAAAApAAAAKgAAACsAAAAsAAAALQAAAC4AAAAvAAAAMAAAADEAAAAyAAAAMwAAADQAAAA1AAAANgAAADcAAAA4AAAAOQAAADoAAAA7AAAAPAAAAD0AAAA+AAAAPwAAAEAAAABBAAAAQgAAAEMAAABEAAAARQAAAEYAAABHAAAASAAAAEkAAABKAAAASwAAAEwAAABNAAAATgAAAE8AAABQAAAAUQAAAFIAAABTAAAAVAAAAFUAAABWAAAAVwAAAFgAAABZAAAAWgAAAFsAAABcAAAAXQAAAF4AAABfAAAAYAAAAGEAAABiAAAAYwAAAGQAAAABAAAAAAAAAAIAAAAAAAAAAwAAAAAAAAAEAAAAAAAAAAUAAAAAAAAABgAAAAAAAAAHAAAAAAAAAAgAAAAAAAAACQAAAAAAAAAKAAAAAAAAAAsAAAAAAAAADAAAAAAAAAANAAAAAAAAAA4AAAAAAAAADwAAAAAAAAAQAAAAAAAAABEAAAAAAAAAEgAAAAAAAAATAAAAAAAAABQAAAAAAAAAFQAAAAAAAAAWAAAAAAAAABcAAAAAAAAAGAAAAAAAAAAZAAAAAAAAABoAAAAAAAAAGwAAAAAAAAAcAAAAAAAAAB0AAAAAAAAAHgAAAAAAAAAfAAAAAAAAACAAAAAAAAAAIQAAAAAAAAAiAAAAAAAAACMAAAAAAAAAJAAAAAAAAAAlAAAAAAAAACYAAAAAAAAAJwAAAAAAAAAoAAAAAAAAACkAAAAAAAAAKgAAAAAAAAArAAAAAAAAACwAAAAAAAAALQAAAAAAAAAuAAAAAAAAAC8AAAAAAAAAMAAAAAAAAAAxAAAAAAAAADIAAAAAAAAAMwAAAAAAAAA0AAAAAAAAADUAAAAAAAAANgAAAAAAAAA3AAAAAAAAADgAAAAAAAAAOQAAAAAAAAA6AAAAAAAAADsAAAAAAAAAPAAAAAAAAAA9AAAAAAAAAD4AAAAAAAAAPwAAAAAAAABAAAAAAAAAAEEAAAAAAAAAQgAAAAAAAABDAAAAAAAAAEQAAAAAAAAARQAAAAAAAABGAAAAAAAAAEcAAAAAAAAASAAAAAAAAABJAAAAAAAAAEoAAAAAAAAASwAAAAAAAABMAAAAAAAAAE0AAAAAAAAATgAAAAAAAABPAAAAAAAAAFAAAAAAAAAAUQAAAAAAAABSAAAAAAAAAFMAAAAAAAAAVAAAAAAAAABVAAAAAAAAAFYAAAAAAAAAVwAAAAAAAABYAAAAAAAAAFkAAAAAAAAAWgAAAAAAAABbAAAAAAAAAFwAAAAAAAAAXQAAAAAAAABeAAAAAAAAAF8AAAAAAAAAYAAAAAAAAABhAAAAAAAAAGIAAAAAAAAAYwAAAAAAAABkAAAAAAAAAA=="; + for (int i = 1; i <= 100; ++i) { + for (int j = 0; j < i; ++j) { + expected.insert(i); + } + } + } else if constexpr (std::is_same_v) { + expectedData = + 
"ZAAAAAEAAAAxAAAAAAAAAAAAAAABAAAAMgAAAAAAAAAAAAAAAQAAADMAAAAAAAAAAAAAAAEAAAA0AAAAAAAAAAAAAAABAAAANQAAAAAAAAAAAAAAAQAAADYAAAAAAAAAAAAAAAEAAAA3AAAAAAAAAAAAAAABAAAAOAAAAAAAAAAAAAAAAQAAADkAAAAAAAAAAAAAAAIAAAAxMAAAAAAAAAAAAAACAAAAMTEAAAAAAAAAAAAAAgAAADEyAAAAAAAAAAAAAAIAAAAxMwAAAAAAAAAAAAACAAAAMTQAAAAAAAAAAAAAAgAAADE1AAAAAAAAAAAAAAIAAAAxNgAAAAAAAAAAAAACAAAAMTcAAAAAAAAAAAAAAgAAADE4AAAAAAAAAAAAAAIAAAAxOQAAAAAAAAAAAAACAAAAMjAAAAAAAAAAAAAAAgAAADIxAAAAAAAAAAAAAAIAAAAyMgAAAAAAAAAAAAACAAAAMjMAAAAAAAAAAAAAAgAAADI0AAAAAAAAAAAAAAIAAAAyNQAAAAAAAAAAAAACAAAAMjYAAAAAAAAAAAAAAgAAADI3AAAAAAAAAAAAAAIAAAAyOAAAAAAAAAAAAAACAAAAMjkAAAAAAAAAAAAAAgAAADMwAAAAAAAAAAAAAAIAAAAzMQAAAAAAAAAAAAACAAAAMzIAAAAAAAAAAAAAAgAAADMzAAAAAAAAAAAAAAIAAAAzNAAAAAAAAAAAAAACAAAAMzUAAAAAAAAAAAAAAgAAADM2AAAAAAAAAAAAAAIAAAAzNwAAAAAAAAAAAAACAAAAMzgAAAAAAAAAAAAAAgAAADM5AAAAAAAAAAAAAAIAAAA0MAAAAAAAAAAAAAACAAAANDEAAAAAAAAAAAAAAgAAADQyAAAAAAAAAAAAAAIAAAA0MwAAAAAAAAAAAAACAAAANDQAAAAAAAAAAAAAAgAAADQ1AAAAAAAAAAAAAAIAAAA0NgAAAAAAAAAAAAACAAAANDcAAAAAAAAAAAAAAgAAADQ4AAAAAAAAAAAAAAIAAAA0OQAAAAAAAAAAAAACAAAANTAAAAAAAAAAAAAAAgAAADUxAAAAAAAAAAAAAAIAAAA1MgAAAAAAAAAAAAACAAAANTMAAAAAAAAAAAAAAgAAADU0AAAAAAAAAAAAAAIAAAA1NQAAAAAAAAAAAAACAAAANTYAAAAAAAAAAAAAAgAAADU3AAAAAAAAAAAAAAIAAAA1OAAAAAAAAAAAAAACAAAANTkAAAAAAAAAAAAAAgAAADYwAAAAAAAAAAAAAAIAAAA2MQAAAAAAAAAAAAACAAAANjIAAAAAAAAAAAAAAgAAADYzAAAAAAAAAAAAAAIAAAA2NAAAAAAAAAAAAAACAAAANjUAAAAAAAAAAAAAAgAAADY2AAAAAAAAAAAAAAIAAAA2NwAAAAAAAAAAAAACAAAANjgAAAAAAAAAAAAAAgAAADY5AAAAAAAAAAAAAAIAAAA3MAAAAAAAAAAAAAACAAAANzEAAAAAAAAAAAAAAgAAADcyAAAAAAAAAAAAAAIAAAA3MwAAAAAAAAAAAAACAAAANzQAAAAAAAAAAAAAAgAAADc1AAAAAAAAAAAAAAIAAAA3NgAAAAAAAAAAAAACAAAANzcAAAAAAAAAAAAAAgAAADc4AAAAAAAAAAAAAAIAAAA3OQAAAAAAAAAAAAACAAAAODAAAAAAAAAAAAAAAgAAADgxAAAAAAAAAAAAAAIAAAA4MgAAAAAAAAAAAAACAAAAODMAAAAAAAAAAAAAAgAAADg0AAAAAAAAAAAAAAIAAAA4NQAAAAAAAAAAAAACAAAAODYAAAAAAAAAAAAAAgAAADg3AAAAAAAAAAAAAAIAAAA4OAAAAAAAAAAAAAACAAAAODkAAAAAAAAAAAAAAgAAADkwAAAAAAAAAAAAAAIAAAA5MQAAAAAAAAAAAAACAAAAOTIAAAAAAAAAAAAAAgAAADkzAAAAAAAAAAAAAAIAAAA5NAA
AAAAAAAAAAAACAAAAOTUAAAAAAAAAAAAAAgAAADk2AAAAAAAAAAAAAAIAAAA5NwAAAAAAAAAAAAACAAAAOTgAAAAAAAAAAAAAAgAAADk5AAAAAAAAAAAAAAMAAAAxMDAAAAAAAAAAAAABAAAAAAAAAAIAAAAAAAAAAwAAAAAAAAAEAAAAAAAAAAUAAAAAAAAABgAAAAAAAAAHAAAAAAAAAAgAAAAAAAAACQAAAAAAAAAKAAAAAAAAAAsAAAAAAAAADAAAAAAAAAANAAAAAAAAAA4AAAAAAAAADwAAAAAAAAAQAAAAAAAAABEAAAAAAAAAEgAAAAAAAAATAAAAAAAAABQAAAAAAAAAFQAAAAAAAAAWAAAAAAAAABcAAAAAAAAAGAAAAAAAAAAZAAAAAAAAABoAAAAAAAAAGwAAAAAAAAAcAAAAAAAAAB0AAAAAAAAAHgAAAAAAAAAfAAAAAAAAACAAAAAAAAAAIQAAAAAAAAAiAAAAAAAAACMAAAAAAAAAJAAAAAAAAAAlAAAAAAAAACYAAAAAAAAAJwAAAAAAAAAoAAAAAAAAACkAAAAAAAAAKgAAAAAAAAArAAAAAAAAACwAAAAAAAAALQAAAAAAAAAuAAAAAAAAAC8AAAAAAAAAMAAAAAAAAAAxAAAAAAAAADIAAAAAAAAAMwAAAAAAAAA0AAAAAAAAADUAAAAAAAAANgAAAAAAAAA3AAAAAAAAADgAAAAAAAAAOQAAAAAAAAA6AAAAAAAAADsAAAAAAAAAPAAAAAAAAAA9AAAAAAAAAD4AAAAAAAAAPwAAAAAAAABAAAAAAAAAAEEAAAAAAAAAQgAAAAAAAABDAAAAAAAAAEQAAAAAAAAARQAAAAAAAABGAAAAAAAAAEcAAAAAAAAASAAAAAAAAABJAAAAAAAAAEoAAAAAAAAASwAAAAAAAABMAAAAAAAAAE0AAAAAAAAATgAAAAAAAABPAAAAAAAAAFAAAAAAAAAAUQAAAAAAAABSAAAAAAAAAFMAAAAAAAAAVAAAAAAAAABVAAAAAAAAAFYAAAAAAAAAVwAAAAAAAABYAAAAAAAAAFkAAAAAAAAAWgAAAAAAAABbAAAAAAAAAFwAAAAAAAAAXQAAAAAAAABeAAAAAAAAAF8AAAAAAAAAYAAAAAAAAABhAAAAAAAAAGIAAAAAAAAAYwAAAAAAAABkAAAAAAAAAA=="; + for (int i = 1; i <= 100; ++i) { + strings[i] = std::to_string(i); + for (int j = 0; j < i; ++j) { + expected.insert(StringView(strings[i])); + } + } + } else { + VELOX_UNREACHABLE(); + } + ASSERT_EQ(encodeBase64(expected), expectedData); + auto data = decodeBase64(expectedData); + ApproxMostFrequentStreamSummary actual; + actual.setCapacity(100); + actual.mergeSerialized(data.data()); + ASSERT_EQ(actual.size(), expected.size()); + for (int i = 0; i < actual.size(); ++i) { + ASSERT_EQ(actual.values()[i], expected.values()[i]); + ASSERT_EQ(actual.counts()[i], expected.counts()[i]); + } +} + +TEST(ApproxMostFrequentStreamSummaryTest, serializationCompatibility) { + testSerializationCompatibility(); + testSerializationCompatibility(); +} + 
TEST(ApproxMostFrequentStreamSummaryTest, mergeSerialized) { constexpr int kSummaryCount = 10; constexpr int kCapacity = 30;