Skip to content

Commit

Permalink
feat: Add sequence storage connector with index lookup join integrati…
Browse files Browse the repository at this point in the history
…on (#12227)

Summary:
Pull Request resolved: #12227

Add a sequence storage connector and integrate it with the Velox index lookup join. Currently we only support equality joins
and call the multi-get KV API, which assumes a 1:1 mapping (note that the Velox lookup join operator is able to handle a 1:N mapping).
We support reading the key column from the lookup table via a hack that projects the key column from the lookup input.

Reviewed By: wenqiwooo, yuandagits

Differential Revision: D68935372

fbshipit-source-id: 7ced6744f462534f0fc2e2c4db94495a1bbae1cf
  • Loading branch information
xiaoxmeng authored and facebook-github-bot committed Jan 31, 2025
1 parent 3578ef6 commit eb91ba6
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 2 deletions.
1 change: 0 additions & 1 deletion velox/exec/IndexLookupJoin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -508,7 +508,6 @@ RowVectorPtr IndexLookupJoin::produceRemainingOutputForLeftJoin() {
VELOX_CHECK_NULL(lookupResult_);
VELOX_CHECK(hasRemainingOutputForLeftJoin());
VELOX_CHECK_NULL(rawLookupInputHitIndices_);

prepareOutputRowMappings(outputBatchSize_);
VELOX_CHECK_NOT_NULL(rawLookupOutputNulls_);

Expand Down
66 changes: 65 additions & 1 deletion velox/exec/tests/IndexLookupJoinTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ class IndexLookupJoinTest : public HiveConnectorTestBase,

// Per-test teardown: unregisters the connector factory and the connector that
// are keyed by kTestIndexConnectorName, then runs the base fixture's teardown.
void TearDown() override {
// Remove the connector factory registered under the test index connector name.
connector::unregisterConnectorFactory(kTestIndexConnectorName);
// Remove the connector registered under the same name.
// NOTE(review): presumably both registrations are made in SetUp() (not
// visible here) — confirm they are paired so tests do not leak registrations.
connector::unregisterConnector(kTestIndexConnectorName);
// Run the base class teardown after the test-specific cleanup above.
HiveConnectorTestBase::TearDown();
}

Expand Down Expand Up @@ -289,7 +290,7 @@ class IndexLookupJoinTest : public HiveConnectorTestBase,
core::PlanNodePtr makeLookupPlan(
const std::shared_ptr<core::PlanNodeIdGenerator>& planNodeIdGenerator,
core::TableScanNodePtr indexScanNode,
const std::vector<RowVectorPtr> probeVectors,
const std::vector<RowVectorPtr>& probeVectors,
const std::vector<std::string>& outputColumns,
core::JoinType joinType,
core::PlanNodeId& joinNodeId) {
Expand Down Expand Up @@ -503,6 +504,15 @@ TEST_P(IndexLookupJoinTest, basic) {
{"t1", "u1", "u2", "u3"},
core::JoinType::kInner,
"SELECT t.c1, u.c1, u.c2, u.c3 FROM t, u WHERE t.c0 = u.c0"},
{2048,
1,
10,
100,
10,
{"u0", "u1", "u2", "u3"},
{"t1", "u1", "u2", "u3"},
core::JoinType::kInner,
"SELECT t.c1, u.c1, u.c2, u.c3 FROM t, u WHERE t.c0 = u.c0"},
// 10% match + duplicate with larger lookup table.
{500,
4,
Expand All @@ -513,6 +523,15 @@ TEST_P(IndexLookupJoinTest, basic) {
{"t1", "u1", "u2", "u3"},
core::JoinType::kInner,
"SELECT t.c1, u.c1, u.c2, u.c3 FROM t, u WHERE t.c0 = u.c0"},
{2048,
4,
10,
100,
10,
{"u0", "u1", "u2", "u3"},
{"t1", "u1", "u2", "u3"},
core::JoinType::kInner,
"SELECT t.c1, u.c1, u.c2, u.c3 FROM t, u WHERE t.c0 = u.c0"},
// Empty lookup table.
{0,
1,
Expand Down Expand Up @@ -573,6 +592,15 @@ TEST_P(IndexLookupJoinTest, basic) {
{"t1", "u1", "u2", "u3"},
core::JoinType::kInner,
"SELECT t.c1, u.c1, u.c2, u.c3 FROM t, u WHERE t.c0 = u.c0"},
{2048,
1,
10,
100,
2,
{"u0", "u1", "u2", "u3"},
{"t1", "u1", "u2", "u3"},
core::JoinType::kInner,
"SELECT t.c1, u.c1, u.c2, u.c3 FROM t, u WHERE t.c0 = u.c0"},
// All matches + duplicate with larger lookup table.
{500,
4,
Expand All @@ -583,6 +611,15 @@ TEST_P(IndexLookupJoinTest, basic) {
{"t1", "u1", "u2", "u3"},
core::JoinType::kInner,
"SELECT t.c1, u.c1, u.c2, u.c3 FROM t, u WHERE t.c0 = u.c0"},
{2048,
4,
10,
100,
2,
{"u0", "u1", "u2", "u3"},
{"t1", "u1", "u2", "u3"},
core::JoinType::kInner,
"SELECT t.c1, u.c1, u.c2, u.c3 FROM t, u WHERE t.c0 = u.c0"},
// No probe projection.
{500,
1,
Expand Down Expand Up @@ -726,6 +763,15 @@ TEST_P(IndexLookupJoinTest, basic) {
{"t1", "u1", "u2", "u3"},
core::JoinType::kLeft,
"SELECT t.c1, u.c1, u.c2, u.c3 FROM t LEFT JOIN u ON t.c0 = u.c0"},
{2048,
1,
10,
100,
2,
{"u0", "u1", "u2", "u3"},
{"t1", "u1", "u2", "u3"},
core::JoinType::kLeft,
"SELECT t.c1, u.c1, u.c2, u.c3 FROM t LEFT JOIN u ON t.c0 = u.c0"},
// Very few (2%) match + duplicate with larger lookup table.
{500,
4,
Expand All @@ -746,6 +792,15 @@ TEST_P(IndexLookupJoinTest, basic) {
{"t1", "u1", "u2", "u3"},
core::JoinType::kLeft,
"SELECT t.c1, u.c1, u.c2, u.c3 FROM t LEFT JOIN u ON t.c0 = u.c0"},
{2048,
1,
10,
100,
2,
{"u0", "u1", "u2", "u3"},
{"t1", "u1", "u2", "u3"},
core::JoinType::kLeft,
"SELECT t.c1, u.c1, u.c2, u.c3 FROM t LEFT JOIN u ON t.c0 = u.c0"},
// All matches + duplicate with larger lookup table.
{500,
4,
Expand All @@ -756,6 +811,15 @@ TEST_P(IndexLookupJoinTest, basic) {
{"t1", "u1", "u2", "u3"},
core::JoinType::kLeft,
"SELECT t.c1, u.c1, u.c2, u.c3 FROM t LEFT JOIN u ON t.c0 = u.c0"},
{2048,
4,
10,
100,
2,
{"u0", "u1", "u2", "u3"},
{"t1", "u1", "u2", "u3"},
core::JoinType::kLeft,
"SELECT t.c1, u.c1, u.c2, u.c3 FROM t LEFT JOIN u ON t.c0 = u.c0"},
// Probe column reorder in output.
{500,
4,
Expand Down

0 comments on commit eb91ba6

Please sign in to comment.