Skip to content

Commit

Permalink
feat(p3): introduce external merge sort & remove several executors
Browse files Browse the repository at this point in the history
  • Loading branch information
xx01cyx committed Oct 18, 2024
1 parent d5f7943 commit 2a7d140
Show file tree
Hide file tree
Showing 15 changed files with 268 additions and 76 deletions.
1 change: 1 addition & 0 deletions src/execution/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ add_library(
aggregation_executor.cpp
delete_executor.cpp
execution_common.cpp
external_merge_sort_executor.cpp
executor_factory.cpp
filter_executor.cpp
fmt_impl.cpp
Expand Down
3 changes: 2 additions & 1 deletion src/execution/executor_factory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "execution/executors/abstract_executor.h"
#include "execution/executors/aggregation_executor.h"
#include "execution/executors/delete_executor.h"
#include "execution/executors/external_merge_sort_executor.h"
#include "execution/executors/filter_executor.h"
#include "execution/executors/hash_join_executor.h"
#include "execution/executors/index_scan_executor.h"
Expand Down Expand Up @@ -166,7 +167,7 @@ auto ExecutorFactory::CreateExecutor(ExecutorContext *exec_ctx, const AbstractPl
case PlanType::Sort: {
const auto *sort_plan = dynamic_cast<const SortPlanNode *>(plan.get());
auto child = ExecutorFactory::CreateExecutor(exec_ctx, sort_plan->GetChildPlan());
return std::make_unique<SortExecutor>(exec_ctx, sort_plan, std::move(child));
return std::make_unique<ExternalMergeSortExecutor<2>>(exec_ctx, sort_plan, std::move(child));
}

// Create a new topN executor
Expand Down
45 changes: 44 additions & 1 deletion src/execution/mock_scan_executor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ static const char *ta_list_2023_fall[] = {"skyzh", "yliang412", "ferna
static const char *ta_list_2024[] = {"AlSchlo", "walkingcabbages", "averyqi115", "lanlou1554", "sweetsuro",
"ChaosZhai", "SDTheSlayer", "xx01cyx", "yliang412", "thelongmarch-azx"};

// GitHub IDs served as the `github_id` column of __mock_table_tas_2024_fall.
// The number of entries here determines that table's row count, and entry i is paired with
// ta_oh_2024_fall[i], so the two arrays must stay the same length and in the same order.
static const char *ta_list_2024_fall[] = {"17zhangw", "connortsui20", "J-HowHuang", "lanlou1554",
"prashanthduvvada", "unw9527", "xx01cyx", "yashkothari42"};

static const char *ta_oh_2022[] = {"Tuesday", "Wednesday", "Monday", "Wednesday", "Thursday", "Friday",
"Wednesday", "Randomly", "Tuesday", "Monday", "Tuesday"};

Expand All @@ -48,11 +51,15 @@ static const char *ta_oh_2023_fall[] = {"Randomly", "Tuesday", "Wednesday", "T
static const char *ta_oh_2024[] = {"Friday", "Thursday", "Friday", "Wednesday", "Thursday",
"Yesterday", "Monday", "Tuesday", "Tuesday", "Monday"};

// Office-hour day served as the `office_hour` column of __mock_table_tas_2024_fall.
// Indexed by the same cursor as ta_list_2024_fall, so it needs one entry per TA in that array.
static const char *ta_oh_2024_fall[] = {"Wednesday", "Thursday", "Tuesday", "Monday",
"Friday", "Thursday", "Tuesday", "Friday"};

static const char *course_on_date[] = {"Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"};

const char *mock_table_list[] = {"__mock_table_1", "__mock_table_2", "__mock_table_3", "__mock_table_tas_2022",
"__mock_table_tas_2023", "__mock_table_tas_2023_fall", "__mock_table_tas_2024",
"__mock_agg_input_small", "__mock_agg_input_big", "__mock_table_schedule_2022",
"__mock_table_tas_2024_fall", "__mock_agg_input_small", "__mock_agg_input_big",
"__mock_external_merge_sort_input", "__mock_table_schedule_2022",
"__mock_table_schedule", "__mock_table_123", "__mock_graph",
// For leaderboard Q1
"__mock_t1",
Expand Down Expand Up @@ -94,6 +101,10 @@ auto GetMockTableSchemaOf(const std::string &table) -> Schema {
return Schema{std::vector{Column{"github_id", TypeId::VARCHAR, 128}, Column{"office_hour", TypeId::VARCHAR, 128}}};
}

if (table == "__mock_table_tas_2024_fall") {
return Schema{std::vector{Column{"github_id", TypeId::VARCHAR, 128}, Column{"office_hour", TypeId::VARCHAR, 128}}};
}

if (table == "__mock_table_schedule_2022") {
return Schema{std::vector{Column{"day_of_week", TypeId::VARCHAR, 128}, Column{"has_lecture", TypeId::INTEGER}}};
}
Expand All @@ -108,6 +119,11 @@ auto GetMockTableSchemaOf(const std::string &table) -> Schema {
Column{"v5", TypeId::INTEGER}, Column{"v6", TypeId::VARCHAR, 128}}};
}

if (table == "__mock_external_merge_sort_input") {
return Schema{
std::vector{Column{"v1", TypeId::INTEGER}, Column{"v2", TypeId::INTEGER}, Column{"v3", TypeId::INTEGER}}};
}

if (table == "__mock_graph") {
return Schema{std::vector{Column{"src", TypeId::INTEGER}, Column{"dst", TypeId::INTEGER},
Column{"src_label", TypeId::VARCHAR, 8}, Column{"dst_label", TypeId::VARCHAR, 8},
Expand Down Expand Up @@ -182,6 +198,10 @@ auto GetSizeOf(const MockScanPlanNode *plan) -> size_t {
return sizeof(ta_list_2024) / sizeof(ta_list_2024[0]);
}

if (table == "__mock_table_tas_2024_fall") {
return sizeof(ta_list_2024_fall) / sizeof(ta_list_2024_fall[0]);
}

if (table == "__mock_table_schedule_2022") {
return sizeof(course_on_date) / sizeof(course_on_date[0]);
}
Expand All @@ -198,6 +218,10 @@ auto GetSizeOf(const MockScanPlanNode *plan) -> size_t {
return 10000;
}

if (table == "__mock_external_merge_sort_input") {
return 100000;
}

if (table == "__mock_graph") {
return GRAPH_NODE_CNT * GRAPH_NODE_CNT;
}
Expand Down Expand Up @@ -329,6 +353,15 @@ auto GetFunctionOf(const MockScanPlanNode *plan) -> std::function<Tuple(size_t)>
};
}

if (table == "__mock_table_tas_2024_fall") {
return [plan](size_t cursor) {
std::vector<Value> values{};
values.push_back(ValueFactory::GetVarcharValue(ta_list_2024_fall[cursor]));
values.push_back(ValueFactory::GetVarcharValue(ta_oh_2024_fall[cursor]));
return Tuple{values, &plan->OutputSchema()};
};
}

if (table == "__mock_table_schedule_2022") {
return [plan](size_t cursor) {
std::vector<Value> values{};
Expand Down Expand Up @@ -374,6 +407,16 @@ auto GetFunctionOf(const MockScanPlanNode *plan) -> std::function<Tuple(size_t)>
return Tuple{values, &plan->OutputSchema()};
};
}
// select * from __mock_external_merge_sort_input order by v2 desc, v3 asc limit 100;
if (table == "__mock_external_merge_sort_input") {
return [plan](size_t cursor) {
std::vector<Value> values{};
values.push_back(ValueFactory::GetIntegerValue(cursor));
values.push_back(ValueFactory::GetIntegerValue((cursor + 1777) % 15000));
values.push_back(ValueFactory::GetIntegerValue((cursor + 3) % 111));
return Tuple{values, &plan->OutputSchema()};
};
}

if (table == "__mock_table_123") {
return [plan](size_t cursor) {
Expand Down
2 changes: 1 addition & 1 deletion src/include/common/bustub_instance.h
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ class BusTubInstance {
auto MakeExecutorContext(Transaction *txn, bool is_modify) -> std::unique_ptr<ExecutorContext>;

public:
explicit BusTubInstance(const std::filesystem::path &db_file_name, size_t bpm_size = 128);
explicit BusTubInstance(const std::filesystem::path &db_file_name, size_t bpm_size = BUFFER_POOL_SIZE);

explicit BusTubInstance(size_t bpm_size = 128);

Expand Down
2 changes: 1 addition & 1 deletion src/include/common/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ static constexpr int INVALID_TXN_ID = -1; // invalid transaction id
static constexpr int INVALID_LSN = -1; // invalid log sequence number

static constexpr int BUSTUB_PAGE_SIZE = 4096; // size of a data page in byte
static constexpr int BUFFER_POOL_SIZE = 10; // size of buffer pool
static constexpr int BUFFER_POOL_SIZE = 128; // size of buffer pool
static constexpr int DEFAULT_DB_IO_SIZE = 16; // starting size of file on disk
static constexpr int LOG_BUFFER_SIZE = ((BUFFER_POOL_SIZE + 1) * BUSTUB_PAGE_SIZE); // size of a log buffer in byte
static constexpr int BUCKET_SIZE = 50; // size of extendible hash bucket
Expand Down
3 changes: 3 additions & 0 deletions src/include/storage/table/tuple.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,9 @@ class Tuple {
// constructor for creating a new tuple based on input value
Tuple(std::vector<Value> values, const Schema *schema);

// constructor for creating a new tuple by copying from existing bytes
Tuple(RID rid, const char *data, uint32_t size);

Tuple(const Tuple &other) = default;

// move constructor
Expand Down
1 change: 0 additions & 1 deletion src/optimizer/optimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ auto Optimizer::Optimize(const AbstractPlanNodeRef &plan) -> AbstractPlanNodeRef
p = OptimizeMergeProjection(p);
p = OptimizeMergeFilterNLJ(p);
p = OptimizeOrderByAsIndexScan(p);
p = OptimizeSortLimitAsTopN(p);
p = OptimizeMergeFilterScan(p);
p = OptimizeSeqScanAsIndexScan(p);
return p;
Expand Down
7 changes: 7 additions & 0 deletions src/storage/table/tuple.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

#include <cassert>
#include <cstdlib>
#include <cstring>
#include <sstream>
#include <string>
#include <vector>
Expand Down Expand Up @@ -60,6 +61,12 @@ Tuple::Tuple(std::vector<Value> values, const Schema *schema) {
}
}

// Build a tuple that owns a private copy of `size` bytes starting at `data` and carries `rid`.
//   rid  - record id stored in the tuple
//   data - first byte to copy; only dereferenced when size > 0
//   size - number of bytes to copy
Tuple::Tuple(RID rid, const char *data, uint32_t size) {
  rid_ = rid;
  // A range assign copies the bytes in one pass. The previous resize() + memcpy() zero-filled the
  // buffer first and invoked memcpy even for an empty copy, which is undefined when `data` is null.
  data_.assign(data, data + size);
}

auto Tuple::GetValue(const Schema *schema, const uint32_t column_idx) const -> Value {
assert(schema);
const TypeId column_type = schema->GetColumn(column_idx).GetType();
Expand Down
18 changes: 8 additions & 10 deletions test/sql/p3.00-primer.slt
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
query rowsort
select github_id, office_hour from __mock_table_tas_2024;
select github_id, office_hour from __mock_table_tas_2024_fall;
----
AlSchlo Friday
walkingcabbages Thursday
averyqi115 Friday
lanlou1554 Wednesday
sweetsuro Thursday
ChaosZhai Yesterday
SDTheSlayer Monday
17zhangw Wednesday
connortsui20 Thursday
J-HowHuang Tuesday
lanlou1554 Monday
prashanthduvvada Friday
unw9527 Thursday
xx01cyx Tuesday
yliang412 Tuesday
thelongmarch-azx Monday
yashkothari42 Friday
4 changes: 2 additions & 2 deletions test/sql/p3.07-simple-agg.slt
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@

# How many TAs are there in 2024 Fall?
query
select count(*) from __mock_table_tas_2024;
select count(*) from __mock_table_tas_2024_fall;
----
10
8

# The real test process begins...

Expand Down
5 changes: 2 additions & 3 deletions test/sql/p3.08-group-agg-1.slt
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,11 @@
# "rowsort" means that the order of result doesn't matter.

query rowsort
select office_hour, count(*) from __mock_table_tas_2024 group by office_hour;
select office_hour, count(*) from __mock_table_tas_2024_fall group by office_hour;
----
Tuesday 2
Friday 2
Monday 2
Yesterday 1
Monday 1
Wednesday 1
Thursday 2

Expand Down
7 changes: 3 additions & 4 deletions test/sql/p3.10-simple-join.slt
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,12 @@ set force_optimizer_starter_rule=yes

query rowsort
select * from
__mock_table_tas_2024 inner join __mock_table_schedule
__mock_table_tas_2024_fall inner join __mock_table_schedule
on office_hour = day_of_week
where has_lecture = 1;
----
lanlou1554 Wednesday Wednesday 1
SDTheSlayer Monday Monday 1
thelongmarch-azx Monday Monday 1
lanlou1554 Monday Monday 1
17zhangw Wednesday Wednesday 1


# The real test begins...
Expand Down
7 changes: 3 additions & 4 deletions test/sql/p3.14-hash-join.slt
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,12 @@

query rowsort +ensure:hash_join
select * from
__mock_table_tas_2024 inner join __mock_table_schedule
__mock_table_tas_2024_fall inner join __mock_table_schedule
on office_hour = day_of_week
where has_lecture = 1;
----
SDTheSlayer Monday Monday 1
thelongmarch-azx Monday Monday 1
lanlou1554 Wednesday Wednesday 1
lanlou1554 Monday Monday 1
17zhangw Wednesday Wednesday 1


# The real test begins...
Expand Down
Loading

0 comments on commit 2a7d140

Please sign in to comment.