diff --git a/code/01-03/src/db.rs b/code/01-03/src/db.rs index d09c787..e4c1695 100644 --- a/code/01-03/src/db.rs +++ b/code/01-03/src/db.rs @@ -4,13 +4,12 @@ use std::sync::Arc; use crate::binder::{BindError, Binder}; use crate::catalog::{CatalogRef, DatabaseCatalog}; -use crate::executor::{ExecuteError, ExecutorBuilder}; +use crate::executor::{ExecuteError, Executor}; use crate::parser::{parse, ParserError}; /// The database instance. pub struct Database { catalog: CatalogRef, - executor_builder: ExecutorBuilder, } impl Default for Database { @@ -23,10 +22,7 @@ impl Database { /// Create a new database instance. pub fn new() -> Self { let catalog = Arc::new(DatabaseCatalog::new()); - Database { - catalog: catalog.clone(), - executor_builder: ExecutorBuilder::new(catalog), - } + Database { catalog } } /// Run SQL queries and return the outputs. @@ -34,13 +30,13 @@ impl Database { // parse let stmts = parse(sql)?; let mut binder = Binder::new(self.catalog.clone()); + let executor = Executor::new(self.catalog.clone()); let mut outputs = vec![]; for stmt in stmts { let bound_stmt = binder.bind(&stmt)?; debug!("{:#?}", bound_stmt); - let mut executor = self.executor_builder.build(bound_stmt); - let output = executor.execute()?; + let output = executor.execute(bound_stmt)?; outputs.push(output); } Ok(outputs) diff --git a/code/01-03/src/executor/create.rs b/code/01-03/src/executor/create.rs index 57a418b..abe4db0 100644 --- a/code/01-03/src/executor/create.rs +++ b/code/01-03/src/executor/create.rs @@ -1,18 +1,12 @@ use super::*; use crate::binder::BoundCreateTable; -/// The executor of `CREATE TABLE` statement. -pub struct CreateTableExecutor { - pub stmt: BoundCreateTable, - pub catalog: CatalogRef, -} - -impl Executor for CreateTableExecutor { - fn execute(&mut self) -> Result { - let schema = self.catalog.get_schema(self.stmt.schema_id).unwrap(); - let table_id = schema.add_table(&self.stmt.table_name).unwrap(); +impl Executor { + pub fn execute_create_table(&self, stmt: BoundCreateTable) -> Result { + let schema = self.catalog.get_schema(stmt.schema_id).unwrap(); + let table_id = schema.add_table(&stmt.table_name).unwrap(); let table = schema.get_table(table_id).unwrap(); - for (name, desc) in &self.stmt.columns { + for (name, desc) in &stmt.columns { table.add_column(name, desc.clone()).unwrap(); } Ok(String::new()) diff --git a/code/01-03/src/executor/mod.rs b/code/01-03/src/executor/mod.rs index da98aef..42fa22d 100644 --- a/code/01-03/src/executor/mod.rs +++ b/code/01-03/src/executor/mod.rs @@ -6,39 +6,26 @@ use crate::catalog::CatalogRef; mod create; mod select; -use self::create::*; -use self::select::*; - /// The error type of execution. #[derive(thiserror::Error, Debug)] pub enum ExecuteError {} -pub trait Executor { - fn execute(&mut self) -> Result; -} - -/// A type-erased executor object. -pub type BoxedExecutor = Box; - -/// The builder of executor. -pub struct ExecutorBuilder { +/// Execute the bound AST. +pub struct Executor { catalog: CatalogRef, } -impl ExecutorBuilder { - /// Create a new executor builder. - pub fn new(catalog: CatalogRef) -> ExecutorBuilder { - ExecutorBuilder { catalog } +impl Executor { + /// Create a new executor. + pub fn new(catalog: CatalogRef) -> Executor { + Executor { catalog } } - /// Build executor from a [BoundStatement]. - pub fn build(&self, stmt: BoundStatement) -> BoxedExecutor { + /// Execute a bound statement. + pub fn execute(&self, stmt: BoundStatement) -> Result { match stmt { - BoundStatement::CreateTable(stmt) => Box::new(CreateTableExecutor { - stmt, - catalog: self.catalog.clone(), - }), - BoundStatement::Select(stmt) => Box::new(SelectExecutor { stmt }), + BoundStatement::CreateTable(stmt) => self.execute_create_table(stmt), + BoundStatement::Select(stmt) => self.execute_select(stmt), } } } diff --git a/code/01-03/src/executor/select.rs b/code/01-03/src/executor/select.rs index e76575a..5787261 100644 --- a/code/01-03/src/executor/select.rs +++ b/code/01-03/src/executor/select.rs @@ -2,15 +2,10 @@ use super::*; use crate::binder::BoundSelect; use crate::parser::Value; -/// The executor of `SELECT` statement. -pub struct SelectExecutor { - pub stmt: BoundSelect, -} - -impl Executor for SelectExecutor { - fn execute(&mut self) -> Result { +impl Executor { + pub fn execute_select(&self, stmt: BoundSelect) -> Result { let mut output = String::new(); - for v in &self.stmt.values { + for v in &stmt.values { output += " "; match v { Value::SingleQuotedString(s) => output += s, diff --git a/code/01-04/src/db.rs b/code/01-04/src/db.rs index d1a5de8..deed63d 100644 --- a/code/01-04/src/db.rs +++ b/code/01-04/src/db.rs @@ -5,14 +5,14 @@ use std::sync::Arc; use crate::array::DataChunk; use crate::binder::{BindError, Binder}; use crate::catalog::{CatalogRef, DatabaseCatalog}; -use crate::executor::{ExecuteError, ExecutorBuilder}; +use crate::executor::{ExecuteError, Executor}; use crate::parser::{parse, ParserError}; -use crate::storage::InMemoryStorage; +use crate::storage::{InMemoryStorage, StorageRef}; /// The database instance. pub struct Database { catalog: CatalogRef, - executor_builder: ExecutorBuilder, + storage: StorageRef, } impl Default for Database { @@ -26,10 +26,7 @@ impl Database { pub fn new() -> Self { let catalog = Arc::new(DatabaseCatalog::new()); let storage = Arc::new(InMemoryStorage::new()); - Database { - catalog: catalog.clone(), - executor_builder: ExecutorBuilder::new(catalog, storage), - } + Database { catalog, storage } } /// Run SQL queries and return the outputs. @@ -37,13 +34,13 @@ impl Database { // parse let stmts = parse(sql)?; let mut binder = Binder::new(self.catalog.clone()); + let executor = Executor::new(self.catalog.clone(), self.storage.clone()); let mut outputs = vec![]; for stmt in stmts { let bound_stmt = binder.bind(&stmt)?; debug!("{:#?}", bound_stmt); - let mut executor = self.executor_builder.build(bound_stmt); - let output = executor.execute()?; + let output = executor.execute(bound_stmt)?; outputs.push(output); } Ok(outputs) diff --git a/code/01-04/src/executor/create.rs b/code/01-04/src/executor/create.rs index f2eec68..1521231 100644 --- a/code/01-04/src/executor/create.rs +++ b/code/01-04/src/executor/create.rs @@ -1,25 +1,17 @@ use super::*; use crate::binder::BoundCreateTable; use crate::catalog::TableRefId; -use crate::storage::StorageRef; -/// The executor of `CREATE TABLE` statement. -pub struct CreateTableExecutor { - pub stmt: BoundCreateTable, - pub catalog: CatalogRef, - pub storage: StorageRef, -} - -impl Executor for CreateTableExecutor { - fn execute(&mut self) -> Result { - let schema = self.catalog.get_schema(self.stmt.schema_id).unwrap(); - let table_id = schema.add_table(&self.stmt.table_name).unwrap(); +impl Executor { + pub fn execute_create_table(&self, stmt: BoundCreateTable) -> Result { + let schema = self.catalog.get_schema(stmt.schema_id).unwrap(); + let table_id = schema.add_table(&stmt.table_name).unwrap(); let table = schema.get_table(table_id).unwrap(); - for (name, desc) in &self.stmt.columns { + for (name, desc) in &stmt.columns { table.add_column(name, desc.clone()).unwrap(); } self.storage - .add_table(TableRefId::new(self.stmt.schema_id, table_id))?; + .add_table(TableRefId::new(stmt.schema_id, table_id))?; Ok(DataChunk::single(1)) } } diff --git a/code/01-04/src/executor/mod.rs b/code/01-04/src/executor/mod.rs index 532237c..701a22a 100644 --- a/code/01-04/src/executor/mod.rs +++ b/code/01-04/src/executor/mod.rs @@ -8,9 +8,6 @@ use crate::storage::{StorageError, StorageRef}; mod create; mod select; -use self::create::*; -use self::select::*; - /// The error type of execution. #[derive(thiserror::Error, Debug)] pub enum ExecuteError { @@ -18,34 +15,23 @@ pub enum ExecuteError { Storage(#[from] StorageError), } -pub trait Executor { - fn execute(&mut self) -> Result; -} - -/// A type-erased executor object. -pub type BoxedExecutor = Box; - -/// The builder of executor. -pub struct ExecutorBuilder { +/// Execute the bound AST. +pub struct Executor { catalog: CatalogRef, storage: StorageRef, } -impl ExecutorBuilder { - /// Create a new executor builder. - pub fn new(catalog: CatalogRef, storage: StorageRef) -> ExecutorBuilder { - ExecutorBuilder { catalog, storage } +impl Executor { + /// Create a new executor. + pub fn new(catalog: CatalogRef, storage: StorageRef) -> Executor { + Executor { catalog, storage } } - /// Build executor from a [BoundStatement]. - pub fn build(&self, stmt: BoundStatement) -> BoxedExecutor { + /// Execute a bound statement. + pub fn execute(&self, stmt: BoundStatement) -> Result { match stmt { - BoundStatement::CreateTable(stmt) => Box::new(CreateTableExecutor { - stmt, - catalog: self.catalog.clone(), - storage: self.storage.clone(), - }), - BoundStatement::Select(stmt) => Box::new(SelectExecutor { stmt }), + BoundStatement::CreateTable(stmt) => self.execute_create_table(stmt), + BoundStatement::Select(stmt) => self.execute_select(stmt), } } } diff --git a/code/01-04/src/executor/select.rs b/code/01-04/src/executor/select.rs index e756e36..dafaa47 100644 --- a/code/01-04/src/executor/select.rs +++ b/code/01-04/src/executor/select.rs @@ -2,14 +2,9 @@ use super::*; use crate::array::ArrayImpl; use crate::binder::BoundSelect; -/// The executor of `SELECT` statement. -pub struct SelectExecutor { - pub stmt: BoundSelect, -} - -impl Executor for SelectExecutor { - fn execute(&mut self) -> Result { - let chunk = self.stmt.values.iter().map(ArrayImpl::from).collect(); +impl Executor { + pub fn execute_select(&self, stmt: BoundSelect) -> Result { + let chunk = stmt.values.iter().map(ArrayImpl::from).collect(); Ok(chunk) } } diff --git a/code/01-05/src/db.rs b/code/01-05/src/db.rs index d1a5de8..deed63d 100644 --- a/code/01-05/src/db.rs +++ b/code/01-05/src/db.rs @@ -5,14 +5,14 @@ use std::sync::Arc; use crate::array::DataChunk; use crate::binder::{BindError, Binder}; use crate::catalog::{CatalogRef, DatabaseCatalog}; -use crate::executor::{ExecuteError, ExecutorBuilder}; +use crate::executor::{ExecuteError, Executor}; use crate::parser::{parse, ParserError}; -use crate::storage::InMemoryStorage; +use crate::storage::{InMemoryStorage, StorageRef}; /// The database instance. pub struct Database { catalog: CatalogRef, - executor_builder: ExecutorBuilder, + storage: StorageRef, } impl Default for Database { @@ -26,10 +26,7 @@ impl Database { pub fn new() -> Self { let catalog = Arc::new(DatabaseCatalog::new()); let storage = Arc::new(InMemoryStorage::new()); - Database { - catalog: catalog.clone(), - executor_builder: ExecutorBuilder::new(catalog, storage), - } + Database { catalog, storage } } /// Run SQL queries and return the outputs. @@ -37,13 +34,13 @@ impl Database { // parse let stmts = parse(sql)?; let mut binder = Binder::new(self.catalog.clone()); + let executor = Executor::new(self.catalog.clone(), self.storage.clone()); let mut outputs = vec![]; for stmt in stmts { let bound_stmt = binder.bind(&stmt)?; debug!("{:#?}", bound_stmt); - let mut executor = self.executor_builder.build(bound_stmt); - let output = executor.execute()?; + let output = executor.execute(bound_stmt)?; outputs.push(output); } Ok(outputs) diff --git a/code/01-05/src/executor/create.rs b/code/01-05/src/executor/create.rs index f2eec68..1521231 100644 --- a/code/01-05/src/executor/create.rs +++ b/code/01-05/src/executor/create.rs @@ -1,25 +1,17 @@ use super::*; use crate::binder::BoundCreateTable; use crate::catalog::TableRefId; -use crate::storage::StorageRef; -/// The executor of `CREATE TABLE` statement. -pub struct CreateTableExecutor { - pub stmt: BoundCreateTable, - pub catalog: CatalogRef, - pub storage: StorageRef, -} - -impl Executor for CreateTableExecutor { - fn execute(&mut self) -> Result { - let schema = self.catalog.get_schema(self.stmt.schema_id).unwrap(); - let table_id = schema.add_table(&self.stmt.table_name).unwrap(); +impl Executor { + pub fn execute_create_table(&self, stmt: BoundCreateTable) -> Result { + let schema = self.catalog.get_schema(stmt.schema_id).unwrap(); + let table_id = schema.add_table(&stmt.table_name).unwrap(); let table = schema.get_table(table_id).unwrap(); - for (name, desc) in &self.stmt.columns { + for (name, desc) in &stmt.columns { table.add_column(name, desc.clone()).unwrap(); } self.storage - .add_table(TableRefId::new(self.stmt.schema_id, table_id))?; + .add_table(TableRefId::new(stmt.schema_id, table_id))?; Ok(DataChunk::single(1)) } } diff --git a/code/01-05/src/executor/insert.rs b/code/01-05/src/executor/insert.rs index a4dc63d..66145c4 100644 --- a/code/01-05/src/executor/insert.rs +++ b/code/01-05/src/executor/insert.rs @@ -2,23 +2,13 @@ use itertools::Itertools; use super::*; use crate::array::{ArrayBuilderImpl, DataChunk}; -use crate::catalog::{ColumnId, TableRefId}; -use crate::storage::StorageRef; +use crate::binder::BoundInsert; use crate::types::{DataType, DataValue}; -/// The executor of `INSERT` statement. -pub struct InsertExecutor { - pub table_ref_id: TableRefId, - pub column_ids: Vec, - pub catalog: CatalogRef, - pub storage: StorageRef, - pub child: BoxedExecutor, -} - -impl Executor for InsertExecutor { - fn execute(&mut self) -> Result { - let table = self.storage.get_table(self.table_ref_id)?; - let catalog = self.catalog.get_table(self.table_ref_id).unwrap(); +impl Executor { + pub fn execute_insert(&self, stmt: BoundInsert) -> Result { + let table = self.storage.get_table(stmt.table_ref_id)?; + let catalog = self.catalog.get_table(stmt.table_ref_id).unwrap(); // Describe each column of the output chunks. // example: // columns = [0: Int, 1: Bool, 3: Float, 4: String] @@ -28,7 +18,7 @@ impl Executor for InsertExecutor { .all_columns() .values() .map( - |col| match self.column_ids.iter().position(|&id| id == col.id()) { + |col| match stmt.column_ids.iter().position(|&id| id == col.id()) { Some(index) => Column::Pick { index }, None => Column::Null { type_: col.datatype(), @@ -36,7 +26,7 @@ impl Executor for InsertExecutor { }, ) .collect_vec(); - let chunk = self.child.execute()?; + let chunk = self.execute_values(stmt)?; let count = chunk.cardinality(); table.append(transform_chunk(chunk, &output_columns))?; Ok(DataChunk::single(count as i32)) diff --git a/code/01-05/src/executor/mod.rs b/code/01-05/src/executor/mod.rs index 9364861..ab71fb1 100644 --- a/code/01-05/src/executor/mod.rs +++ b/code/01-05/src/executor/mod.rs @@ -10,11 +10,6 @@ mod insert; mod select; mod values; -use self::create::*; -use self::insert::*; -use self::select::*; -use self::values::*; - /// The error type of execution. #[derive(thiserror::Error, Debug)] pub enum ExecuteError { @@ -22,44 +17,24 @@ pub enum ExecuteError { Storage(#[from] StorageError), } -pub trait Executor { - fn execute(&mut self) -> Result; -} - -/// A type-erased executor object. -pub type BoxedExecutor = Box; - -/// The builder of executor. -pub struct ExecutorBuilder { +/// Execute the bound AST. +pub struct Executor { catalog: CatalogRef, storage: StorageRef, } -impl ExecutorBuilder { - /// Create a new executor builder. - pub fn new(catalog: CatalogRef, storage: StorageRef) -> ExecutorBuilder { - ExecutorBuilder { catalog, storage } +impl Executor { + /// Create a new executor. + pub fn new(catalog: CatalogRef, storage: StorageRef) -> Executor { + Executor { catalog, storage } } - /// Build executor from a [BoundStatement]. - pub fn build(&self, stmt: BoundStatement) -> BoxedExecutor { + /// Execute a bound statement. + pub fn execute(&self, stmt: BoundStatement) -> Result { match stmt { - BoundStatement::CreateTable(stmt) => Box::new(CreateTableExecutor { - stmt, - catalog: self.catalog.clone(), - storage: self.storage.clone(), - }), - BoundStatement::Insert(stmt) => Box::new(InsertExecutor { - table_ref_id: stmt.table_ref_id, - column_ids: stmt.column_ids, - catalog: self.catalog.clone(), - storage: self.storage.clone(), - child: Box::new(ValuesExecutor { - column_types: stmt.column_types, - values: stmt.values, - }), - }), - BoundStatement::Select(stmt) => Box::new(SelectExecutor { stmt }), + BoundStatement::CreateTable(stmt) => self.execute_create_table(stmt), + BoundStatement::Insert(stmt) => self.execute_insert(stmt), + BoundStatement::Select(stmt) => self.execute_select(stmt), } } } diff --git a/code/01-05/src/executor/select.rs b/code/01-05/src/executor/select.rs index 0c6b325..451f0dc 100644 --- a/code/01-05/src/executor/select.rs +++ b/code/01-05/src/executor/select.rs @@ -2,14 +2,9 @@ use super::*; use crate::array::ArrayImpl; use crate::binder::{BoundExpr, BoundSelect}; -/// The executor of `SELECT` statement. -pub struct SelectExecutor { - pub stmt: BoundSelect, -} - -impl Executor for SelectExecutor { - fn execute(&mut self) -> Result { - let chunk = (self.stmt.values.iter()) +impl Executor { + pub fn execute_select(&self, stmt: BoundSelect) -> Result { + let chunk = (stmt.values.iter()) .map(|BoundExpr::Constant(v)| ArrayImpl::from(v)) .collect(); Ok(chunk) diff --git a/code/01-05/src/executor/values.rs b/code/01-05/src/executor/values.rs index 16fab87..7d8b2bd 100644 --- a/code/01-05/src/executor/values.rs +++ b/code/01-05/src/executor/values.rs @@ -2,25 +2,18 @@ use itertools::Itertools; use super::*; use crate::array::{ArrayBuilderImpl, DataChunk}; -use crate::binder::BoundExpr; -use crate::types::{DataType, DataValue}; +use crate::binder::{BoundExpr, BoundInsert}; +use crate::types::DataValue; -/// The executor of `VALUES`. -pub struct ValuesExecutor { - pub column_types: Vec, - /// Each row is composed of multiple values, each value is represented by an expression. - pub values: Vec>, -} - -impl Executor for ValuesExecutor { - fn execute(&mut self) -> Result { - let cardinality = self.values.len(); - let mut builders = self +impl Executor { + pub fn execute_values(&self, stmt: BoundInsert) -> Result { + let cardinality = stmt.values.len(); + let mut builders = stmt .column_types .iter() .map(|ty| ArrayBuilderImpl::with_capacity(cardinality, ty)) .collect_vec(); - for row in &self.values { + for row in &stmt.values { for (expr, builder) in row.iter().zip(&mut builders) { let value = expr.eval_const()?; builder.push(&value); @@ -42,35 +35,3 @@ impl BoundExpr { } } } - -#[cfg(test)] -mod tests { - use super::*; - use crate::array::ArrayImpl; - use crate::binder::BoundExpr; - use crate::types::{DataTypeExt, DataTypeKind, DataValue}; - - #[test] - fn values() { - let values = [[0, 100], [1, 101], [2, 102], [3, 103]]; - let mut executor = ValuesExecutor { - column_types: vec![DataTypeKind::Int(None).nullable(); 2], - values: values - .iter() - .map(|row| { - row.iter() - .map(|&v| BoundExpr::Constant(DataValue::Int32(v))) - .collect::>() - }) - .collect::>(), - }; - let output = executor.execute().unwrap(); - let expected = [ - ArrayImpl::Int32((0..4).collect()), - ArrayImpl::Int32((100..104).collect()), - ] - .into_iter() - .collect::(); - assert_eq!(output, expected); - } -} diff --git a/docs/src/01-03-create-table.md b/docs/src/01-03-create-table.md index 0aa8b9e..32d9260 100644 --- a/docs/src/01-03-create-table.md +++ b/docs/src/01-03-create-table.md @@ -1,8 +1,19 @@ # 创建表(`CREATE TABLE`) -在实现了 Catalog 之后,我们就可以创建第一个数据表。 +在实现了 Catalog 之后,我们就可以使用 `CREATE TABLE` 语句创建数据表: -在 SQL 语言中,创建表用 `CREATE TABLE` 语句实现。这里就需要引入 Binder 的概念。…… +```sql +CREATE TABLE student ( + id INTEGER PRIMARY KEY, + name VARCHAR NOT NULL, + age INTEGER +); +``` + +这一语句除了解析和执行两个步骤以外,还需要做名称的检查,并将它们与数据库内部对象绑定起来。 +这些工作一般是由一个叫做 Binder 的模块完成。 + +在此任务中,我们将实现一个基础的 Binder,同时拓展 Executor 实现相应的执行逻辑,最终支持 `CREATE TABLE` 语句。 @@ -10,16 +21,254 @@ ### Binder -TODO +Binder 是整个数据库系统中一个不太起眼但又十分重要的模块。 + +它的作用是将解析后生成的 AST 和 Schema 信息绑定起来,具体包括: + +- 检查输入的名称是否合法、是否有重复、有歧义 +- 推断表达式的返回值类型,并检查是否合法 +- 将输入的名称转换成内部 ID + +比如,对于一个简单的创建表的命令: + +```sql +CREATE TABLE student ( + id INTEGER PRIMARY KEY, + name VARCHAR NOT NULL, + age INTEGER +); +``` + +Binder 会依次完成以下工作: + +1. 由于 SQL 对大小写不敏感,Binder 会首先将所有名称统一成小写。 +2. 对于表名 `student`,自动补全省略的 schema 名。假如当前 schema 是 `school`,那么会将其补全成 `school.student`。 +3. 检查 schema 是否存在,并获取它的 Schema ID。 +4. 检查 schema 中是否已经存在名为 `student` 的表。 +5. 检查列名 `id` `name` `age` 是否合法、是否有重复。 +6. 检查列的属性是否合法,例如不能出现两个 `PRIMARY KEY`。 +7. 向 AST 中填入绑定后的信息:Schema ID。 + +对于插入数据的 `INSERT` 语句,例如: + +```sql +INSERT INTO student VALUES (1, 'Alice', 18) +``` + +Binder 需要查询表中每一列的信息,推断表达式的类型,并检查它们是否相符。 +换言之,Binder 应该能够识别出以下不合法的插入语句: + +```sql +INSERT INTO student VALUES (1) -- 存在未指定的 NOT NULL 值 +INSERT INTO student VALUES (1, 'Alice', 'old') -- 类型不匹配 +INSERT INTO student VALUES (1, 'Alice', 18+'g') -- 表达式类型不匹配 +``` + +对于更复杂的嵌套查询语句,Binder 还需要根据当前语境,推断出每个名称具体指代哪个对象: + +```sql +SELECT name FROM student WHERE sid IN ( +-- ^-----------^ student.sid + SELECT sid FROM enrolled WHERE class = 'database' +-- ^----------^ enrolled.sid +) +``` + +可以看出,Binder 干的都是一些比较繁琐的脏活累活。因此后面我们写的 Binder 代码也会比较冗长并且细节琐碎。 ## 任务目标 -能够创建数据表,支持以下 SQL: +能够创建数据表,支持以下 SQL 语句: ```sql -CREATE TABLE t (a INT) +CREATE TABLE student ( + id INTEGER PRIMARY KEY, + name VARCHAR NOT NULL, + age INTEGER +); +``` + +【练习】支持 `DROP TABLE` 语句,删除数据表: + +```sql +DROP TABLE student; +``` + +【练习】支持 `CREATE SCHEMA` 语句,创建 schema: + +```sql +CREATE SCHEMA school; ``` ## 整体设计 -TODO +在加入 Binder 之后,RisingLight 的整个数据处理流程扩展成了这个样子: + +![](img/01-03-mod.svg) + +其中 Binder 插在了 Parser 和 Executor 之间。 +它会将 Parser 生成的 AST 进行处理后,生成一个新的 AST 交给 Executor,在此过程中需要从 Catalog 读取 Schema 信息。 +Executor 拿到绑定后的 AST 去执行,在此过程中可能也会再次修改 Catalog(比如创建一个表)。 + +在代码结构上,我们可能会新增以下文件: + +``` +src +├── binder +│ ├── mod.rs +│ └── statement +│ ├── mod.rs +│ ├── create.rs +│ └── select.rs +├── executor +│ ├── mod.rs +│ ├── create.rs +│ └── select.rs +... +``` + +此外还需要对数据库顶层结构进行修改。 + +### Bound AST + +Binder 模块的主要任务是给 Parser 生成的 AST 绑定必要的信息。 + +由于我们的 Parser 使用了第三方库,不能在它的 AST 结构上扩展新的属性,所以只能定义新的结构来存放这些信息。 + +例如对于 `CREATE TABLE` 语句来说,绑定后的 AST 应该具有以下信息: + +```rust,no_run +// binder/statement/create.rs + +/// A bound `CREATE TABLE` statement. +#[derive(Debug, PartialEq, Clone)] +pub struct BoundCreateTable { + pub schema_id: SchemaId, // schema name 经过向 catalog 查询转换成了 ID + pub table_name: String, + pub columns: Vec<(String, ColumnDesc)>, +} +``` + +类似地,对于 1.1 中的 `SELECT 1` 语句而言,我们可以只提取出必要的值来保存: + +```rust,no_run +// binder/statement/select.rs + +use crate::parser::Value; + +/// A bound `SELECT` statement. +#[derive(Debug, PartialEq, Clone)] +pub struct BoundSelect { + pub values: Vec, +} +``` + +最后,我们需要定义一个 enum 将各种不同类型的语句聚合起来: + +```rust,no_run +// binder/mod.rs + +/// A bound SQL statement. +#[derive(Debug, PartialEq, Clone)] +pub enum BoundStatement { + CreateTable(BoundCreateTable), + Select(BoundSelect), +} +``` + +这样,一个 `BoundStatement` 变量就可以表示 Binder 生成的整个 AST 了。 + +### Binder + +接下来,我们实现真正的 `Binder` 对象。它会将 Parser 生成的 AST 转换成一个新的 AST。 +由于在绑定过程中会访问 Catalog 的数据,`Binder` 中需要存放一个 Catalog 对象的指针: + +```rust,no_run +pub struct Binder { + catalog: Arc, +} +``` + +我们在 `Binder` 对象上实现各种 `bind` 方法来完成对不同 AST 节点的处理: + +```rust,no_run +use crate::parser::{Query, Statement}; + +impl Binder { + pub fn bind(&mut self, stmt: &Statement) -> Result { + use Statement::*; + match stmt { + CreateTable { .. } => Ok(BoundStatement::CreateTable(self.bind_create_table(stmt)?)), + Query(query) => Ok(BoundStatement::Select(self.bind_select(query)?)), + _ => todo!("bind statement: {:#?}", stmt), + } + } + + fn bind_create_table(&mut self, stmt: &Statement) -> Result { + // YOUR CODE HERE + } + + fn bind_select(&mut self, query: &Query) -> Result { + // YOUR CODE HERE + } +} +``` + +注意到这些方法都使用了 `&mut self` 签名,这是因为 `Binder` 未来会有内部状态,并且在 bind 过程中还会修改这些状态。 + + +另外在 bind 过程中还可能产生各种各样的错误,比如名称不存在或者重复等等。 +我们将所有可能发生的错误定义在一个 `BindError` 错误类型中(参考 [1.1 错误处理](../01-01-hello-sql.md#错误处理)): + +```rust,no_run +/// The error type of bind operations. +#[derive(thiserror::Error, Debug, PartialEq)] +pub enum BindError { + #[error("schema not found: {0}")] + SchemaNotFound(String), + // ... +} +``` + +至于具体的 bind 逻辑,大家可以参考背景知识中描述的过程尝试自己实现。 + +### Executor + +在 1.1 中我们实现过一个最简单的执行器,它只是一个函数,拿到 AST 后做具体的执行。 +现在我们有了更多类型的语句,并且在执行它们的过程中还需要访问 Catalog。 +因此和 Binder 类似,我们现在需要将 Executor 也扩展为一个对象: + +```rust,no_run +pub struct Executor { + catalog: Arc, +} +``` + +然后在 `Executor` 上实现各种 `execute` 方法来对不同类型的 AST 节点做执行: + +```rust,no_run +/// The error type of execution. +#[derive(thiserror::Error, Debug)] +pub enum ExecuteError {...} + +impl Executor { + pub fn execute(&self, stmt: BoundStatement) -> Result { + match stmt { + BoundStatement::CreateTable(stmt) => self.execute_create_table(stmt), + BoundStatement::Select(stmt) => self.execute_select(stmt), + } + } + + fn execute_create_table(&self, stmt: BoundCreateTable) -> Result { + // YOUR CODE HERE + } + + fn execute_select(&self, query: BoundSelect) -> Result { + // YOUR CODE HERE + } +} +``` + +我们暂时将 Executor 的返回值设定为 `String` 类型,表示语句的执行结果。 +在下一个任务中,我们会实现更具体的内存数据类型 `Array` 和 `DataChunk`。 +到那时,Executor 的输出就是一段真正的数据了。 diff --git a/docs/src/01-intro.md b/docs/src/01-intro.md index f08d55f..180c7b1 100644 --- a/docs/src/01-intro.md +++ b/docs/src/01-intro.md @@ -3,14 +3,14 @@ 欢迎来到新手村! 在这里我们将白手起家,构建出一个能够运行简单 SQL 语句的数据库框架。 -在此过程中,我们会从一个 Parser 开始,逐步引入查询引擎所需的 Binder,Planner,Executor 等模块。 +在此过程中,我们会从一个 SQL 解析器开始,逐步引入查询引擎所需的各个模块。 最终实现数据库的 3 条基本命令:创建 `CREATE`,插入 `INSERT` 和查找 `SELECT`。 ## 世界地图 ![](img/world1.svg) -1. Hello,SQL:实现最简单的 SQL 解释器。 +1. Hello SQL:实现最简单的 SQL 解释器。 2. Catalog:定义 Catalog 相关数据结构。 @@ -20,14 +20,16 @@ 5. 插入数据:向表中插入数据,支持 `INSERT VALUES` 语句。 -6. 执行计划:实现 Plan Node,支持 `EXPLAIN` 语句。 +6. 执行计划:实现执行计划树,支持 `EXPLAIN` 语句。 7. 查询数据:从表中读取数据,支持 `SELECT v FROM t` 语句。 +8. 执行引擎:实现向量化模型的执行引擎。 + 除此之外,还有以下小练习: -8. 删除表:支持 `DROP TABLE` 语句。 +1. 删除表:支持 `DROP TABLE` 语句。 -9. 创建 Schema:支持 `CREATE SCHEMA` 语句。 +2. 创建 Schema:支持 `CREATE SCHEMA` 语句。 从新手村成功毕业以后,我们就有了探索其他世界所需的代码基础。 diff --git a/docs/src/img/01-03-mod.svg b/docs/src/img/01-03-mod.svg new file mode 100644 index 0000000..caeb7c2 --- /dev/null +++ b/docs/src/img/01-03-mod.svg @@ -0,0 +1,4 @@ + + + +
 lib
 lib
AST
AST
AST
AST
SQL
SQL
Output
Output
Parser
Parser
Binder
Binder
Executor
Executor
Catalog
Catalog
Text is not SVG - cannot display
\ No newline at end of file