Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

planner, CTE, view: Fix default inline CTE which contains orderby/limit/distinct and inside of view | tidb-test=pr/2415 #56609

Merged
merged 8 commits into from
Oct 15, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 27 additions & 6 deletions pkg/planner/core/logical_plan_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ func (b *PlanBuilder) buildAggregation(ctx context.Context, p base.LogicalPlan,
}
// flag it if cte contain aggregation
if b.buildingCTE {
b.outerCTEs[len(b.outerCTEs)-1].containAggOrWindow = true
b.outerCTEs[len(b.outerCTEs)-1].containRecursiveForbiddenOperator = true
}
var rollupExpand *logicalop.LogicalExpand
if expand, ok := p.(*logicalop.LogicalExpand); ok {
Expand Down Expand Up @@ -1496,6 +1496,10 @@ func (b *PlanBuilder) buildProjection(ctx context.Context, p base.LogicalPlan, f
func (b *PlanBuilder) buildDistinct(child base.LogicalPlan, length int) (*logicalop.LogicalAggregation, error) {
b.optFlag = b.optFlag | rule.FlagBuildKeyInfo
b.optFlag = b.optFlag | rule.FlagPushDownAgg
// flag it if cte contain distinct
if b.buildingCTE {
b.outerCTEs[len(b.outerCTEs)-1].containRecursiveForbiddenOperator = true
}
plan4Agg := logicalop.LogicalAggregation{
AggFuncs: make([]*aggregation.AggFuncDesc, 0, child.Schema().Len()),
GroupByItems: expression.Column2Exprs(child.Schema().Clone().Columns[:length]),
Expand Down Expand Up @@ -2091,6 +2095,10 @@ func extractLimitCountOffset(ctx expression.BuildContext, limit *ast.Limit) (cou

func (b *PlanBuilder) buildLimit(src base.LogicalPlan, limit *ast.Limit) (base.LogicalPlan, error) {
b.optFlag = b.optFlag | rule.FlagPushDownTopN
// flag it if cte contain limit
if b.buildingCTE {
b.outerCTEs[len(b.outerCTEs)-1].containRecursiveForbiddenOperator = true
}
var (
offset, count uint64
err error
Expand Down Expand Up @@ -3921,6 +3929,10 @@ func (b *PlanBuilder) buildSelect(ctx context.Context, sel *ast.SelectStmt) (p b
}

if sel.OrderBy != nil {
// flag it if cte contain order by
if b.buildingCTE {
b.outerCTEs[len(b.outerCTEs)-1].containRecursiveForbiddenOperator = true
}
// We need to keep the ORDER BY clause for the following cases:
// 1. The select is top level query, order should be honored
// 2. The query has LIMIT clause
Expand Down Expand Up @@ -4227,9 +4239,9 @@ func (b *PlanBuilder) tryBuildCTE(ctx context.Context, tn *ast.TableName, asName
prevSchema := cte.seedLP.Schema().Clone()
lp.SetSchema(getResultCTESchema(cte.seedLP.Schema(), b.ctx.GetSessionVars()))

// If current CTE query contain another CTE which 'containAggOrWindow' is true, current CTE 'containAggOrWindow' will be true
// If current CTE query contain another CTE which 'containRecursiveForbiddenOperator' is true, current CTE 'containRecursiveForbiddenOperator' will be true
if b.buildingCTE {
b.outerCTEs[len(b.outerCTEs)-1].containAggOrWindow = cte.containAggOrWindow || b.outerCTEs[len(b.outerCTEs)-1].containAggOrWindow
b.outerCTEs[len(b.outerCTEs)-1].containRecursiveForbiddenOperator = cte.containRecursiveForbiddenOperator || b.outerCTEs[len(b.outerCTEs)-1].containRecursiveForbiddenOperator
}
// Compute cte inline
b.computeCTEInlineFlag(cte)
Expand Down Expand Up @@ -4287,13 +4299,22 @@ func (b *PlanBuilder) computeCTEInlineFlag(cte *cteInfo) {
b.ctx.GetSessionVars().StmtCtx.SetHintWarning(
fmt.Sprintf("Recursive CTE %s can not be inlined by merge() or tidb_opt_force_inline_cte.", cte.def.Name))
}
} else if cte.containAggOrWindow && b.buildingRecursivePartForCTE {
cte.isInline = false
} else if cte.containRecursiveForbiddenOperator && b.buildingRecursivePartForCTE {
if cte.forceInlineByHintOrVar {
b.ctx.GetSessionVars().StmtCtx.AppendWarning(plannererrors.ErrCTERecursiveForbidsAggregation.FastGenByArgs(cte.def.Name))
}
} else if cte.consumerCount > 1 {
cte.isInline = false
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cte.isInline is the only the variable declared here and used here, we can use a temporary variable rather than a member variable inside cte?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

which means this function will return a bool back to the caller

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is used by buildSequence fuction so I cannot just return a bool to the caller

func (b *PlanBuilder) tryToBuildSequence(ctes []*cteInfo, p base.LogicalPlan) base.LogicalPlan {
	if !b.ctx.GetSessionVars().EnableMPPSharedCTEExecution {
		return p
	}
	for i := len(ctes) - 1; i >= 0; i-- {
		if !ctes[i].nonRecursive {
			return p
		}
		if ctes[i].isInline || ctes[i].cteClass == nil {
			ctes = append(ctes[:i], ctes[i+1:]...)
		}
	}

} else if cte.consumerCount != 1 {
// If hint or session variable is set, it can be inlined by user.
if cte.forceInlineByHintOrVar {
cte.isInline = true
} else {
// Consumer count > 1 or = 0, CTE can not be inlined by default.
// Case the consumer count = 0 (issue #56582)
// It means that CTE maybe inside of view and the UpdateCTEConsumerCount(preprocess phase) is skipped
// So all of CTE.consumerCount is not updated, and we can not use it to determine whether CTE can be inlined.
cte.isInline = false
}
} else {
cte.isInline = true
Expand Down Expand Up @@ -6455,7 +6476,7 @@ func sortWindowSpecs(groupedFuncs map[*ast.WindowSpec][]*ast.WindowFuncExpr, ord

func (b *PlanBuilder) buildWindowFunctions(ctx context.Context, p base.LogicalPlan, groupedFuncs map[*ast.WindowSpec][]*ast.WindowFuncExpr, orderedSpec []*ast.WindowSpec, aggMap map[*ast.AggregateFuncExpr]int) (base.LogicalPlan, map[*ast.WindowFuncExpr]int, error) {
if b.buildingCTE {
b.outerCTEs[len(b.outerCTEs)-1].containAggOrWindow = true
b.outerCTEs[len(b.outerCTEs)-1].containRecursiveForbiddenOperator = true
}
args := make([]ast.ExprNode, 0, 4)
windowMap := make(map[*ast.WindowFuncExpr]int)
Expand Down
4 changes: 2 additions & 2 deletions pkg/planner/core/planbuilder.go
Original file line number Diff line number Diff line change
Expand Up @@ -177,8 +177,8 @@ type cteInfo struct {
isInline bool
// forceInlineByHintOrVar will be true when CTE is hint by merge() or session variable "tidb_opt_force_inline_cte=true"
forceInlineByHintOrVar bool
// If CTE contain aggregation or window function in query (Indirect references to other cte containing agg or window in the query are also counted.)
containAggOrWindow bool
// If CTE contain aggregation, window function, order by, distinct and limit in query (Indirect references to other cte containing those operator in the query are also counted.)
containRecursiveForbiddenOperator bool
// Compute in preprocess phase. Record how many consumers the current CTE has
consumerCount int
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -765,6 +765,92 @@ CTE_4 1.80 root Recursive CTE
└─Projection(Recursive Part) 0.80 root cast(plus(Column#38, 1), bigint(1) BINARY)->Column#40
└─Selection 0.80 root lt(plus(Column#38, 1), 10)
└─CTETable 1.00 root Scan on CTE_4
create table test(a int);
explain WITH RECURSIVE CTE (x) AS (SELECT 1 UNION ALL SELECT distinct a FROM test), CTE1 AS (SELECT x FROM CTE UNION ALL select CTE.x from CTE join CTE1 on CTE.x=CTE1.x) SELECT * FROM CTE1; -- CTE contain distinct and ref by CET1 recursive part cannot be inlined;
id estRows task access object operator info
CTEFullScan_52 14401.80 root CTE:cte1 data:CTE_1
CTE_1 14401.80 root Recursive CTE
├─CTEFullScan_40(Seed Part) 8001.00 root CTE:cte data:CTE_0
└─HashJoin_45(Recursive Part) 6400.80 root inner join, equal:[eq(Column#11, Column#12)]
├─Selection_49(Build) 6400.80 root not(isnull(Column#12))
│ └─CTETable_50 8001.00 root Scan on CTE_1
└─Selection_47(Probe) 6400.80 root not(isnull(Column#11))
└─CTEFullScan_48 8001.00 root CTE:cte data:CTE_0
CTE_0 8001.00 root Non-Recursive CTE
└─Union_27(Seed Part) 8001.00 root
├─Projection_28 1.00 root 1->Column#7
│ └─TableDual_29 1.00 root rows:1
└─Projection_30 8000.00 root cast(planner__core__casetest__physicalplantest__physical_plan.test.a, bigint(11) BINARY)->Column#7
└─HashAgg_35 8000.00 root group by:planner__core__casetest__physicalplantest__physical_plan.test.a, funcs:firstrow(planner__core__casetest__physicalplantest__physical_plan.test.a)->planner__core__casetest__physicalplantest__physical_plan.test.a
└─TableReader_36 8000.00 root data:HashAgg_31
└─HashAgg_31 8000.00 cop[tikv] group by:planner__core__casetest__physicalplantest__physical_plan.test.a,
└─TableFullScan_34 10000.00 cop[tikv] table:test keep order:false, stats:pseudo
create view test_cte(a) as WITH RECURSIVE CTE (x) AS (SELECT 1 UNION ALL SELECT distinct a FROM test) , CTE1 AS (SELECT x FROM CTE UNION ALL select CTE.x from CTE join CTE1 on CTE.x=CTE1.x) SELECT * FROM CTE1;
explain select * from test_cte; -- CTE (inside of view) cannot be inlined by default;
id estRows task access object operator info
CTEFullScan_54 14401.80 root CTE:cte1 data:CTE_1
CTE_1 14401.80 root Recursive CTE
├─CTEFullScan_42(Seed Part) 8001.00 root CTE:cte data:CTE_0
└─HashJoin_47(Recursive Part) 6400.80 root inner join, equal:[eq(Column#11, Column#12)]
├─Selection_51(Build) 6400.80 root not(isnull(Column#12))
│ └─CTETable_52 8001.00 root Scan on CTE_1
└─Selection_49(Probe) 6400.80 root not(isnull(Column#11))
└─CTEFullScan_50 8001.00 root CTE:cte data:CTE_0
CTE_0 8001.00 root Non-Recursive CTE
└─Union_29(Seed Part) 8001.00 root
├─Projection_30 1.00 root 1->Column#7
│ └─TableDual_31 1.00 root rows:1
└─Projection_32 8000.00 root cast(planner__core__casetest__physicalplantest__physical_plan.test.a, bigint(11) BINARY)->Column#7
└─HashAgg_37 8000.00 root group by:planner__core__casetest__physicalplantest__physical_plan.test.a, funcs:firstrow(planner__core__casetest__physicalplantest__physical_plan.test.a)->planner__core__casetest__physicalplantest__physical_plan.test.a
└─TableReader_38 8000.00 root data:HashAgg_33
└─HashAgg_33 8000.00 cop[tikv] group by:planner__core__casetest__physicalplantest__physical_plan.test.a,
└─TableFullScan_36 10000.00 cop[tikv] table:test keep order:false, stats:pseudo
create view test_inline_cte(a) as with CTE (x) as (select distinct a from test) select * from CTE;
explain select * from test_inline_cte; -- CTE (inside of view) cannot be inlined by default;
id estRows task access object operator info
CTEFullScan_17 8000.00 root CTE:cte data:CTE_0
CTE_0 8000.00 root Non-Recursive CTE
└─HashAgg_12(Seed Part) 8000.00 root group by:planner__core__casetest__physicalplantest__physical_plan.test.a, funcs:firstrow(planner__core__casetest__physicalplantest__physical_plan.test.a)->planner__core__casetest__physicalplantest__physical_plan.test.a
└─TableReader_13 8000.00 root data:HashAgg_8
└─HashAgg_8 8000.00 cop[tikv] group by:planner__core__casetest__physicalplantest__physical_plan.test.a,
└─TableFullScan_11 10000.00 cop[tikv] table:test keep order:false, stats:pseudo
create view test_force_inline_cte(a) as with CTE (x) as (select /*+ merge() */ distinct a from test) select * from CTE;
explain select * from test_force_inline_cte; -- CTE (inside of view) can be inlined by force;
id estRows task access object operator info
HashAgg_16 8000.00 root group by:planner__core__casetest__physicalplantest__physical_plan.test.a, funcs:firstrow(planner__core__casetest__physicalplantest__physical_plan.test.a)->planner__core__casetest__physicalplantest__physical_plan.test.a
└─TableReader_17 8000.00 root data:HashAgg_12
└─HashAgg_12 8000.00 cop[tikv] group by:planner__core__casetest__physicalplantest__physical_plan.test.a,
└─TableFullScan_15 10000.00 cop[tikv] table:test keep order:false, stats:pseudo
explain WITH RECURSIVE CTE (x) AS (SELECT a FROM test limit 1) , CTE1(x) AS (SELECT a FROM test UNION ALL select CTE.x from CTE join CTE1 on CTE.x=CTE1.x) SELECT * FROM CTE1; -- CTE contain limit and ref by CET1 recursive part cannot be inlined;
id estRows task access object operator info
CTEFullScan_42 16400.00 root CTE:cte1 data:CTE_1
CTE_1 16400.00 root Recursive CTE
├─TableReader_22(Seed Part) 10000.00 root data:TableFullScan_21
│ └─TableFullScan_21 10000.00 cop[tikv] table:test keep order:false, stats:pseudo
└─HashJoin_36(Recursive Part) 6400.00 root inner join, equal:[eq(planner__core__casetest__physicalplantest__physical_plan.test.a, planner__core__casetest__physicalplantest__physical_plan.test.a)]
├─Selection_37(Build) 0.80 root not(isnull(planner__core__casetest__physicalplantest__physical_plan.test.a))
│ └─CTEFullScan_38 1.00 root CTE:cte data:CTE_0
└─Selection_39(Probe) 8000.00 root not(isnull(planner__core__casetest__physicalplantest__physical_plan.test.a))
└─CTETable_40 10000.00 root Scan on CTE_1
CTE_0 1.00 root Non-Recursive CTE
└─Limit_28(Seed Part) 1.00 root offset:0, count:1
└─TableReader_32 1.00 root data:Limit_31
└─Limit_31 1.00 cop[tikv] offset:0, count:1
└─TableFullScan_30 1.00 cop[tikv] table:test keep order:false, stats:pseudo
explain WITH RECURSIVE CTE (x) AS (SELECT a FROM test order by a) , CTE1(x) AS (SELECT a FROM test UNION ALL select CTE.x from CTE join CTE1 on CTE.x=CTE1.x) SELECT * FROM CTE1; -- CTE contain order by and ref by CET1 recursive part cannot be inlined;
id estRows task access object operator info
CTEFullScan_35 20000.00 root CTE:cte1 data:CTE_1
CTE_1 20000.00 root Recursive CTE
├─TableReader_20(Seed Part) 10000.00 root data:TableFullScan_19
│ └─TableFullScan_19 10000.00 cop[tikv] table:test keep order:false, stats:pseudo
└─HashJoin_29(Recursive Part) 10000.00 root inner join, equal:[eq(planner__core__casetest__physicalplantest__physical_plan.test.a, planner__core__casetest__physicalplantest__physical_plan.test.a)]
├─Selection_30(Build) 8000.00 root not(isnull(planner__core__casetest__physicalplantest__physical_plan.test.a))
│ └─CTEFullScan_31 10000.00 root CTE:cte data:CTE_0
└─Selection_32(Probe) 8000.00 root not(isnull(planner__core__casetest__physicalplantest__physical_plan.test.a))
└─CTETable_33 10000.00 root Scan on CTE_1
CTE_0 10000.00 root Non-Recursive CTE
└─TableReader_25(Seed Part) 10000.00 root data:TableFullScan_24
└─TableFullScan_24 10000.00 cop[tikv] table:test keep order:false, stats:pseudo
drop table if exists t;
create table t(a int, b int, c int, index(c));
insert into t values (1, 1, 1), (1, 1, 3), (1, 2, 3), (2, 1, 3), (1, 2, NULL);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,17 @@ explain format='brief' with cte as (select 1) select /*+ MERGE() */ * from cte u
explain format='brief' with a as (select 8 as id from dual),maxa as (select max(id) as max_id from a),b as (with recursive temp as (select 1 as lvl from dual union all select lvl+1 from temp, maxa where lvl < max_id)select * from temp) select * from b; -- issue #47711, maxa cannot be inlined because it contains agg and in the recursive part of cte temp;
explain format='brief' with a as (select count(*) from t1), b as (select 2 as bb from a), c as (with recursive tmp as (select 1 as res from t1 union all select res+1 from tmp,b where res+1 < bb) select * from tmp) select * from c; -- inline a, cannot be inline b because b indirectly contains agg and in the recursive part of cte tmp;
explain format='brief' with a as (select count(*) from t1), b as (select 2 as bb from a), c as (with recursive tmp as (select bb as res from b union all select res+1 from tmp where res +1 < 10) select * from tmp) select * from c; -- inline a, b, cannot be inline tmp, c;
create table test(a int);
explain WITH RECURSIVE CTE (x) AS (SELECT 1 UNION ALL SELECT distinct a FROM test), CTE1 AS (SELECT x FROM CTE UNION ALL select CTE.x from CTE join CTE1 on CTE.x=CTE1.x) SELECT * FROM CTE1; -- CTE contain distinct and ref by CET1 recursive part cannot be inlined;
create view test_cte(a) as WITH RECURSIVE CTE (x) AS (SELECT 1 UNION ALL SELECT distinct a FROM test) , CTE1 AS (SELECT x FROM CTE UNION ALL select CTE.x from CTE join CTE1 on CTE.x=CTE1.x) SELECT * FROM CTE1;
explain select * from test_cte; -- CTE (inside of view) cannot be inlined by default;
create view test_inline_cte(a) as with CTE (x) as (select distinct a from test) select * from CTE;
explain select * from test_inline_cte; -- CTE (inside of view) cannot be inlined by default;
create view test_force_inline_cte(a) as with CTE (x) as (select /*+ merge() */ distinct a from test) select * from CTE;
explain select * from test_force_inline_cte; -- CTE (inside of view) can be inlined by force;
explain WITH RECURSIVE CTE (x) AS (SELECT a FROM test limit 1) , CTE1(x) AS (SELECT a FROM test UNION ALL select CTE.x from CTE join CTE1 on CTE.x=CTE1.x) SELECT * FROM CTE1; -- CTE contain limit and ref by CET1 recursive part cannot be inlined;
explain WITH RECURSIVE CTE (x) AS (SELECT a FROM test order by a) , CTE1(x) AS (SELECT a FROM test UNION ALL select CTE.x from CTE join CTE1 on CTE.x=CTE1.x) SELECT * FROM CTE1; -- CTE contain order by and ref by CET1 recursive part cannot be inlined;


# TestPushdownDistinctEnableAggPushDownDisable
drop table if exists t;
Expand Down Expand Up @@ -1005,4 +1016,4 @@ insert into tbl_43 values("BCmuENPHzSOIMJLPB"),("LDOdXZYpOR"),("R"),("TloTqcHhdg
explain format = 'brief' select min(col_304) from (select /*+ use_index_merge( tbl_43 ) */ * from tbl_43 where not( tbl_43.col_304 between 'YEpfYfPVvhMlHGHSMKm' and 'PE' ) or tbl_43.col_304 in ( 'LUBGzGMA' ) and tbl_43.col_304 between 'HpsjfuSReCwBoh' and 'fta' or not( tbl_43.col_304 between 'MFWmuOsoyDv' and 'TSeMYpDXnFIyp' ) order by col_304) x;
select min(col_304) from (select /*+ use_index_merge( tbl_43 ) */ * from tbl_43 where not( tbl_43.col_304 between 'YEpfYfPVvhMlHGHSMKm' and 'PE' ) or tbl_43.col_304 in ( 'LUBGzGMA' ) and tbl_43.col_304 between 'HpsjfuSReCwBoh' and 'fta' or not( tbl_43.col_304 between 'MFWmuOsoyDv' and 'TSeMYpDXnFIyp' ) order by col_304) x;
explain format = 'brief' select max(col_304) from (select /*+ use_index_merge( tbl_43 ) */ * from tbl_43 where not( tbl_43.col_304 between 'YEpfYfPVvhMlHGHSMKm' and 'PE' ) or tbl_43.col_304 in ( 'LUBGzGMA' ) and tbl_43.col_304 between 'HpsjfuSReCwBoh' and 'fta' or not( tbl_43.col_304 between 'MFWmuOsoyDv' and 'TSeMYpDXnFIyp' ) order by col_304) x;
select max(col_304) from (select /*+ use_index_merge( tbl_43 ) */ * from tbl_43 where not( tbl_43.col_304 between 'YEpfYfPVvhMlHGHSMKm' and 'PE' ) or tbl_43.col_304 in ( 'LUBGzGMA' ) and tbl_43.col_304 between 'HpsjfuSReCwBoh' and 'fta' or not( tbl_43.col_304 between 'MFWmuOsoyDv' and 'TSeMYpDXnFIyp' ) order by col_304) x;
select max(col_304) from (select /*+ use_index_merge( tbl_43 ) */ * from tbl_43 where not( tbl_43.col_304 between 'YEpfYfPVvhMlHGHSMKm' and 'PE' ) or tbl_43.col_304 in ( 'LUBGzGMA' ) and tbl_43.col_304 between 'HpsjfuSReCwBoh' and 'fta' or not( tbl_43.col_304 between 'MFWmuOsoyDv' and 'TSeMYpDXnFIyp' ) order by col_304) x;