Skip to content

Commit

Permalink
*: Disable non-ga hash join types in hash join v2 (pingcap#59505)
Browse files (browse the repository at this point in the history)
  • Loading branch information
windtalker authored Feb 14, 2025
1 parent 0c61a62 commit 72a1145
Show file tree
Hide file tree
Showing 15 changed files with 564 additions and 70 deletions.
2 changes: 1 addition & 1 deletion pkg/executor/join/joinversion/join_version.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ var (

func init() {
// This variable is set to true for test, need to be set back to false in release version
UseHashJoinV2ForNonGAJoin = true
UseHashJoinV2ForNonGAJoin = false
}

// IsOptimizedVersion returns true if hashJoinVersion equals to HashJoinVersionOptimized
Expand Down
1 change: 1 addition & 0 deletions pkg/planner/cascades/cascades_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ func TestXFormedOperatorShouldDeriveTheirStatsOwn(t *testing.T) {
res2 = tk.MustQuery("explain format=\"brief\" SELECT /*+ inl_hash_join(tab, t2@sel_2) */ 1 FROM t1 AS tab WHERE (EXISTS(SELECT 1 FROM t2 WHERE a2 = a1 ))").String()
require.Equal(t, res1, res2)

tk.MustExec("set tidb_hash_join_version=optimized")
tk.Session().GetSessionVars().SetEnableCascadesPlanner(false)
res1 = tk.MustQuery("explain format=\"brief\" SELECT /*+ hash_join(tab, t2@sel_2) */ 1 FROM t1 AS tab WHERE (EXISTS(SELECT 1 FROM t2 WHERE a2 = a1 ))").String()
tk.Session().GetSessionVars().SetEnableCascadesPlanner(true)
Expand Down
1 change: 1 addition & 0 deletions pkg/planner/core/casetest/enforcempp/enforce_mpp_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,7 @@ func TestEnforceMPPWarning4(t *testing.T) {
// test table
tk.MustExec("use test")
tk.MustExec("set tidb_cost_model_version=2")
tk.MustExec("set tidb_hash_join_version=optimized")
tk.MustExec("drop table if exists t")
tk.MustExec("CREATE TABLE t(a int primary key)")
tk.MustExec("drop table if exists s")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -531,7 +531,7 @@ func TestMPPRightSemiJoin(t *testing.T) {
}
tk.MustExec("analyze table t1 all columns")
tk.MustExec("analyze table t2 all columns")
tk.MustExec("set @@tidb_allow_mpp=1; set @@tidb_enforce_mpp=1;")
tk.MustExec("set @@tidb_allow_mpp=1; set @@tidb_enforce_mpp=1; set @@tidb_hash_join_version=optimized;")
{
var input []string
var output []struct {
Expand Down
2 changes: 1 addition & 1 deletion pkg/planner/core/exhaust_physical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -388,7 +388,7 @@ func isGAForHashJoinV2(joinType logicalop.JoinType, leftJoinKeys []*expression.C
}
}
switch joinType {
case logicalop.LeftOuterJoin, logicalop.RightOuterJoin, logicalop.InnerJoin:
case logicalop.LeftOuterJoin, logicalop.RightOuterJoin, logicalop.InnerJoin, logicalop.AntiSemiJoin, logicalop.SemiJoin:
return true
default:
return false
Expand Down
137 changes: 136 additions & 1 deletion tests/integrationtest/r/cte.result
Original file line number Diff line number Diff line change
Expand Up @@ -579,7 +579,16 @@ c1 c2
2 1
// Test semi apply.
insert into t1 values(2, 3);
explain select * from t1 where exists(with cte1 as (select c1 from t2 where t2.c2 = t1.c2) select c1 from cte1);
explain select /*+ set_var(tidb_hash_join_version=legacy) */ * from t1 where exists(with cte1 as (select c1 from t2 where t2.c2 = t1.c2) select c1 from cte1);
id estRows task access object operator info
HashJoin_14 7992.00 root semi join, left side:TableReader_17, equal:[eq(cte.t1.c2, cte.t2.c2)]
├─TableReader_20(Build) 9990.00 root data:Selection_19
│ └─Selection_19 9990.00 cop[tikv] not(isnull(cte.t2.c2))
│ └─TableFullScan_18 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo
└─TableReader_17(Probe) 9990.00 root data:Selection_16
└─Selection_16 9990.00 cop[tikv] not(isnull(cte.t1.c2))
└─TableFullScan_15 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo
explain select /*+ set_var(tidb_hash_join_version=optimized) */ * from t1 where exists(with cte1 as (select c1 from t2 where t2.c2 = t1.c2) select c1 from cte1);
id estRows task access object operator info
HashJoin_14 7992.00 root semi join, left side:TableReader_18, equal:[eq(cte.t1.c2, cte.t2.c2)]
├─TableReader_21(Build) 9990.00 root data:Selection_20
Expand Down Expand Up @@ -1109,7 +1118,133 @@ LEFT JOIN table_c a
ON t.col_4 = a.col_4
AND a.col_1 = dt.date3
)
SELECT /*+ set_var(tidb_hash_join_version=legacy) */ col_4
FROM (
SELECT col_4
FROM product_detail
UNION ALL
SELECT col_4
FROM product_detail
) a;
id estRows task access object operator info
Union_210 199600.20 root
├─Projection_211 99800.10 root cte1.table_c.col_4->Column#418
│ └─CTEFullScan_212 99800.10 root CTE:product_detail data:CTE_4
└─Projection_213 99800.10 root cte1.table_c.col_4->Column#418
└─CTEFullScan_214 99800.10 root CTE:product_detail data:CTE_4
CTE_4 99800.10 root Non-Recursive CTE
└─Projection_172(Seed Part) 99800.10 root cte1.table_c.col_4, 3集合->Column#413
└─HashJoin_188 99800.10 root left outer join, left side:HashJoin_190, equal:[eq(cte1.table_c.col_4, cte1.table_c.col_4) eq(Column#390, cte1.table_c.col_1)]
├─TableReader_204(Build) 9980.01 root data:Selection_203
│ └─Selection_203 9980.01 cop[tikv] not(isnull(cte1.table_c.col_1)), not(isnull(cte1.table_c.col_4))
│ └─TableFullScan_202 10000.00 cop[tikv] table:a keep order:false, stats:pseudo
└─HashJoin_190(Probe) 80000.00 root CARTESIAN left outer join, left side:HashAgg_196
├─CTEFullScan_200(Build) 10.00 root CTE:date_table AS dt data:CTE_0
└─HashAgg_196(Probe) 8000.00 root group by:cte1.table_c.col_3, cte1.table_c.col_4, funcs:firstrow(cte1.table_c.col_4)->cte1.table_c.col_4
└─TableReader_197 8000.00 root data:HashAgg_192
└─HashAgg_192 8000.00 cop[tikv] group by:cte1.table_c.col_3, cte1.table_c.col_4,
└─TableFullScan_195 10000.00 cop[tikv] table:t keep order:false, stats:pseudo
CTE_0 10.00 root Non-Recursive CTE
└─Apply_95(Seed Part) 10.00 root CARTESIAN left outer join, left side:Apply_97
├─Apply_97(Build) 10.00 root CARTESIAN left outer join, left side:Apply_99
│ ├─Apply_99(Build) 10.00 root CARTESIAN left outer join, left side:Apply_101
│ │ ├─Apply_101(Build) 10.00 root CARTESIAN left outer join, left side:TableReader_104
│ │ │ ├─TableReader_104(Build) 10.00 root data:Selection_103
│ │ │ │ └─Selection_103 10.00 cop[tikv] eq(cte1.table_d.col_1, 20240628)
│ │ │ │ └─TableFullScan_102 10000.00 cop[tikv] table:d keep order:false, stats:pseudo
│ │ │ └─StreamAgg_106(Probe) 10.00 root funcs:max(cte1.table_c.col_1)->Column#159
│ │ │ └─TopN_109 10.00 root cte1.table_c.col_1:desc, offset:0, count:1
│ │ │ └─HashJoin_114 63936.00 root semi join, left side:Projection_115, equal:[eq(Column#423, Column#424)]
│ │ │ ├─Projection_119(Build) 80000.00 root cast(cte1.table_d.col_1, double BINARY)->Column#424
│ │ │ │ └─TableReader_122 80000.00 root data:Selection_121
│ │ │ │ └─Selection_121 80000.00 cop[tikv] eq(cast(cte1.table_d.col_2, double BINARY), 1)
│ │ │ │ └─TableFullScan_120 100000.00 cop[tikv] table:d keep order:false, stats:pseudo
│ │ │ └─Projection_115(Probe) 79920.00 root cte1.table_c.col_1, cast(cte1.table_c.col_1, double BINARY)->Column#423
│ │ │ └─IndexReader_118 79920.00 root index:Selection_117
│ │ │ └─Selection_117 79920.00 cop[tikv] le(cte1.table_c.col_1, concat(cast(year(cast(date_sub(cte1.table_d.col_1, 1, "YEAR"), datetime(6) BINARY)), var_string(20)), "1231"))
│ │ │ └─IndexFullScan_116 99900.00 cop[tikv] table:a, index:index_col_1_3(col_1, col_3) keep order:false, stats:pseudo
│ │ └─StreamAgg_124(Probe) 10.00 root funcs:max(cte1.table_a.col_1)->Column#208
│ │ └─TopN_127 10.00 root cte1.table_a.col_1:desc, offset:0, count:1
│ │ └─HashJoin_132 63936.00 root semi join, left side:Projection_133, equal:[eq(Column#429, Column#430)]
│ │ ├─Projection_137(Build) 80000.00 root cast(cte1.table_d.col_1, double BINARY)->Column#430
│ │ │ └─TableReader_140 80000.00 root data:Selection_139
│ │ │ └─Selection_139 80000.00 cop[tikv] eq(cast(cte1.table_d.col_2, double BINARY), 1)
│ │ │ └─TableFullScan_138 100000.00 cop[tikv] table:d keep order:false, stats:pseudo
│ │ └─Projection_133(Probe) 79920.00 root cte1.table_a.col_1, cast(cte1.table_a.col_1, double BINARY)->Column#429
│ │ └─IndexReader_136 79920.00 root index:Selection_135
│ │ └─Selection_135 79920.00 cop[tikv] le(cte1.table_a.col_1, concat(cast(year(cast(date_sub(cte1.table_d.col_1, 1, "YEAR"), datetime(6) BINARY)), var_string(20)), "1231"))
│ │ └─IndexFullScan_134 99900.00 cop[tikv] table:a, index:index_col_1(col_1) keep order:false, stats:pseudo
│ └─StreamAgg_142(Probe) 10.00 root funcs:max(cte1.table_c.col_1)->Column#230
│ └─Limit_146 10.00 root offset:0, count:1
│ └─IndexReader_155 10.00 root index:Limit_154
│ └─Limit_154 10.00 cop[tikv] offset:0, count:1
│ └─Selection_153 10.00 cop[tikv] le(cast(cte1.table_c.col_1, double BINARY), cast(cte1.table_d.col_1, double BINARY))
│ └─IndexFullScan_152 12.50 cop[tikv] table:table_c, index:index_col_1_3(col_1, col_3) keep order:true, desc, stats:pseudo
└─StreamAgg_157(Probe) 10.00 root funcs:max(cte1.table_a.col_1)->Column#254
└─Limit_161 10.00 root offset:0, count:1
└─IndexReader_170 10.00 root index:Limit_169
└─Limit_169 10.00 cop[tikv] offset:0, count:1
└─Selection_168 10.00 cop[tikv] le(cast(cte1.table_a.col_1, double BINARY), cast(cte1.table_d.col_1, double BINARY))
└─IndexFullScan_167 12.50 cop[tikv] table:table_a, index:index_col_1(col_1) keep order:true, desc, stats:pseudo
desc WITH date_table AS (
SELECT
d.col_1 AS date,
(SELECT MAX(col_1)
FROM table_c a
WHERE col_1 <=
CONCAT(YEAR(DATE_SUB(d.col_1, INTERVAL 1 YEAR)),
'1231')
AND EXISTS (SELECT 1
FROM table_d d
WHERE a.col_1 = d.col_1
AND d.col_2 = 1)) AS date1,
(SELECT MAX(col_1)
FROM table_a a
WHERE col_1 <= CONCAT(YEAR(DATE_SUB(d.col_1, INTERVAL 1 YEAR)),
'1231')
AND EXISTS (SELECT 1
FROM table_d d
WHERE a.col_1 = d.col_1
AND d.col_2 = 1)) AS date2,
(SELECT MAX(col_1)
FROM table_c
WHERE col_1 <= d.col_1) AS date3,
(SELECT MAX(col_1)
FROM table_a
WHERE col_1 <= d.col_1) AS date4
FROM table_d d
WHERE d.col_1 = '20240628'
),
rm_am_champs_ex_risk_portfolio_seed_money_1 AS (
SELECT b.col_2
FROM table_a b
LEFT JOIN table_e rb
ON rb.col_1 = b.col_19
AND b.col_6 = rb.col_3
WHERE b.col_2 = (SELECT date4 FROM date_table)
),
rm_am_champs_ex_risk_portfolio_seed_money_2 AS (
SELECT b.col_2
FROM table_a b
LEFT JOIN table_e rb
ON rb.col_1 = b.col_19
AND b.col_6 = rb.col_3
),
product_base AS (
SELECT DISTINCT t.col_3, col_4, 'ML' AS is_do
FROM table_c t
),
product_detail AS (
SELECT t.col_4,
"3集合" AS nature_investment
FROM product_base t
LEFT JOIN date_table dt
ON 1 = 1
LEFT JOIN table_c a
ON t.col_4 = a.col_4
AND a.col_1 = dt.date3
)
SELECT /*+ set_var(tidb_hash_join_version=optimized) */ col_4
FROM (
SELECT col_4
FROM product_detail
Expand Down
9 changes: 8 additions & 1 deletion tests/integrationtest/r/naaj.result
Original file line number Diff line number Diff line change
Expand Up @@ -402,7 +402,14 @@ HashJoin_9 10000.00 root anti left outer semi join, left side:TableReader_11, e
│ └─TableFullScan_12 10000.00 cop[tikv] table:naaj_B keep order:false, stats:pseudo
└─TableReader_11(Probe) 10000.00 root data:TableFullScan_10
└─TableFullScan_10 10000.00 cop[tikv] table:naaj_A keep order:false, stats:pseudo
explain select * from naaj_A where (a+1,b*2) not in (select a, b=1 from naaj_B where naaj_A.a = naaj_B.a);
explain select /*+ set_var(tidb_hash_join_version=legacy) */ * from naaj_A where (a+1,b*2) not in (select a, b=1 from naaj_B where naaj_A.a = naaj_B.a);
id estRows task access object operator info
HashJoin_9 8000.00 root anti semi join, left side:TableReader_11, equal:[eq(naaj.naaj_a.a, naaj.naaj_b.a)], other cond:eq(mul(naaj.naaj_a.b, 2), eq(naaj.naaj_b.b, 1)), eq(plus(naaj.naaj_a.a, 1), naaj.naaj_b.a)
├─TableReader_13(Build) 10000.00 root data:TableFullScan_12
│ └─TableFullScan_12 10000.00 cop[tikv] table:naaj_B keep order:false, stats:pseudo
└─TableReader_11(Probe) 10000.00 root data:TableFullScan_10
└─TableFullScan_10 10000.00 cop[tikv] table:naaj_A keep order:false, stats:pseudo
explain select /*+ set_var(tidb_hash_join_version=optimized) */ * from naaj_A where (a+1,b*2) not in (select a, b=1 from naaj_B where naaj_A.a = naaj_B.a);
id estRows task access object operator info
HashJoin_9 8000.00 root anti semi join, left side:TableReader_12, equal:[eq(naaj.naaj_a.a, naaj.naaj_b.a)], other cond:eq(mul(naaj.naaj_a.b, 2), eq(naaj.naaj_b.b, 1)), eq(plus(naaj.naaj_a.a, 1), naaj.naaj_b.a)
├─TableReader_14(Build) 10000.00 root data:TableFullScan_13
Expand Down
9 changes: 8 additions & 1 deletion tests/integrationtest/r/planner/cascades/integration.result
Original file line number Diff line number Diff line change
Expand Up @@ -871,7 +871,14 @@ Projection_22 1.00 root Column#7, planner__cascades__integration.t1.a, planner_
select sum(a), (select t1.a from t1 where t1.a = t2.a limit 1), (select t1.b from t1 where t1.b = t2.b limit 1) from t2;
sum(a) (select t1.a from t1 where t1.a = t2.a limit 1) (select t1.b from t1 where t1.b = t2.b limit 1)
6 1 11
explain select a from t1 where exists(select 1 from t2 where t1.a = t2.a);
explain select /*+ set_var(tidb_hash_join_version=legacy) */ a from t1 where exists(select 1 from t2 where t1.a = t2.a);
id estRows task access object operator info
MergeJoin_17 8000.00 root semi join, left side:TableReader_29, left key:planner__cascades__integration.t1.a, right key:planner__cascades__integration.t2.a
├─TableReader_31(Build) 10000.00 root data:TableFullScan_30
│ └─TableFullScan_30 10000.00 cop[tikv] table:t2 keep order:true, stats:pseudo
└─TableReader_29(Probe) 10000.00 root data:TableFullScan_28
└─TableFullScan_28 10000.00 cop[tikv] table:t1 keep order:true, stats:pseudo
explain select /*+ set_var(tidb_hash_join_version=optimized) */ a from t1 where exists(select 1 from t2 where t1.a = t2.a);
id estRows task access object operator info
MergeJoin_17 8000.00 root semi join, left side:TableReader_30, left key:planner__cascades__integration.t1.a, right key:planner__cascades__integration.t2.a
├─TableReader_32(Build) 10000.00 root data:TableFullScan_31
Expand Down
28 changes: 27 additions & 1 deletion tests/integrationtest/r/planner/core/casetest/integration.result
Original file line number Diff line number Diff line change
Expand Up @@ -1418,7 +1418,33 @@ create table t1 (a int, b int);
create table t2_part (a int, b int, key(a)) partition by hash(a) partitions 4;
set @@tidb_skip_missing_partition_stats = off;
set @@tidb_opt_fix_control = "";
explain select /*+ TIDB_INLJ(t2_part@sel_2) */ * from t1 where t1.b<10 and not exists (select 1 from t2_part where t1.a=t2_part.a and t2_part.b<20);
explain select /*+ TIDB_INLJ(t2_part@sel_2), set_var(tidb_hash_join_version=legacy) */ * from t1 where t1.b<10 and not exists (select 1 from t2_part where t1.a=t2_part.a and t2_part.b<20);
id estRows task access object operator info
HashJoin_19 2658.67 root anti semi join, left side:TableReader_22, equal:[eq(planner__core__casetest__integration.t1.a, planner__core__casetest__integration.t2_part.a)]
├─PartitionUnion_23(Build) 13293.33 root
│ ├─TableReader_31 3323.33 root data:Projection_25
│ │ └─Projection_25 3323.33 cop[tikv] planner__core__casetest__integration.t2_part.a
│ │ └─Selection_30 3323.33 cop[tikv] lt(planner__core__casetest__integration.t2_part.b, 20)
│ │ └─TableFullScan_29 10000.00 cop[tikv] table:t2_part, partition:p0 keep order:false, stats:pseudo
│ ├─TableReader_39 3323.33 root data:Projection_33
│ │ └─Projection_33 3323.33 cop[tikv] planner__core__casetest__integration.t2_part.a
│ │ └─Selection_38 3323.33 cop[tikv] lt(planner__core__casetest__integration.t2_part.b, 20)
│ │ └─TableFullScan_37 10000.00 cop[tikv] table:t2_part, partition:p1 keep order:false, stats:pseudo
│ ├─TableReader_47 3323.33 root data:Projection_41
│ │ └─Projection_41 3323.33 cop[tikv] planner__core__casetest__integration.t2_part.a
│ │ └─Selection_46 3323.33 cop[tikv] lt(planner__core__casetest__integration.t2_part.b, 20)
│ │ └─TableFullScan_45 10000.00 cop[tikv] table:t2_part, partition:p2 keep order:false, stats:pseudo
│ └─TableReader_55 3323.33 root data:Projection_49
│ └─Projection_49 3323.33 cop[tikv] planner__core__casetest__integration.t2_part.a
│ └─Selection_54 3323.33 cop[tikv] lt(planner__core__casetest__integration.t2_part.b, 20)
│ └─TableFullScan_53 10000.00 cop[tikv] table:t2_part, partition:p3 keep order:false, stats:pseudo
└─TableReader_22(Probe) 3323.33 root data:Selection_21
└─Selection_21 3323.33 cop[tikv] lt(planner__core__casetest__integration.t1.b, 10)
└─TableFullScan_20 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo
Level Code Message
Warning 1105 disable dynamic pruning due to t2_part has no global stats
Warning 1815 Optimizer Hint /*+ INL_JOIN(t2_part) */ or /*+ TIDB_INLJ(t2_part) */ is inapplicable
explain select /*+ TIDB_INLJ(t2_part@sel_2), set_var(tidb_hash_join_version=optimized) */ * from t1 where t1.b<10 and not exists (select 1 from t2_part where t1.a=t2_part.a and t2_part.b<20);
id estRows task access object operator info
HashJoin_19 2658.67 root anti semi join, left side:TableReader_23, equal:[eq(planner__core__casetest__integration.t1.a, planner__core__casetest__integration.t2_part.a)]
├─PartitionUnion_24(Build) 13293.33 root
Expand Down
Loading

0 comments on commit 72a1145

Please sign in to comment.