From ac77889c858a8e70fb98426b4e7af1c78f3675af Mon Sep 17 00:00:00 2001
From: Andrew Kimball
Date: Wed, 15 Jun 2022 23:32:53 -0600
Subject: [PATCH 1/4] opt: fix memo cycle caused by join ordering

In some rare cases when null-rejection rules fail to fire, a redundant
filter can be inferred in an `InnerJoin` - `LeftJoin` complex. This could
previously result in the `JoinOrderBuilder` attempting to add a `Select`
to the same memo group as its input, which would create a memo cycle.
This patch prevents the `JoinOrderBuilder` from adding the `Select` to
the memo in such cases.

What follows is a more detailed explanation of the conditions that could
previously cause a memo cycle. `InnerJoin` operators have two properties
that make them more 'reorderable' than other types of joins: (1) their
conjuncts can be reordered separately and (2) new conjuncts can be
inferred from equalities. As a special case of (1), an `InnerJoin` can be
pushed into the left side of a `LeftJoin`, leaving behind any `Select`
conjuncts that reference the right side of the `LeftJoin`. This allows
the `JoinOrderBuilder` to make the following transformation:
```
(InnerJoin
  A
  (InnerJoin
    B
    (LeftJoin
      C
      D
      c=d
    )
    b=c
  )
  a=b, a=d
)
=>
(InnerJoin
  A
  (Select
    (LeftJoin
      (InnerJoin
        B
        C
        b=c
      )
      D
      c=d
    )
    b=d // Inferred filter!
  )
  a=b, a=d
)
```
Note the new `b=d` filter that was inferred and subsequently left on a
`Select` operator after the reordering. The crucial point is that this
filter is redundant - the input to the `Select` is already a valid
reordering of the `BCD` join complex. The `JoinOrderBuilder` avoids
adding redundant filters to `InnerJoin` operators, but does not do the
same for the `Select` case because it was assumed that the filters left
on the `Select` would never be redundant. This is because the `a=d`
filter *should* reject nulls, so the `LeftJoin` should have been
simplified. However, in rare cases null-rejection does not take place.
Because the input to the `Select` is already a valid reordering, the
`JoinOrderBuilder` ends up trying to add the `Select` to the same group
as its input - namely, the `BCD` join group. This causes a cycle in the
memo.

Fixes #80901

Release note (bug fix): Fixed a bug that could cause an optimizer panic
in rare cases when a query had a left join in the input of an inner
join.
---
 pkg/sql/opt/testutils/opttester/opt_tester.go |   6 +
 .../opt/testutils/opttester/reorder_joins.go  |  40 +--
 pkg/sql/opt/xform/join_order_builder.go       |  82 ++++--
 pkg/sql/opt/xform/testdata/rules/join_order   | 271 ++++++++++++++++++
 4 files changed, 354 insertions(+), 45 deletions(-)

diff --git a/pkg/sql/opt/testutils/opttester/opt_tester.go b/pkg/sql/opt/testutils/opttester/opt_tester.go
index d2f0a343b640..e6838452cd2f 100644
--- a/pkg/sql/opt/testutils/opttester/opt_tester.go
+++ b/pkg/sql/opt/testutils/opttester/opt_tester.go
@@ -528,6 +528,9 @@ func New(catalog cat.Catalog, sql string) *OptTester {
 // - group-limit: used with check-size to set a max limit on the number of
 // groups that can be added to the memo before a testing error is returned.
 //
+// - memo-cycles: used with memo to search the memo for cycles and output a
+// path with a cycle if one is found.
+// // - use-multi-col-stats sets the value for // SessionData.OptimizerUseMultiColStats which indicates whether or not // multi-column statistics are used for cardinality estimation in the @@ -1115,6 +1118,9 @@ func (f *Flags) Set(arg datadriven.CmdArg) error { } f.UseMultiColStats = b + case "memo-cycles": + f.MemoFormat = xform.FmtCycle + default: return fmt.Errorf("unknown argument: %s", arg.Key) } diff --git a/pkg/sql/opt/testutils/opttester/reorder_joins.go b/pkg/sql/opt/testutils/opttester/reorder_joins.go index 261b4d2d2918..8ea9437216f4 100644 --- a/pkg/sql/opt/testutils/opttester/reorder_joins.go +++ b/pkg/sql/opt/testutils/opttester/reorder_joins.go @@ -73,23 +73,29 @@ func (ot *OptTester) ReorderJoins() (string, error) { relsJoinedLast = "" }) - o.JoinOrderBuilder().NotifyOnAddJoin(func(left, right, all, refs []memo.RelExpr, op opt.Operator) { - relsToJoin := jof.formatVertexSet(all) - if relsToJoin != relsJoinedLast { - ot.output(fmt.Sprintf("Joining %s\n", relsToJoin)) - relsJoinedLast = relsToJoin - } - ot.indent( - fmt.Sprintf( - "%s %s [%s, refs=%s]", - jof.formatVertexSet(left), - jof.formatVertexSet(right), - joinOpLabel(op), - jof.formatVertexSet(refs), - ), - ) - joinsConsidered++ - }) + o.JoinOrderBuilder().NotifyOnAddJoin( + func(left, right, all, joinRefs, selRefs []memo.RelExpr, op opt.Operator) { + relsToJoin := jof.formatVertexSet(all) + if relsToJoin != relsJoinedLast { + ot.output(fmt.Sprintf("Joining %s\n", relsToJoin)) + relsJoinedLast = relsToJoin + } + var selString string + if len(selRefs) > 0 { + selString = fmt.Sprintf(" [select, refs=%s]", jof.formatVertexSet(selRefs)) + } + ot.indent( + fmt.Sprintf( + "%s %s [%s, refs=%s]%s", + jof.formatVertexSet(left), + jof.formatVertexSet(right), + joinOpLabel(op), + jof.formatVertexSet(joinRefs), + selString, + ), + ) + joinsConsidered++ + }) expr, err := ot.optimizeExpr(o, nil) if err != nil { diff --git a/pkg/sql/opt/xform/join_order_builder.go b/pkg/sql/opt/xform/join_order_builder.go index 3d68a4080f10..d0bea26af806 100644 --- a/pkg/sql/opt/xform/join_order_builder.go +++ b/pkg/sql/opt/xform/join_order_builder.go @@ -62,7 +62,7 @@ type OnReorderRuleParam struct { // the base relations of the left and right inputs of the join, the set of all // base relations currently being considered, the base relations referenced by // the join's ON condition, and the type of join. -type OnAddJoinFunc func(left, right, all, refs []memo.RelExpr, op opt.Operator) +type OnAddJoinFunc func(left, right, all, joinRefs, selectRefs []memo.RelExpr, op opt.Operator) // JoinOrderBuilder is used to add valid orderings of a given join tree to the // memo during exploration. @@ -689,7 +689,7 @@ func (jb *JoinOrderBuilder) addJoin( } if jb.onAddJoinFunc != nil { // Hook for testing purposes. - jb.callOnAddJoinFunc(s1, s2, joinFilters, op) + jb.callOnAddJoinFunc(s1, s2, joinFilters, selectFilters, op) } left := jb.plans[s1] @@ -715,7 +715,7 @@ func (jb *JoinOrderBuilder) addJoin( if jb.onAddJoinFunc != nil { // Hook for testing purposes. - jb.callOnAddJoinFunc(s2, s1, joinFilters, op) + jb.callOnAddJoinFunc(s2, s1, joinFilters, selectFilters, op) } } } @@ -754,6 +754,12 @@ func (jb *JoinOrderBuilder) addToGroup( ) { if len(selectFilters) > 0 { joinExpr := jb.memoize(op, left, right, on, nil) + if joinExpr.FirstExpr() == grp.FirstExpr() { + // In rare cases, the select filters may be redundant. In this case, + // adding a select to the group with the redundant filters would create a + // memo cycle (see #80901). 
+ return + } selectExpr := &memo.SelectExpr{ Input: joinExpr, Filters: selectFilters, @@ -950,13 +956,14 @@ func (jb *JoinOrderBuilder) callOnReorderFunc(join memo.RelExpr) { // callOnAddJoinFunc calls the onAddJoinFunc callback function. Panics if the // function is nil. func (jb *JoinOrderBuilder) callOnAddJoinFunc( - s1, s2 vertexSet, edges memo.FiltersExpr, op opt.Operator, + s1, s2 vertexSet, joinFilters, selectFilters memo.FiltersExpr, op opt.Operator, ) { jb.onAddJoinFunc( jb.getRelationSlice(s1), jb.getRelationSlice(s2), jb.getRelationSlice(s1.union(s2)), - jb.getRelationSlice(jb.getRelations(jb.getFreeVars(edges))), + jb.getRelationSlice(jb.getRelations(jb.getFreeVars(joinFilters))), + jb.getRelationSlice(jb.getRelations(jb.getFreeVars(selectFilters))), op, ) } @@ -1269,6 +1276,22 @@ func (e *edge) checkNonInnerJoin(s1, s2 vertexSet) bool { // assoc(left-join, inner-join) is false. // 3. The TES now includes all three relations, meaning that the inner join // cannot join any two relations together (including xy and uv). + // + // Note that checking that the TES intersects both s1 and s2 diverges slightly + // from the paper. This makes explicit the fact that we forbid the + // introduction of cross joins that did not exist in the original normalized + // plan. (The paper checks if the left and right tables intersect s1 and s2 + // respectively). However, the check is exactly equivalent to that given in + // the paper for the following reasons: + // 1. For degenerate predicates (one or both inputs not referenced) we add + // all base relations from the unreferenced input(s) to the TES + // (see calcTES). + // 2. (1) ensures that (TES ∩ S != ∅) implies (TABLES(input) ∩ S != ∅). + // 3. Since we discard join orders that introduce new cross-products anyway, + // we always filter out cases where (TABLES(input) ∩ S != ∅) but + // (TES ∩ S == ∅). + // Therefore, the check we use here prevents exactly the same reorderings as + // the check used in the paper. return e.tes.intersection(e.op.leftVertexes).isSubsetOf(s1) && e.tes.intersection(e.op.rightVertexes).isSubsetOf(s2) && e.tes.intersects(s1) && e.tes.intersects(s2) @@ -1277,29 +1300,6 @@ func (e *edge) checkNonInnerJoin(s1, s2 vertexSet) bool { // checkInnerJoin performs an applicability check for an inner join between the // two given sets of base relations. If it returns true, an inner join can be // constructed using the filters from this edge and the two given relation sets. -// -// Why is the inner join check different from the non-inner join check? -// In effect, the difference between the inner and non-inner edge checks is that -// for inner joins, relations can be moved 'across' the join relative to their -// positions in the original join tree. This is necessary in order to allow -// inner join conjuncts from different joins to be combined into new join -// operators. For example, take this perfectly valid (and desirable) -// transformation: -// -// SELECT * FROM xy -// INNER JOIN (SELECT * FROM ab INNER JOIN uv ON a = u) -// ON x = a AND x = u -// => -// SELECT * FROM ab -// INNER JOIN (SELECT * FROM xy INNER JOIN uv ON x = u) -// ON x = a AND a = u -// -// Note that, from the perspective of the x = a edge, it looks like the join has -// been commuted (the xy and ab relations switched sides). From the perspective -// of the a = u edge, however, all relations that were previously on the left -// are still on the left, and all relations that were on the right are still on -// the right. 
The stricter requirements of checkNonInnerJoin would not allow -// this transformation to take place. func (e *edge) checkInnerJoin(s1, s2 vertexSet) bool { if !e.checkRules(s1, s2) { // The conflict rules for this edge are not satisfied for a join between s1 @@ -1310,6 +1310,32 @@ func (e *edge) checkInnerJoin(s1, s2 vertexSet) bool { // The TES must be a subset of the relations of the candidate join inputs. In // addition, the TES must intersect both s1 and s2 (the edge must connect the // two vertex sets). + // + // Why is the inner join check different from the non-inner join check? + // In effect, the difference between the inner and non-inner edge checks is + // that for inner joins, relations can be moved 'across' the join relative to + // their positions in the original join tree. This is necessary in order to + // allow inner join conjuncts from different joins to be combined into new + // join operators. For example, take this perfectly valid (and desirable) + // transformation: + // + // SELECT * FROM xy + // INNER JOIN (SELECT * FROM ab INNER JOIN uv ON a = u) + // ON x = a AND x = u + // => + // SELECT * FROM ab + // INNER JOIN (SELECT * FROM xy INNER JOIN uv ON x = u) + // ON x = a AND a = u + // + // Note that, from the perspective of the x = a edge, it looks like the join + // has been commuted (the xy and ab relations switched sides). From the + // perspective of the a = u edge, however, all relations that were previously + // on the left are still on the left, and all relations that were on the right + // are still on the right. The stricter requirements of checkNonInnerJoin + // would not allow this transformation to take place. + // + // See the checkNonInnerJoin comments for an explanation of why the + // intersection checks differ from those shown in the paper. return e.tes.isSubsetOf(s1.union(s2)) && e.tes.intersects(s1) && e.tes.intersects(s2) } diff --git a/pkg/sql/opt/xform/testdata/rules/join_order b/pkg/sql/opt/xform/testdata/rules/join_order index e96f2956f56e..bca37e0c7a2b 100644 --- a/pkg/sql/opt/xform/testdata/rules/join_order +++ b/pkg/sql/opt/xform/testdata/rules/join_order @@ -2397,6 +2397,107 @@ inner-join (hash) └── a:1 = z:16 [outer=(1,16), constraints=(/1: (/NULL - ]; /16: (/NULL - ]), fd=(1)==(16), (16)==(1)] +# Special case for reordering inner-joins and left-joins: One inner-join +# conjunct can be pushed into the left side of the left-join, and the other +# remains above the left-join on a select operator. The 'OR z IS NULL' expression +# prevents the left join from being null-rejected. 
+reorderjoins +SELECT * FROM bx +INNER JOIN +( + SELECT * FROM cy + LEFT JOIN dz + ON c = d +) +ON b = c AND (x = z OR z IS NULL) +---- +-------------------------------------------------------------------------------- +Join Tree #1 +-------------------------------------------------------------------------------- + left-join (hash) + ├── scan cy + ├── scan dz + └── filters + └── c = d +Vertexes + A: + scan cy + B: + scan dz +Edges + c = d [left, ses=AB, tes=AB, rules=()] +Joining AB + A B [left, refs=AB] +Joins Considered: 1 +-------------------------------------------------------------------------------- +Join Tree #2 +-------------------------------------------------------------------------------- + inner-join (hash) + ├── scan bx + ├── left-join (hash) + │ ├── scan cy + │ ├── scan dz + │ └── filters + │ └── c = d + └── filters + ├── b = c + └── (x = z) OR (z IS NULL) +Vertexes + C: + scan bx + A: + scan cy + B: + scan dz +Edges + c = d [left, ses=AB, tes=AB, rules=()] + b = c [inner, ses=CA, tes=CA, rules=()] + (x = z) OR (z IS NULL) [inner, ses=CB, tes=CAB, rules=()] +Joining CA + C A [inner, refs=CA] + A C [inner, refs=CA] +Joining AB + A B [left, refs=AB] +Joining CAB + C AB [inner, refs=CAB] + AB C [inner, refs=CAB] + CA B [left, refs=AB] [select, refs=CB] +Joins Considered: 6 +================================================================================ +Final Plan +================================================================================ +inner-join (merge) + ├── columns: b:1!null x:2 c:5!null y:6 d:9 z:10 + ├── left ordering: +1 + ├── right ordering: +5 + ├── key: (5) + ├── fd: (1)-->(2), (5)-->(6,9,10), (9)-->(10), (1)==(5), (5)==(1) + ├── scan bx + │ ├── columns: b:1!null x:2 + │ ├── key: (1) + │ ├── fd: (1)-->(2) + │ └── ordering: +1 + ├── left-join (merge) + │ ├── columns: c:5!null y:6 d:9 z:10 + │ ├── left ordering: +5 + │ ├── right ordering: +9 + │ ├── key: (5) + │ ├── fd: (5)-->(6,9,10), (9)-->(10) + │ ├── ordering: +5 + │ ├── scan cy + │ │ ├── columns: c:5!null y:6 + │ │ ├── key: (5) + │ │ ├── fd: (5)-->(6) + │ │ └── ordering: +5 + │ ├── scan dz + │ │ ├── columns: d:9!null z:10 + │ │ ├── key: (9) + │ │ ├── fd: (9)-->(10) + │ │ └── ordering: +9 + │ └── filters (true) + └── filters + └── (x:2 = z:10) OR (z:10 IS NULL) [outer=(2,10)] + # Regression test for #59076. Do not reorder on the inner join produced by # CommuteSemiJoin when it matches on an already-reordered semi join because # doing so can lead to an exponential blowup in the size of the memo. @@ -2596,3 +2697,173 @@ project │ └── t2.a:5 = 123456 [outer=(5), constraints=(/5: [/123456 - /123456]; tight), fd=()-->(5)] └── filters └── t1.a:1 = 123456 [outer=(1), constraints=(/1: [/123456 - /123456]; tight), fd=()-->(1)] + +# Regression test for #80901. Do not cause a memo cycle when redundant inner +# join filters can be inferred that reference the right side of a left join, and +# the inner join is pushed into the left side of the left join. 
+ +exec-ddl +CREATE TABLE table80901_1 ( + col1_5 DECIMAL, + col1_7 BOOL, + col1_9 OID, + col1_11 STRING, + col1_12 STRING, + col1_14 STRING, + col1_15 STRING, + col1_16 STRING, + col1_17 STRING +); +---- + +exec-ddl +CREATE TABLE table80901_3 (col3_2 OID, col3_4 FLOAT8, col3_9 STRING); +---- + +memo memo-cycles +SELECT ( + SELECT NULL + FROM table80901_1 AS tab_42924 + JOIN table80901_1 AS tab_42927 + LEFT JOIN table80901_3 AS tab_42928 ON tab_42921.col1_7 + JOIN table80901_3 AS tab_42929 + ON tab_42927.col1_9 = tab_42929.col3_2 + ON tab_42924.col1_17 = tab_42927.col1_15 + AND tab_42924.col1_12 = tab_42929.col3_9 + AND tab_42924.col1_14 = tab_42928.col3_9 + AND tab_42924.col1_14 = tab_42929.col3_9 + AND tab_42924.col1_11 = tab_42927.col1_17 + AND tab_42924.col1_11 = tab_42927.col1_16 + AND tab_42924.col1_15 = tab_42929.col3_9 + AND tab_42924.col1_5 = tab_42929.crdb_internal_mvcc_timestamp +) + FROM table80901_1 AS tab_42921; +---- +memo (optimized, ~60KB, required=[presentation: ?column?:50]) + ├── G1: (project G2 G3) + │ └── [presentation: ?column?:50] + │ ├── best: (project G2 G3) + │ └── cost: 13000.58 + ├── G2: (ensure-distinct-on G4 G5 cols=(10)) (ensure-distinct-on G4 G5 cols=(10),ordering=+10) + │ └── [] + │ ├── best: (ensure-distinct-on G4 G5 cols=(10)) + │ └── cost: 11263.07 + ├── G3: (projections G6) + ├── G4: (left-join-apply G7 G8 G9) + │ ├── [ordering: +10] + │ │ ├── best: (sort G4) + │ │ └── cost: 40638.22 + │ └── [] + │ ├── best: (left-join-apply G7 G8 G9) + │ └── cost: 6609.67 + ├── G5: (aggregations G10) + ├── G6: (variable "?column?") + ├── G7: (scan table80901_1 [as=tab_42921],cols=(2,10)) + │ └── [] + │ ├── best: (scan table80901_1 [as=tab_42921],cols=(2,10)) + │ └── cost: 1145.22 + ├── G8: (project G11 G12) + │ └── [] + │ ├── best: (project G11 G12) + │ └── cost: 4581.66 + ├── G9: (filters) + ├── G10: (const-agg G6) + ├── G11: (inner-join G13 G14 G15) (inner-join G14 G13 G15) (select G16 G17) (inner-join G18 G19 G20) (inner-join G19 G18 G20) (inner-join G21 G22 G23) (inner-join G22 G21 G23) + │ └── [] + │ ├── best: (select G16 G17) + │ └── cost: 4579.90 + ├── G12: (projections G24) + ├── G13: (scan table80901_1 [as=tab_42924],cols=(13,16-19,21)) + │ └── [] + │ ├── best: (scan table80901_1 [as=tab_42924],cols=(13,16-19,21)) + │ └── cost: 1185.62 + ├── G14: (inner-join G18 G22 G25) (left-join G26 G27 G28) (inner-join G22 G18 G25) (right-join G27 G26 G28) (inner-join G22 G18 G29) + │ └── [] + │ ├── best: (left-join G26 G27 G28) + │ └── cost: 101613.03 + ├── G15: (filters G30 G31 G32 G33 G34 G35 G36 G37) + ├── G16: (left-join G38 G27 G28) (right-join G27 G38 G28) + │ └── [] + │ ├── best: (right-join G27 G38 G28) + │ └── cost: 4579.00 + ├── G17: (filters G32) + ├── G18: (left-join G39 G27 G28) (right-join G27 G39 G28) + │ └── [] + │ ├── best: (left-join G39 G27 G28) + │ └── cost: 12270.12 + ├── G19: (inner-join G13 G22 G40) (inner-join G22 G13 G40) + │ └── [] + │ ├── best: (inner-join G13 G22 G40) + │ └── cost: 2311.47 + ├── G20: (filters G41 G30 G32 G34 G35) + ├── G21: (inner-join G13 G18 G42) (inner-join G18 G13 G42) (select G43 G44) + │ └── [] + │ ├── best: (select G43 G44) + │ └── cost: 5372.91 + ├── G22: (scan table80901_3 [as=tab_42929],cols=(43,45,47)) + │ └── [] + │ ├── best: (scan table80901_3 [as=tab_42929],cols=(43,45,47)) + │ └── cost: 1094.72 + ├── G23: (filters G41 G31 G37) + ├── G24: (null) + ├── G25: (filters G41) + ├── G26: (inner-join G39 G22 G25) (inner-join G22 G39 G25) + │ └── [] + │ ├── best: (inner-join G39 G22 G25) + │ └── cost: 2388.32 + ├── G27: 
(scan table80901_3 [as=tab_42928],cols=(39)) + │ └── [] + │ ├── best: (scan table80901_3 [as=tab_42928],cols=(39)) + │ └── cost: 1074.52 + ├── G28: (filters G45) + ├── G29: (filters G41 G46) + ├── G30: (eq G47 G48) + ├── G31: (eq G49 G50) + ├── G32: (eq G51 G52) + ├── G33: (eq G51 G50) + ├── G34: (eq G53 G54) + ├── G35: (eq G53 G55) + ├── G36: (eq G56 G50) + ├── G37: (eq G57 G58) + ├── G38: (inner-join G13 G26 G59) (inner-join G26 G13 G59) (inner-join G39 G19 G60) (inner-join G19 G39 G60) (inner-join G61 G22 G62) (inner-join G22 G61 G62) + │ └── [] + │ ├── best: (inner-join G39 G19 G60) + │ └── cost: 3491.07 + ├── G39: (scan table80901_1 [as=tab_42927],cols=(27,31-33)) + │ └── [] + │ ├── best: (scan table80901_1 [as=tab_42927],cols=(27,31-33)) + │ └── cost: 1165.42 + ├── G40: (filters G31 G33 G36 G37) + ├── G41: (eq G63 G64) + ├── G42: (filters G30 G32 G34 G35 G65 G66) + ├── G43: (left-join G61 G27 G28) (right-join G27 G61 G28) + │ └── [] + │ ├── best: (right-join G27 G61 G28) + │ └── cost: 4421.87 + ├── G44: (filters G32 G65 G66) + ├── G45: (variable tab_42921.col1_7) + ├── G46: (eq G52 G50) + ├── G47: (variable tab_42924.col1_17) + ├── G48: (variable tab_42927.col1_15) + ├── G49: (variable tab_42924.col1_12) + ├── G50: (variable tab_42929.col3_9) + ├── G51: (variable tab_42924.col1_14) + ├── G52: (variable tab_42928.col3_9) + ├── G53: (variable tab_42924.col1_11) + ├── G54: (variable tab_42927.col1_17) + ├── G55: (variable tab_42927.col1_16) + ├── G56: (variable tab_42924.col1_15) + ├── G57: (variable tab_42924.col1_5) + ├── G58: (variable tab_42929.crdb_internal_mvcc_timestamp) + ├── G59: (filters G30 G31 G33 G34 G35 G36 G37) + ├── G60: (filters G41 G30 G34 G35) + ├── G61: (inner-join G13 G39 G67) (inner-join G39 G13 G67) + │ └── [] + │ ├── best: (inner-join G13 G39 G67) + │ └── cost: 2382.17 + ├── G62: (filters G41 G31 G33 G36 G37) + ├── G63: (variable tab_42927.col1_9) + ├── G64: (variable tab_42929.col3_2) + ├── G65: (eq G49 G52) + ├── G66: (eq G56 G52) + └── G67: (filters G30 G34 G35) From 688f535f1a407e614f80abeeecc654a86b201a74 Mon Sep 17 00:00:00 2001 From: Andrew Werner Date: Thu, 7 Jul 2022 10:09:17 -0400 Subject: [PATCH 2/4] server: log stacks on 500 errors Before this change, we'd just log the error body, which often is not very helpful. The format specifier makes me think that the intention was to log the stacks. This made debugging [this](https://github.com/cockroachdb/cockroach/pull/83677#issuecomment-1177612813) trivial as opposed to hard. Release note: None --- pkg/server/admin.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/server/admin.go b/pkg/server/admin.go index 5af4dfe14cb3..ef42f218dd99 100644 --- a/pkg/server/admin.go +++ b/pkg/server/admin.go @@ -186,7 +186,7 @@ func (s *adminServer) RegisterGateway( // serverError logs the provided error and returns an error that should be returned by // the RPC endpoint method. func serverError(ctx context.Context, err error) error { - log.ErrorfDepth(ctx, 1, "%+s", err) + log.ErrorfDepth(ctx, 1, "%+v", err) return errAdminAPIError } From 99016206345eaad71443cfb6aaaf330846f63f12 Mon Sep 17 00:00:00 2001 From: Bilal Akhtar Date: Wed, 6 Jul 2022 17:39:07 -0400 Subject: [PATCH 3/4] storage/metamorphic: Add MVCC delete range using tombstone This change adds MVCCDeleteRangeUsingTombstone to the MVCC metamorphic tests. MVCCDeleteRange had already existed in this test suite, so this ended up being a relatively simple addition. 
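To give a rough idea of the new operation's shape, here is a condensed,
hypothetical sketch of how it drives the MVCC API. The helper name and
wrapping below are illustrative only; the real operation, including operand
generation and result formatting, is in the operations.go diff below.
```
package sketch

import (
	"context"
	"math"

	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/storage"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
)

// writeRangeTombstone is a hypothetical helper that mirrors the call made by
// the new mvcc_delete_range_using_range_tombstone op: it writes an MVCC range
// tombstone over [key, endKey) at timestamp ts.
func writeRangeTombstone(
	ctx context.Context, rw storage.ReadWriter, key, endKey roachpb.Key, ts hlc.Timestamp,
) error {
	// Arguments mirror the metamorphic op's call: no stats tracking, an empty
	// local timestamp, peek bounds equal to the deleted span, and no intent
	// limit.
	return storage.MVCCDeleteRangeUsingTombstone(
		ctx, rw, nil /* ms */, key, endKey, ts, hlc.ClockTimestamp{},
		key, endKey, math.MaxInt64 /* maxIntents */)
}
```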
One part of #82545, with possibly more parts to follow as other MVCC-level operations are added that utilize `writer.{Put,Clear}{MVCC,Engine}RangeKey`. Release note: None. --- pkg/storage/metamorphic/generator.go | 2 +- pkg/storage/metamorphic/operations.go | 53 ++++++++++++++++++++++++++- pkg/storage/metamorphic/options.go | 4 ++ 3 files changed, 57 insertions(+), 2 deletions(-) diff --git a/pkg/storage/metamorphic/generator.go b/pkg/storage/metamorphic/generator.go index ee9fa6066871..7adacd23ad28 100644 --- a/pkg/storage/metamorphic/generator.go +++ b/pkg/storage/metamorphic/generator.go @@ -70,7 +70,7 @@ func (e *engineConfig) String() string { return e.name } -var engineConfigStandard = engineConfig{"standard=0", storage.DefaultPebbleOptions()} +var engineConfigStandard = engineConfig{"standard=0", standardOptions(0)} type engineSequence struct { name string diff --git a/pkg/storage/metamorphic/operations.go b/pkg/storage/metamorphic/operations.go index c435188abdb7..ce51ee381198 100644 --- a/pkg/storage/metamorphic/operations.go +++ b/pkg/storage/metamorphic/operations.go @@ -300,6 +300,30 @@ func (m mvccDeleteRangeOp) run(ctx context.Context) string { return builder.String() } +type mvccDeleteRangeUsingRangeTombstoneOp struct { + m *metaTestRunner + writer readWriterID + key roachpb.Key + endKey roachpb.Key + ts hlc.Timestamp +} + +func (m mvccDeleteRangeUsingRangeTombstoneOp) run(ctx context.Context) string { + writer := m.m.getReadWriter(m.writer) + if m.key.Compare(m.endKey) >= 0 { + // Empty range. No-op. + return "no-op due to no non-conflicting key range" + } + + err := storage.MVCCDeleteRangeUsingTombstone(ctx, writer, nil, m.key, m.endKey, + m.ts, hlc.ClockTimestamp{}, m.key, m.endKey, math.MaxInt64 /* maxIntents */) + if err != nil { + return fmt.Sprintf("error: %s", err) + } + + return fmt.Sprintf("deleted range = %s - %s", m.key, m.endKey) +} + type mvccClearTimeRangeOp struct { m *metaTestRunner key roachpb.Key @@ -923,7 +947,34 @@ var opGenerators = []opGenerator{ operandUnusedMVCCKey, operandUnusedMVCCKey, }, - weight: 20, + weight: 10, + }, + { + name: "mvcc_delete_range_using_range_tombstone", + generate: func(ctx context.Context, m *metaTestRunner, args ...string) mvccOp { + writer := readWriterID(args[0]) + key := m.keyGenerator.parse(args[1]).Key + endKey := m.keyGenerator.parse(args[2]).Key + ts := m.nextTSGenerator.parse(args[3]) + + if endKey.Compare(key) < 0 { + key, endKey = endKey, key + } + return &mvccDeleteRangeUsingRangeTombstoneOp{ + m: m, + writer: writer, + key: key, + endKey: endKey, + ts: ts, + } + }, + operands: []operandType{ + operandReadWriter, + operandMVCCKey, + operandMVCCKey, + operandNextTS, + }, + weight: 10, }, { name: "mvcc_clear_time_range", diff --git a/pkg/storage/metamorphic/options.go b/pkg/storage/metamorphic/options.go index e1fac10fef0f..8a6c42fe9c61 100644 --- a/pkg/storage/metamorphic/options.go +++ b/pkg/storage/metamorphic/options.go @@ -100,11 +100,15 @@ func standardOptions(i int) *pebble.Options { if err := opts.Parse(stdOpts[i], nil); err != nil { panic(err) } + // Enable range keys in all options. + opts.FormatMajorVersion = pebble.FormatRangeKeys return opts } func randomOptions() *pebble.Options { opts := storage.DefaultPebbleOptions() + // Enable range keys in all options. 
+ opts.FormatMajorVersion = pebble.FormatRangeKeys rng, _ := randutil.NewTestRand() opts.BytesPerSync = 1 << rngIntRange(rng, 8, 30) From a6bbc5fccc38322d94d7544e7d536439a4c4db0a Mon Sep 17 00:00:00 2001 From: Ricky Stewart Date: Thu, 7 Jul 2022 12:36:46 -0500 Subject: [PATCH 4/4] bazel: clear configurations when running `git grep` in `check.sh` The configurations `grep.{column,lineNumber,fullName}` can be set globally or on a per-user basis and change the output of `git grep`, which breaks checks that do exact-string matching. We manually clear these configurations for the `git grep`s in this file to ensure the output is predictable on any machine. Release note: None --- build/bazelutil/check.sh | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/build/bazelutil/check.sh b/build/bazelutil/check.sh index f446fb13f4d8..ec1b391d03f0 100755 --- a/build/bazelutil/check.sh +++ b/build/bazelutil/check.sh @@ -6,6 +6,9 @@ set -euo pipefail # wrong with the Bazel build. This is run in CI as well as by `dev generate`. # Set COCKROACH_BAZEL_CHECK_FAST to skip the longer-running logic in this file. +CONFIGS="-c grep.column=false -c grep.lineNumber=false -c grep.fullName=false" +GIT_GREP="git $CONFIGS grep" + EXISTING_GO_GENERATE_COMMENTS=" pkg/roachprod/vm/aws/config.go://go:generate go-bindata -mode 0600 -modtime 1400000000 -pkg aws -o embedded.go config.json old.json pkg/roachprod/vm/aws/config.go://go:generate gofmt -s -w embedded.go @@ -63,7 +66,7 @@ pkg/util/buildutil/crdb_test_on.go://go:build crdb_test && !crdb_test_off " if [ -z "${COCKROACH_BAZEL_CHECK_FAST:-}" ]; then - git grep 'go:generate stringer' pkg | while read LINE; do + $GIT_GREP 'go:generate stringer' pkg | while read LINE; do dir=$(dirname $(echo $LINE | cut -d: -f1)) type=$(echo $LINE | grep -o -- '-type[= ][^ ]*' | sed 's/-type[= ]//g' | awk '{print tolower($0)}') build_out=$(bazel query --output=build "//$dir:${type}_string.go") @@ -79,7 +82,7 @@ fi # We exclude stringer and add-leaktest.sh -- the former is already all # Bazelfied, and the latter can be safely ignored since we have a lint to check # the same thing: https://github.com/cockroachdb/cockroach/issues/64440 -git grep '//go:generate' 'pkg/**/*.go' | grep -v stringer | grep -v 'add-leaktest\.sh' | while read LINE; do +$GIT_GREP '//go:generate' 'pkg/**/*.go' | grep -v stringer | grep -v 'add-leaktest\.sh' | while read LINE; do if [[ "$EXISTING_GO_GENERATE_COMMENTS" == *"$LINE"* ]]; then # Grandfathered. continue @@ -93,7 +96,7 @@ git grep '//go:generate' 'pkg/**/*.go' | grep -v stringer | grep -v 'add-leaktes exit 1 done -git grep 'broken_in_bazel' pkg | grep BUILD.bazel: | grep -v pkg/BUILD.bazel | grep -v pkg/cli/BUILD.bazel | grep -v generate-test-suites | cut -d: -f1 | while read LINE; do +$GIT_GREP 'broken_in_bazel' pkg | grep BUILD.bazel: | grep -v pkg/BUILD.bazel | grep -v pkg/cli/BUILD.bazel | grep -v generate-test-suites | cut -d: -f1 | while read LINE; do if [[ "$EXISTING_BROKEN_TESTS_IN_BAZEL" == *"$LINE"* ]]; then # Grandfathered. continue @@ -103,7 +106,7 @@ git grep 'broken_in_bazel' pkg | grep BUILD.bazel: | grep -v pkg/BUILD.bazel | g exit 1 done -git grep '//go:build' pkg | grep crdb_test | while read LINE; do +$GIT_GREP '//go:build' pkg | grep crdb_test | while read LINE; do if [[ "$EXISTING_CRDB_TEST_BUILD_CONSTRAINTS" == *"$LINE"* ]]; then # Grandfathered. continue