diff --git a/pkg/ccl/logictestccl/testdata/logic_test/zone b/pkg/ccl/logictestccl/testdata/logic_test/zone new file mode 100644 index 000000000000..2322e43fa4e1 --- /dev/null +++ b/pkg/ccl/logictestccl/testdata/logic_test/zone @@ -0,0 +1,75 @@ +# LogicTest: 5node-dist-opt + +# Ensure that cost-based-optimizer uses an index with zone constraints that most +# closely matches the gateway's locality. + +statement ok +CREATE TABLE t ( + k INT PRIMARY KEY, + v STRING, + INDEX secondary (k) STORING (v) +); + +# ------------------------------------------------------------------------------ +# Put table in dc2 and secondary index in dc1 so that the gateway matches the +# secondary index rather the primary index. +# ------------------------------------------------------------------------------ + +statement ok +ALTER TABLE t CONFIGURE ZONE USING constraints='[+region=test,+dc=dc2]' + +statement ok +ALTER INDEX t@secondary CONFIGURE ZONE USING constraints='[+region=test,+dc=dc1]' + +query TTT +EXPLAIN SELECT * FROM t WHERE k=10 +---- +scan · · +· table t@secondary +· spans /10-/11 + +# ------------------------------------------------------------------------------ +# Swap location of primary and secondary indexes and ensure that primary index +# is used instead. +# ------------------------------------------------------------------------------ + +statement ok +ALTER TABLE t CONFIGURE ZONE USING constraints='[+region=test,+dc=dc1]' + +statement ok +ALTER INDEX t@secondary CONFIGURE ZONE USING constraints='[+region=test,+dc=dc2]' + +query TTT +EXPLAIN SELECT * FROM t WHERE k=10 +---- +scan · · +· table t@primary +· spans /10-/10/# + +# ------------------------------------------------------------------------------ +# Use PREPARE to make sure that the prepared plan is invalidated when the +# secondary index's constraints change. +# ------------------------------------------------------------------------------ + +statement +PREPARE p AS SELECT tree, field, description FROM [EXPLAIN SELECT k, v FROM t WHERE k=10] + +query TTT +EXECUTE p +---- +scan · · +· table t@primary +· spans /10-/10/# + +statement ok +ALTER TABLE t CONFIGURE ZONE USING constraints='[+region=test,+dc=dc2]' + +statement ok +ALTER INDEX t@secondary CONFIGURE ZONE USING constraints='[+region=test,+dc=dc1]' + +query TTT +EXECUTE p +---- +scan · · +· table t@secondary +· spans /10-/11 diff --git a/pkg/config/zone.go b/pkg/config/zone.go index cb587b4faf69..f87ec352e3b8 100644 --- a/pkg/config/zone.go +++ b/pkg/config/zone.go @@ -729,13 +729,13 @@ func (z ZoneConfig) subzoneSplits() []roachpb.RKey { } // ReplicaConstraintsCount is part of the cat.Zone interface. -func (zc *ZoneConfig) ReplicaConstraintsCount() int { - return len(zc.Constraints) +func (z *ZoneConfig) ReplicaConstraintsCount() int { + return len(z.Constraints) } // ReplicaConstraints is part of the cat.Zone interface. -func (zc *ZoneConfig) ReplicaConstraints(i int) cat.ReplicaConstraints { - return &zc.Constraints[i] +func (z *ZoneConfig) ReplicaConstraints(i int) cat.ReplicaConstraints { + return &z.Constraints[i] } // ReplicaCount is part of the cat.ReplicaConstraints interface. diff --git a/pkg/server/server.go b/pkg/server/server.go index 7b6b2285b121..3db1ea96b5b8 100644 --- a/pkg/server/server.go +++ b/pkg/server/server.go @@ -600,6 +600,7 @@ func NewServer(cfg Config, stopper *stop.Stopper) (*Server, error) { execCfg = sql.ExecutorConfig{ Settings: s.st, NodeInfo: nodeInfo, + Locality: s.cfg.Locality, AmbientCtx: s.cfg.AmbientCtx, DB: s.db, Gossip: s.gossip, diff --git a/pkg/sql/conn_executor.go b/pkg/sql/conn_executor.go index 778de7f2a14e..111409cc55f2 100644 --- a/pkg/sql/conn_executor.go +++ b/pkg/sql/conn_executor.go @@ -1816,6 +1816,7 @@ func (ex *connExecutor) initEvalCtx(ctx context.Context, evalCtx *extendedEvalCo TestingKnobs: ex.server.cfg.EvalContextTestingKnobs, ClusterID: ex.server.cfg.ClusterID(), NodeID: ex.server.cfg.NodeID.Get(), + Locality: ex.server.cfg.Locality, ReCache: ex.server.reCache, InternalExecutor: ie, }, diff --git a/pkg/sql/exec_util.go b/pkg/sql/exec_util.go index e8644528477c..cd276ec6f03b 100644 --- a/pkg/sql/exec_util.go +++ b/pkg/sql/exec_util.go @@ -331,6 +331,7 @@ type nodeStatusGenerator interface { type ExecutorConfig struct { Settings *cluster.Settings NodeInfo + Locality roachpb.Locality AmbientCtx log.AmbientContext DB *client.DB Gossip *gossip.Gossip diff --git a/pkg/sql/logictest/testdata/logic_test/crdb_internal b/pkg/sql/logictest/testdata/logic_test/crdb_internal index d829f55228a6..2d82a91866c3 100644 --- a/pkg/sql/logictest/testdata/logic_test/crdb_internal +++ b/pkg/sql/logictest/testdata/logic_test/crdb_internal @@ -290,8 +290,8 @@ node_id component field value query ITTTTT colnames SELECT node_id, network, regexp_replace(address, '\d+$', '') as address, attrs, locality, regexp_replace(server_version, '^\d+\.\d+(-\d+)?$', '') as server_version FROM crdb_internal.gossip_nodes WHERE node_id = 1 ---- -node_id network address attrs locality server_version -1 tcp 127.0.0.1: [] {"region": "test"} +node_id network address attrs locality server_version +1 tcp 127.0.0.1: [] {"dc": "dc1", "region": "test"} query IITBB colnames SELECT node_id, epoch, regexp_replace(expiration, '^\d+\.\d+,\d+$', '') as expiration, draining, decommissioning FROM crdb_internal.gossip_liveness WHERE node_id = 1 @@ -303,8 +303,8 @@ query ITTTTTT colnames SELECT node_id, network, regexp_replace(address, '\d+$', '') as address, attrs, locality, regexp_replace(server_version, '^\d+\.\d+(-\d+)?$', '') as server_version, regexp_replace(go_version, '^go.+$', '') as go_version FROM crdb_internal.kv_node_status WHERE node_id = 1 ---- -node_id network address attrs locality server_version go_version -1 tcp 127.0.0.1: [] {"region": "test"} +node_id network address attrs locality server_version go_version +1 tcp 127.0.0.1: [] {"dc": "dc1", "region": "test"} query IITI colnames SELECT node_id, store_id, attrs, used diff --git a/pkg/sql/opt/exec/execbuilder/testdata/distsql_agg b/pkg/sql/opt/exec/execbuilder/testdata/distsql_agg index 4a875cff898b..5f0f452e0235 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/distsql_agg +++ b/pkg/sql/opt/exec/execbuilder/testdata/distsql_agg @@ -337,7 +337,7 @@ group-by ├── grouping columns: b:2 ├── internal-ordering: +2 opt(1) ├── stats: [rows=9.5617925, distinct(2)=9.5617925, null(2)=0] - ├── cost: 10.7156179 + ├── cost: 11.1156179 ├── key: (2) ├── fd: (2)-->(3) ├── prune: (3) @@ -345,7 +345,7 @@ group-by │ ├── columns: a:1 b:2 │ ├── constraint: /1/2: [/1 - /1] │ ├── stats: [rows=10, distinct(1)=1, null(1)=0, distinct(2)=9.5617925, null(2)=0] - │ ├── cost: 10.41 + │ ├── cost: 10.81 │ ├── key: (2) │ ├── fd: ()-->(1) │ ├── ordering: +2 opt(1) [actual: +2] diff --git a/pkg/sql/opt/exec/execbuilder/testdata/explain b/pkg/sql/opt/exec/execbuilder/testdata/explain index 7df851660c84..60a797aa7460 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/explain +++ b/pkg/sql/opt/exec/execbuilder/testdata/explain @@ -647,21 +647,21 @@ EXPLAIN (OPT,VERBOSE) SELECT * FROM tc WHERE a = 10 ORDER BY b sort ├── columns: a:1 b:2 ├── stats: [rows=9.9, distinct(1)=1, null(1)=0] - ├── cost: 51.3728708 + ├── cost: 52.2638708 ├── fd: ()-->(1) ├── ordering: +2 opt(1) [actual: +2] ├── prune: (2) └── index-join tc ├── columns: a:1 b:2 ├── stats: [rows=9.9, distinct(1)=1, null(1)=0] - ├── cost: 50.51 + ├── cost: 51.401 ├── fd: ()-->(1) ├── prune: (2) └── scan tc@c ├── columns: a:1 rowid:3 ├── constraint: /1/3: [/10 - /10] ├── stats: [rows=9.9, distinct(1)=1, null(1)=0, distinct(3)=9.9, null(3)=0] - ├── cost: 10.306 + ├── cost: 10.702 ├── key: (3) └── fd: ()-->(1) @@ -671,21 +671,21 @@ EXPLAIN (OPT,TYPES) SELECT * FROM tc WHERE a = 10 ORDER BY b sort ├── columns: a:1(int!null) b:2(int) ├── stats: [rows=9.9, distinct(1)=1, null(1)=0] - ├── cost: 51.3728708 + ├── cost: 52.2638708 ├── fd: ()-->(1) ├── ordering: +2 opt(1) [actual: +2] ├── prune: (2) └── index-join tc ├── columns: a:1(int!null) b:2(int) ├── stats: [rows=9.9, distinct(1)=1, null(1)=0] - ├── cost: 50.51 + ├── cost: 51.401 ├── fd: ()-->(1) ├── prune: (2) └── scan tc@c ├── columns: a:1(int!null) rowid:3(int!null) ├── constraint: /1/3: [/10 - /10] ├── stats: [rows=9.9, distinct(1)=1, null(1)=0, distinct(3)=9.9, null(3)=0] - ├── cost: 10.306 + ├── cost: 10.702 ├── key: (3) └── fd: ()-->(1) @@ -707,24 +707,24 @@ EXPLAIN (OPT, VERBOSE) SELECT * FROM tc WHERE a + 2 * b > 1 ORDER BY a*b sort ├── columns: a:1 b:2 [hidden: column4:4] ├── stats: [rows=333.333333] - ├── cost: 1129.24548 + ├── cost: 1179.24548 ├── fd: (1,2)-->(4) ├── ordering: +4 ├── prune: (1,2,4) └── project ├── columns: column4:4 a:1 b:2 ├── stats: [rows=333.333333] - ├── cost: 1066.69667 + ├── cost: 1116.69667 ├── fd: (1,2)-->(4) ├── prune: (1,2,4) ├── select │ ├── columns: a:1 b:2 │ ├── stats: [rows=333.333333] - │ ├── cost: 1060.02 + │ ├── cost: 1110.02 │ ├── scan tc │ │ ├── columns: a:1 b:2 │ │ ├── stats: [rows=1000] - │ │ ├── cost: 1050.01 + │ │ ├── cost: 1100.01 │ │ └── prune: (1,2) │ └── filters │ └── (a + (b * 2)) > 1 [outer=(1,2)] @@ -737,24 +737,24 @@ EXPLAIN (OPT, TYPES) SELECT * FROM tc WHERE a + 2 * b > 1 ORDER BY a*b sort ├── columns: a:1(int) b:2(int) [hidden: column4:4(int)] ├── stats: [rows=333.333333] - ├── cost: 1129.24548 + ├── cost: 1179.24548 ├── fd: (1,2)-->(4) ├── ordering: +4 ├── prune: (1,2,4) └── project ├── columns: column4:4(int) a:1(int) b:2(int) ├── stats: [rows=333.333333] - ├── cost: 1066.69667 + ├── cost: 1116.69667 ├── fd: (1,2)-->(4) ├── prune: (1,2,4) ├── select │ ├── columns: a:1(int) b:2(int) │ ├── stats: [rows=333.333333] - │ ├── cost: 1060.02 + │ ├── cost: 1110.02 │ ├── scan tc │ │ ├── columns: a:1(int) b:2(int) │ │ ├── stats: [rows=1000] - │ │ ├── cost: 1050.01 + │ │ ├── cost: 1100.01 │ │ └── prune: (1,2) │ └── filters │ └── gt [type=bool, outer=(1,2)] diff --git a/pkg/sql/opt/testutils/opttester/opt_tester.go b/pkg/sql/opt/testutils/opttester/opt_tester.go index 33c93a759855..0ec704c846e0 100644 --- a/pkg/sql/opt/testutils/opttester/opt_tester.go +++ b/pkg/sql/opt/testutils/opttester/opt_tester.go @@ -24,6 +24,7 @@ import ( "testing" "text/tabwriter" + "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/sql/opt" "github.com/cockroachdb/cockroach/pkg/sql/opt/cat" @@ -122,6 +123,15 @@ type Flags struct { // ReorderJoinsLimit is the maximum number of joins in a query which the optimizer // should attempt to reorder. JoinLimit int + + // Locality specifies the location of the planning node as a set of user- + // defined key/value pairs, ordered from most inclusive to least inclusive. + // If there are no tiers, then the node's location is not known. Examples: + // + // [region=eu] + // [region=us,dc=east] + // + Locality roachpb.Locality } // New constructs a new instance of the OptTester for the given SQL statement. @@ -223,6 +233,10 @@ func New(catalog cat.Catalog, sql string) *OptTester { // expression in the query tree for the purpose of creating alternate query // plans in the optimizer. // +// - locality: used to set the locality of the node that plans the query. This +// can effect costing when there are multiple possible indexes to choose +// from, each in different localities. +// func (ot *OptTester) RunCommand(tb testing.TB, d *datadriven.TestData) string { // Allow testcases to override the flags. for _, a := range d.CmdArgs { @@ -240,6 +254,7 @@ func (ot *OptTester) RunCommand(tb testing.TB, d *datadriven.TestData) string { ot.Flags.Verbose = testing.Verbose() ot.evalCtx.TestingKnobs.OptimizerCostPerturbation = ot.Flags.PerturbCost + ot.evalCtx.Locality = ot.Flags.Locality switch d.Cmd { case "exec-ddl": @@ -509,6 +524,14 @@ func (f *Flags) Set(arg datadriven.CmdArg) error { return err } + case "locality": + // Recombine multiple arguments, separated by commas. + locality := strings.Join(arg.Vals, ",") + err := f.Locality.Set(locality) + if err != nil { + return err + } + default: return fmt.Errorf("unknown argument: %s", arg.Key) } diff --git a/pkg/sql/opt/xform/coster.go b/pkg/sql/opt/xform/coster.go index 3e34966baf19..edea07a45342 100644 --- a/pkg/sql/opt/xform/coster.go +++ b/pkg/sql/opt/xform/coster.go @@ -19,11 +19,13 @@ import ( "math" "math/rand" + "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/sql/opt" "github.com/cockroachdb/cockroach/pkg/sql/opt/cat" "github.com/cockroachdb/cockroach/pkg/sql/opt/memo" "github.com/cockroachdb/cockroach/pkg/sql/opt/ordering" "github.com/cockroachdb/cockroach/pkg/sql/opt/props/physical" + "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" ) // Coster is used by the optimizer to assign a cost to a candidate expression @@ -31,6 +33,21 @@ import ( // expression has a lower cost than any other expression in the memo group, then // it becomes the new best expression for the group. // +// The set of costing formulas maintained by the coster for the set of all +// operators constitute the "cost model". A given cost model can be designed to +// maximize any optimization goal, such as: +// +// 1. Max aggregate cluster throughput (txns/sec across cluster) +// 2. Min transaction latency (time to commit txns) +// 3. Min latency to first row (time to get first row of txns) +// 4. Min memory usage +// 5. Some weighted combination of #1 - #4 +// +// The cost model in this file targets #1 as the optimization goal. However, +// note that #2 is implicitly important to that goal, since overall cluster +// throughput will suffer if there are lots of pending transactions waiting on +// I/O. +// // Coster is an interface so that different costing algorithms can be used by // the optimizer. For example, the OptSteps command uses a custom coster that // assigns infinite costs to some expressions in order to prevent them from @@ -52,6 +69,14 @@ type Coster interface { type coster struct { mem *memo.Memo + // locality gives the location of the current node as a set of user-defined + // key/value pairs, ordered from most inclusive to least inclusive. If there + // are no tiers, then the node's location is not known. Example: + // + // [region=us,dc=east] + // + locality roachpb.Locality + // perturbation indicates how much to randomly perturb the cost. It is used // to generate alternative plans for testing. For example, if perturbation is // 0.5, and the estimated cost of an expression is c, the cost returned by @@ -77,6 +102,18 @@ const ( seqIOCostFactor = 1 randIOCostFactor = 4 + // latencyCostFactor represents the throughput impact of doing scans on index + // that may be remotely located in a different locality. If latencies are + // higher, then overall cluster throughput will suffer somewhat, as there will + // be more queries in memory blocking on I/O. The impact on throughput is + // expected to be relatively low, so latencyCostFactor is set to a small + // value. However, even a low value will cause the optimizer to prefer + // indexes that are likely to be geographically closer, if they are otherwise + // the same cost to access. + // TODO(andyk): Need to do analysis to figure out right value and/or to come + // up with better way to incorporate latency into the coster. + latencyCostFactor = cpuCostFactor + // hugeCost is used with expressions we want to avoid; these are expressions // that "violate" a hint like forcing a specific index or join algorithm. // If the final expression has this cost or larger, it means that there was no @@ -85,8 +122,9 @@ const ( ) // Init initializes a new coster structure with the given memo. -func (c *coster) Init(mem *memo.Memo, perturbation float64) { +func (c *coster) Init(evalCtx *tree.EvalContext, mem *memo.Memo, perturbation float64) { c.mem = mem + c.locality = evalCtx.Locality c.perturbation = perturbation } @@ -446,16 +484,92 @@ func (c *coster) rowSortCost(numKeyCols int) memo.Cost { return memo.Cost(cost) } +// localityMatchScore returns a number from 0.0 to 1.0 that describes how well +// the current node's locality matches the given zone constraints, with 0.0 +// indicating 0% and 1.0 indicating 100%. In order to match, each successive +// locality tier must match at least one REQUIRED constraint and not match any +// PROHIBITED constraints. If a locality tier does not match, then tiers after +// it do not match either. For example: +// +// Locality = [region=us,dc=east] +// 0.0 = [] +// 0.0 = [+region=eu,+dc=uk] +// 0.0 = [-region=us] +// 0.0 = [+region=eu,+dc=east] +// 0.5 = [+region=us,+dc=west] +// 0.5 = [+region=us,-dc=east] +// 1.0 = [+region=us,+dc=east] +// 1.0 = [+region=us,+dc=east,+rack=1,-ssd] +// +func (c *coster) localityMatchScore(zone cat.Zone) float64 { + // If there are no replica constraints, then locality can't match. + if zone.ReplicaConstraintsCount() == 0 { + return 0.0 + } + + // matchTier returns true if it can locate a required constraint that matches + // the given tier. + matchConstraints := func(zc cat.ReplicaConstraints, tier *roachpb.Tier) bool { + for i, n := 0, zc.ConstraintCount(); i < n; i++ { + con := zc.Constraint(i) + if tier.Key == con.GetKey() && tier.Value == con.GetValue() { + // If this is a required constraint, then it matches, and no need to + // iterate further. If it's prohibited, then it cannot match, so no + // need to go further. + return con.IsRequired() + } + } + return false + } + + // matchReplConstraints returns true if all replica constraints match the + // given tier. + matchReplConstraints := func(zone cat.Zone, tier *roachpb.Tier) bool { + for i, n := 0, zone.ReplicaConstraintsCount(); i < n; i++ { + replCon := zone.ReplicaConstraints(i) + if !matchConstraints(replCon, tier) { + return false + } + } + return true + } + + // Keep iterating until non-matching tier is found, or all tiers are found to + // match. + matchCount := 0 + for i := range c.locality.Tiers { + if !matchReplConstraints(zone, &c.locality.Tiers[i]) { + break + } + matchCount++ + } + + return 1.0 * float64(matchCount) / float64(len(c.locality.Tiers)) +} + // rowScanCost is the CPU cost to scan one row, which depends on the number of // columns in the index and (to a lesser extent) on the number of columns we are // scanning. -func (c *coster) rowScanCost(table opt.TableID, index int, numScannedCols int) memo.Cost { +func (c *coster) rowScanCost(tabID opt.TableID, idxOrd int, numScannedCols int) memo.Cost { md := c.mem.Metadata() - numCols := md.Table(table).Index(index).ColumnCount() + tab := md.Table(tabID) + idx := tab.Index(idxOrd) + numCols := idx.ColumnCount() + + // Adjust cost based on how well the current locality matches the index's + // zone constraints. + var costFactor memo.Cost = cpuCostFactor + if len(c.locality.Tiers) != 0 { + // If 0% of locality tiers have matching constraints, then add additional + // cost. If 100% of locality tiers have matching constraints, then add no + // additional cost. Anything in between scales linearly with the number of + // matches. + costFactor += latencyCostFactor * memo.Cost(1.0-c.localityMatchScore(idx.Zone())) + } // The number of the columns in the index matter because more columns means // more data to scan. The number of columns we actually return also matters // because that is the amount of data that we could potentially transfer over // the network. - return memo.Cost(numCols+numScannedCols) * cpuCostFactor + return memo.Cost(numCols+numScannedCols) * costFactor } diff --git a/pkg/sql/opt/xform/optimizer.go b/pkg/sql/opt/xform/optimizer.go index 9da76819cfca..b0d684644cd7 100644 --- a/pkg/sql/opt/xform/optimizer.go +++ b/pkg/sql/opt/xform/optimizer.go @@ -101,7 +101,7 @@ func (o *Optimizer) Init(evalCtx *tree.EvalContext) { o.f.Init(evalCtx) o.mem = o.f.Memo() o.explorer.init(o) - o.defaultCoster.Init(o.mem, evalCtx.TestingKnobs.OptimizerCostPerturbation) + o.defaultCoster.Init(evalCtx, o.mem, evalCtx.TestingKnobs.OptimizerCostPerturbation) o.coster = &o.defaultCoster o.stateMap = make(map[groupStateKey]*groupState) o.matchedRule = nil @@ -879,7 +879,7 @@ func (o *Optimizer) FormatMemo(flags FmtFlags) string { // the real computed cost, not the perturbed cost. func (o *Optimizer) RecomputeCost() { var c coster - c.Init(o.mem, 0 /* perturbation */) + c.Init(o.evalCtx, o.mem, 0 /* perturbation */) root := o.mem.RootExpr() rootProps := o.mem.RootProps() diff --git a/pkg/sql/opt/xform/testdata/coster/zone b/pkg/sql/opt/xform/testdata/coster/zone new file mode 100644 index 000000000000..57bbf5588eea --- /dev/null +++ b/pkg/sql/opt/xform/testdata/coster/zone @@ -0,0 +1,377 @@ +exec-ddl +CREATE TABLE abc ( + a INT PRIMARY KEY, + b INT, + c STRING, + UNIQUE INDEX bc1 (b, c), + UNIQUE INDEX bc2 (b, c) +) +---- +TABLE abc + ├── a int not null + ├── b int + ├── c string + ├── INDEX primary + │ └── a int not null + ├── INDEX bc1 + │ ├── b int + │ ├── c string + │ └── a int not null (storing) + └── INDEX bc2 + ├── b int + ├── c string + └── a int not null (storing) + +exec-ddl +CREATE TABLE xy ( + x INT PRIMARY KEY, + y INT, + INDEX y1 (y), + INDEX y2 (y) +) +---- +TABLE xy + ├── x int not null + ├── y int + ├── INDEX primary + │ └── x int not null + ├── INDEX y1 + │ ├── y int + │ └── x int not null + └── INDEX y2 + ├── y int + └── x int not null + +# -------------------------------------------------- +# Single constraints. +# -------------------------------------------------- + +exec-ddl +ALTER INDEX abc@bc1 CONFIGURE ZONE USING constraints='[+region=east]' +---- +ZONE + └── constraints: [+region=east] + +exec-ddl +ALTER INDEX abc@bc2 CONFIGURE ZONE USING constraints='[+region=west]' +---- +ZONE + └── constraints: [+region=west] + +# With locality in east, use bc1 index. +opt format=show-all locality=(region=east) +SELECT b, c FROM abc where b=10 +---- +scan t.public.abc@bc1 + ├── columns: b:2(int!null) c:3(string) + ├── constraint: /2/3: [/10 - /10] + ├── stats: [rows=9.9, distinct(2)=1, null(2)=0] + ├── cost: 10.405 + ├── lax-key: (3) + ├── fd: ()-->(2) + ├── prune: (3) + └── interesting orderings: (+2,+3) + +# With locality in west, use bc2 index. +opt format=show-all locality=(region=west) +SELECT b, c FROM abc where b=10 +---- +scan t.public.abc@bc2 + ├── columns: b:2(int!null) c:3(string) + ├── constraint: /2/3: [/10 - /10] + ├── stats: [rows=9.9, distinct(2)=1, null(2)=0] + ├── cost: 10.405 + ├── lax-key: (3) + ├── fd: ()-->(2) + ├── prune: (3) + └── interesting orderings: (+2,+3) + +# No locality, so use bc1, since it's first. +opt format=show-all +SELECT b, c FROM abc where b=10 +---- +scan t.public.abc@bc1 + ├── columns: b:2(int!null) c:3(string) + ├── constraint: /2/3: [/10 - /10] + ├── stats: [rows=9.9, distinct(2)=1, null(2)=0] + ├── cost: 10.405 + ├── lax-key: (3) + ├── fd: ()-->(2) + ├── prune: (3) + └── interesting orderings: (+2,+3) + +# Locality doesn't match any constraints, so use bc1, since it's first. +opt format=show-all locality=(region=central) +SELECT b, c FROM abc where b=10 +---- +scan t.public.abc@bc1 + ├── columns: b:2(int!null) c:3(string) + ├── constraint: /2/3: [/10 - /10] + ├── stats: [rows=9.9, distinct(2)=1, null(2)=0] + ├── cost: 10.9 + ├── lax-key: (3) + ├── fd: ()-->(2) + ├── prune: (3) + └── interesting orderings: (+2,+3) + +# -------------------------------------------------- +# Multiple constraints. +# -------------------------------------------------- + +exec-ddl +ALTER INDEX abc@bc1 CONFIGURE ZONE USING constraints='[+region=us,+dc=east,+rack=1]' +---- +ZONE + └── constraints: [+region=us,+dc=east,+rack=1] + +exec-ddl +ALTER INDEX abc@bc2 CONFIGURE ZONE USING constraints='[+region=us,+dc=west,+rack=1]' +---- +ZONE + └── constraints: [+region=us,+dc=west,+rack=1] + +# With locality in us + east, use bc1 index. +opt format=show-all locality=(region=us,dc=east) +SELECT b, c FROM abc where b=10 +---- +scan t.public.abc@bc1 + ├── columns: b:2(int!null) c:3(string) + ├── constraint: /2/3: [/10 - /10] + ├── stats: [rows=9.9, distinct(2)=1, null(2)=0] + ├── cost: 10.405 + ├── lax-key: (3) + ├── fd: ()-->(2) + ├── prune: (3) + └── interesting orderings: (+2,+3) + +# With locality in us + west, use bc2 index. +opt format=show-all locality=(region=us,dc=west) +SELECT b, c FROM abc where b=10 +---- +scan t.public.abc@bc2 + ├── columns: b:2(int!null) c:3(string) + ├── constraint: /2/3: [/10 - /10] + ├── stats: [rows=9.9, distinct(2)=1, null(2)=0] + ├── cost: 10.405 + ├── lax-key: (3) + ├── fd: ()-->(2) + ├── prune: (3) + └── interesting orderings: (+2,+3) + +# Ignore "dc=west,rack=1" match if "region" does not match. +opt format=show-all locality=(region=eu,dc=west,rack=1) +SELECT b, c FROM abc where b=10 +---- +scan t.public.abc@bc1 + ├── columns: b:2(int!null) c:3(string) + ├── constraint: /2/3: [/10 - /10] + ├── stats: [rows=9.9, distinct(2)=1, null(2)=0] + ├── cost: 10.9 + ├── lax-key: (3) + ├── fd: ()-->(2) + ├── prune: (3) + └── interesting orderings: (+2,+3) + +# -------------------------------------------------- +# Multiple replica constraints. +# -------------------------------------------------- + +exec-ddl +ALTER INDEX abc@bc1 CONFIGURE ZONE USING constraints='{"+region=us,+dc=east":2, "+region=us,+dc=west":1}' +---- +ZONE + └── replica constraints + ├── 2 replicas: [+region=us,+dc=east] + └── 1 replicas: [+region=us,+dc=west] + +exec-ddl +ALTER INDEX abc@bc2 CONFIGURE ZONE USING constraints='[+region=us,+dc=east]' +---- +ZONE + └── constraints: [+region=us,+dc=east] + +# With locality in us, use bc1 index, since only one tier matches in case of +# both indexes. +opt format=show-all locality=(region=us) +SELECT b, c FROM abc where b=10 +---- +scan t.public.abc@bc1 + ├── columns: b:2(int!null) c:3(string) + ├── constraint: /2/3: [/10 - /10] + ├── stats: [rows=9.9, distinct(2)=1, null(2)=0] + ├── cost: 10.405 + ├── lax-key: (3) + ├── fd: ()-->(2) + ├── prune: (3) + └── interesting orderings: (+2,+3) + +# With locality in us + east, use bc2 index (use lowest match count when +# replicas have different numbers of matches). +opt format=show-all locality=(region=us,dc=east) +SELECT b, c FROM abc where b=10 +---- +scan t.public.abc@bc2 + ├── columns: b:2(int!null) c:3(string) + ├── constraint: /2/3: [/10 - /10] + ├── stats: [rows=9.9, distinct(2)=1, null(2)=0] + ├── cost: 10.405 + ├── lax-key: (3) + ├── fd: ()-->(2) + ├── prune: (3) + └── interesting orderings: (+2,+3) + +# -------------------------------------------------- +# Complex constraints. +# -------------------------------------------------- + +exec-ddl +ALTER INDEX abc@bc1 CONFIGURE ZONE USING constraints='[+region=us,-region=eu,+region=ap]' +---- +ZONE + └── constraints: [+region=us,-region=eu,+region=ap] + +exec-ddl +ALTER INDEX abc@bc2 CONFIGURE ZONE USING constraints='[+region=eu,+region=us,+dc=east]' +---- +ZONE + └── constraints: [+region=eu,+region=us,+dc=east] + +# With locality in us, use bc1, since it's first in order. +opt format=show-all locality=(region=us) +SELECT b, c FROM abc where b=10 +---- +scan t.public.abc@bc1 + ├── columns: b:2(int!null) c:3(string) + ├── constraint: /2/3: [/10 - /10] + ├── stats: [rows=9.9, distinct(2)=1, null(2)=0] + ├── cost: 10.405 + ├── lax-key: (3) + ├── fd: ()-->(2) + ├── prune: (3) + └── interesting orderings: (+2,+3) + +# With locality in eu, use bc2, since it's prohibited with bc1. +opt format=show-all locality=(region=eu) +SELECT b, c FROM abc where b=10 +---- +scan t.public.abc@bc2 + ├── columns: b:2(int!null) c:3(string) + ├── constraint: /2/3: [/10 - /10] + ├── stats: [rows=9.9, distinct(2)=1, null(2)=0] + ├── cost: 10.405 + ├── lax-key: (3) + ├── fd: ()-->(2) + ├── prune: (3) + └── interesting orderings: (+2,+3) + +# With locality in us + east, use bc2, since it matches both tiers, even though +# "us" match is after "eu" in list. +opt format=show-all locality=(region=us,dc=east) +SELECT b, c FROM abc where b=10 +---- +scan t.public.abc@bc2 + ├── columns: b:2(int!null) c:3(string) + ├── constraint: /2/3: [/10 - /10] + ├── stats: [rows=9.9, distinct(2)=1, null(2)=0] + ├── cost: 10.405 + ├── lax-key: (3) + ├── fd: ()-->(2) + ├── prune: (3) + └── interesting orderings: (+2,+3) + +# -------------------------------------------------- +# Lookup join. +# -------------------------------------------------- + +exec-ddl +ALTER INDEX abc@bc1 CONFIGURE ZONE USING constraints='[+region=us,+dc=east]' +---- +ZONE + └── constraints: [+region=us,+dc=east] + +exec-ddl +ALTER INDEX abc@bc2 CONFIGURE ZONE USING constraints='[+region=us,+dc=west]' +---- +ZONE + └── constraints: [+region=us,+dc=west] + +exec-ddl +ALTER INDEX xy@y1 CONFIGURE ZONE USING constraints='[+region=us,+dc=east]' +---- +ZONE + └── constraints: [+region=us,+dc=east] + +exec-ddl +ALTER INDEX xy@y2 CONFIGURE ZONE USING constraints='[+region=us,+dc=west]' +---- +ZONE + └── constraints: [+region=us,+dc=west] + +# Ensure that both indexes involved in the lookup join are selected from the +# "west" data center. +opt format=show-all locality=(region=us,dc=west) +SELECT * FROM abc INNER LOOKUP JOIN xy ON b=y WHERE b=1 +---- +inner-join (lookup xy@y2) + ├── columns: a:1(int!null) b:2(int!null) c:3(string) x:4(int!null) y:5(int!null) + ├── key columns: [2] = [5] + ├── stats: [rows=98.01, distinct(1)=9.9, null(1)=0, distinct(2)=1, null(2)=0, distinct(4)=9.9, null(4)=0, distinct(5)=1, null(5)=0] + ├── cost: 152.0444 + ├── key: (1,4) + ├── fd: ()-->(2,5), (1)-->(3), (2,3)~~>(1), (2)==(5), (5)==(2) + ├── prune: (1,3,4) + ├── interesting orderings: (+1) (+2,+3,+1) + ├── scan t.public.abc@bc2 + │ ├── columns: t.public.abc.a:1(int!null) t.public.abc.b:2(int!null) t.public.abc.c:3(string) + │ ├── constraint: /2/3: [/1 - /1] + │ ├── stats: [rows=9.9, distinct(1)=9.9, null(1)=0, distinct(2)=1, null(2)=0] + │ ├── cost: 10.504 + │ ├── key: (1) + │ ├── fd: ()-->(2), (1)-->(3), (2,3)~~>(1) + │ ├── prune: (1,3) + │ └── interesting orderings: (+1) (+2,+3,+1) + └── filters + └── eq [type=bool, outer=(5), constraints=(/5: [/1 - /1]; tight), fd=()-->(5)] + ├── variable: t.public.xy.y [type=int] + └── const: 1 [type=int] + +# Switch the data center for the target lookup join index. + +exec-ddl +ALTER INDEX xy@y1 CONFIGURE ZONE USING constraints='[+region=us,+dc=west]' +---- +ZONE + └── constraints: [+region=us,+dc=west] + +exec-ddl +ALTER INDEX xy@y2 CONFIGURE ZONE USING constraints='[+region=us,+dc=east]' +---- +ZONE + └── constraints: [+region=us,+dc=east] + +# Should use other index now. +opt format=show-all locality=(region=us,dc=west) +SELECT * FROM abc INNER LOOKUP JOIN xy ON b=y WHERE b=1 +---- +inner-join (lookup xy@y1) + ├── columns: a:1(int!null) b:2(int!null) c:3(string) x:4(int!null) y:5(int!null) + ├── key columns: [2] = [5] + ├── stats: [rows=98.01, distinct(1)=9.9, null(1)=0, distinct(2)=1, null(2)=0, distinct(4)=9.9, null(4)=0, distinct(5)=1, null(5)=0] + ├── cost: 152.0444 + ├── key: (1,4) + ├── fd: ()-->(2,5), (1)-->(3), (2,3)~~>(1), (2)==(5), (5)==(2) + ├── prune: (1,3,4) + ├── interesting orderings: (+1) (+2,+3,+1) + ├── scan t.public.abc@bc2 + │ ├── columns: t.public.abc.a:1(int!null) t.public.abc.b:2(int!null) t.public.abc.c:3(string) + │ ├── constraint: /2/3: [/1 - /1] + │ ├── stats: [rows=9.9, distinct(1)=9.9, null(1)=0, distinct(2)=1, null(2)=0] + │ ├── cost: 10.504 + │ ├── key: (1) + │ ├── fd: ()-->(2), (1)-->(3), (2,3)~~>(1) + │ ├── prune: (1,3) + │ └── interesting orderings: (+1) (+2,+3,+1) + └── filters + └── eq [type=bool, outer=(5), constraints=(/5: [/1 - /1]; tight), fd=()-->(5)] + ├── variable: t.public.xy.y [type=int] + └── const: 1 [type=int] diff --git a/pkg/sql/opt_catalog.go b/pkg/sql/opt_catalog.go index 6828c397ceb6..3c2759e619d5 100644 --- a/pkg/sql/opt_catalog.go +++ b/pkg/sql/opt_catalog.go @@ -55,8 +55,12 @@ var _ cat.Catalog = &optCatalog{} // init allows the caller to pre-allocate optCatalog. func (oc *optCatalog) init(planner *planner) { oc.planner = planner - oc.cfg = planner.execCfg.Gossip.GetSystemConfig() oc.dataSources = nil + + // Gossip can be nil in testing scenarios. + if planner.execCfg.Gossip != nil { + oc.cfg = planner.execCfg.Gossip.GetSystemConfig() + } } // optSchema is a wrapper around sqlbase.DatabaseDescriptor that implements the diff --git a/pkg/sql/planner.go b/pkg/sql/planner.go index 58c7f0393535..83790b4d6597 100644 --- a/pkg/sql/planner.go +++ b/pkg/sql/planner.go @@ -261,6 +261,7 @@ func newInternalPlanner( p.extendedEvalCtx.Sequence = p p.extendedEvalCtx.ClusterID = execCfg.ClusterID() p.extendedEvalCtx.NodeID = execCfg.NodeID.Get() + p.extendedEvalCtx.Locality = execCfg.Locality p.sessionDataMutator = dataMutator p.autoCommit = false diff --git a/pkg/sql/sem/tree/eval.go b/pkg/sql/sem/tree/eval.go index f5ab159a111f..49ed501539df 100644 --- a/pkg/sql/sem/tree/eval.go +++ b/pkg/sql/sem/tree/eval.go @@ -2489,6 +2489,15 @@ type EvalContext struct { Settings *cluster.Settings ClusterID uuid.UUID NodeID roachpb.NodeID + + // Locality contains the location of the current node as a set of user-defined + // key/value pairs, ordered from most inclusive to least inclusive. If there + // are no tiers, then the node's location is not known. Example: + // + // [region=us,dc=east] + // + Locality roachpb.Locality + // The statement timestamp. May be different for every statement. // Used for statement_timestamp(). StmtTimestamp time.Time diff --git a/pkg/sql/zone_config.go b/pkg/sql/zone_config.go index 9fc9a1bebc0f..98d80678890e 100644 --- a/pkg/sql/zone_config.go +++ b/pkg/sql/zone_config.go @@ -106,12 +106,12 @@ func getZoneConfig( return 0, nil, 0, nil, errNoZoneConfigApplies } -// CompleteZoneConfig takes a zone config pointer and fills in the +// completeZoneConfig takes a zone config pointer and fills in the // missing fields by following the chain of inheritance. // In the worst case, will have to inherit from the default zone config. // NOTE: This will not work for subzones. To complete subzones, find a complete // parent zone (index or table) and apply InheritFromParent to it. -func CompleteZoneConfig( +func completeZoneConfig( cfg *config.ZoneConfig, id uint32, getKey func(roachpb.Key) (*roachpb.Value, error), ) error { if cfg.IsComplete() { @@ -163,7 +163,7 @@ func ZoneConfigHook( } else if err != nil { return nil, nil, false, err } - if err = CompleteZoneConfig(zone, zoneID, getKey); err != nil { + if err = completeZoneConfig(zone, zoneID, getKey); err != nil { return nil, nil, false, err } return zone, placeholder, true, nil @@ -191,7 +191,7 @@ func GetZoneConfigInTxn( if err != nil { return 0, nil, nil, err } - if err = CompleteZoneConfig(zone, zoneID, getKey); err != nil { + if err = completeZoneConfig(zone, zoneID, getKey); err != nil { return 0, nil, nil, err } var subzone *config.Subzone diff --git a/pkg/testutils/testcluster/testcluster.go b/pkg/testutils/testcluster/testcluster.go index 281fc6bb4461..d7d7a43831ba 100644 --- a/pkg/testutils/testcluster/testcluster.go +++ b/pkg/testutils/testcluster/testcluster.go @@ -151,6 +151,17 @@ func StartTestCluster(t testing.TB, nodes int, args base.TestClusterArgs) *TestC } else { serverArgs = args.ServerArgs } + + // If there are multiple nodes, place them in different localities by + // default. + if nodes > 0 { + tiers := []roachpb.Tier{ + {Key: "region", Value: "test"}, + {Key: "dc", Value: fmt.Sprintf("dc%d", i+1)}, + } + serverArgs.Locality = roachpb.Locality{Tiers: tiers} + } + if i > 0 { serverArgs.JoinAddr = tc.Servers[0].ServingAddr() }