sql: clarify/fix the meaning of "Datum comparisons"
Prior to this patch, the interface method `(Datum).Compare()` had the
dual role of *ordering* datums (which requires a total relation) and
of *comparing* datums in the SQL logical sense, where NULLs have
special behavior.

Contrary to intuition, the two relations are not derivable from each
other and need separate code. The implementation of the `Compare()`
method is currently tailored towards ordering (although it also
contains a bug in that role) and, meanwhile, does not suitably perform
SQL logical comparisons.

So there are two groups of issues at hand:

- every caller of `Compare()` that is interested in a SQL logical
  comparison other than "distinctness" is mistaken and hence
  erroneous (see the sketch after this list).

- every caller of `Compare()` that is interested in ordering is
  currently subject to a bug/limitation of `Compare()` in some edge
  cases involving tuples of arrays containing NULL elements.
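
For illustration only (not part of this commit), the first kind of
mistaken caller typically looks like the hypothetical sketch below,
which uses the pre-existing `Compare()` signature; the `example`
package and helper name are invented for this sketch:

```go
package example

import "github.com/cockroachdb/cockroach/pkg/sql/sem/tree"

// sqlEquals is a hypothetical helper showing the mistaken pattern: it
// answers the SQL predicate `a = b` using the ordering relation.
// Because Compare() returns 0 for (NULL, NULL), it reports "equal"
// where the SQL `=` operator must evaluate to NULL (unknown).
func sqlEquals(evalCtx *tree.EvalContext, a, b tree.Datum) bool {
	return a.Compare(evalCtx, b) == 0
}
```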

The fix to the second issue, which will come in a later patch, will
cause the ordering function to become *completely* inadequate for
performing SQL logical comparisons. This, in turn, will require two
separate interfaces for the two comparison functions.

In advance of that patch, this first commit creates a new API for
datum comparisons. It is defined as follows (`tree/compare.go`):

```go
// This implements the comparators for three separate relations.
//
// - a total ordering for the purpose of sorting and searching
//   values in indexes. In that relation, NULL sorts at the
//   same location as itself and before other values.
//
//   Functions: TotalOrderLess(), TotalOrderCompare().
//
// - the logical SQL scalar partial ordering, where non-NULL
//   values can be compared to each other but any comparison
//   involving NULL produces a NULL result.
//
//   Function: scalarCompare()
//
// - the IS [NOT] DISTINCT relation, in which every value can
//   be compared to every other, NULLs are distinct from every
//   non-NULL value but not distinct from each other.
//
//   Function: Distinct().
//
// Due to the way the SQL language semantics are constructed, it is
// the case that Distinct() returns true if and only if
// TotalOrderCompare() returns nonzero. However, one should be
// careful when using these methods to properly convey *intent* to the
// reader of the code:
//
// - the functions related to the total order for sorting should only
//   be used in contexts that are about sorting values.
//
// - Distinct() and scalarCompare() should be used everywhere else.
//
// Besides, separating Distinct() from TotalOrderCompare() enables
// later performance optimizations of the former by specializing the
// code. This is currently done for e.g. EncDatums.
```
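
For a rough illustration of how these relations diverge on NULL
operands, here is a sketch based only on the call sites visible in
this diff; the `example` package, the exact signatures, and the
expected results in the comments are assumptions rather than code
from the patch:

```go
package example

import "github.com/cockroachdb/cockroach/pkg/sql/sem/tree"

// nullBehavior contrasts the two exported relations on NULL operands.
// scalarCompare() is unexported and not shown; it would yield NULL
// (unknown) for every comparison below.
func nullBehavior(evalCtx *tree.EvalContext) {
	null := tree.DNull
	one := tree.NewDInt(tree.DInt(1))

	// Total order for sorting: NULL gets a definite position, before
	// any non-NULL value and at the same location as another NULL.
	_ = tree.TotalOrderCompare(evalCtx, null, one)  // -1 (assumed)
	_ = tree.TotalOrderCompare(evalCtx, null, null) // 0

	// IS [NOT] DISTINCT: NULL is distinct from every non-NULL value
	// but not distinct from another NULL.
	_ = tree.Distinct(evalCtx, null, one)  // true
	_ = tree.Distinct(evalCtx, null, null) // false
}
```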

This commit only introduces the new API, without any change in behavior.

Release note: None
knz committed Jul 25, 2018
1 parent f29dfe6 commit 31cfa27
Showing 43 changed files with 445 additions and 309 deletions.
4 changes: 2 additions & 2 deletions pkg/sql/distsqlrun/aggregator.go
@@ -418,10 +418,10 @@ func (ag *orderedAggregator) close() {
// columns, and false otherwise.
func (ag *aggregatorBase) matchLastOrdGroupCols(row sqlbase.EncDatumRow) (bool, error) {
for _, colIdx := range ag.orderedGroupCols {
res, err := ag.lastOrdGroupCols[colIdx].Compare(
cmp, err := ag.lastOrdGroupCols[colIdx].Distinct(
&ag.inputTypes[colIdx], &ag.datumAlloc, &ag.flowCtx.EvalCtx, &row[colIdx],
)
if res != 0 || err != nil {
if cmp || err != nil {
return false, err
}
}
31 changes: 14 additions & 17 deletions pkg/sql/distsqlrun/disk_row_container_test.go
@@ -38,27 +38,24 @@ import (
// l < r, 0 if l == r, and 1 if l > r. If an error is returned the int returned
// is invalid. Note that the comparison is only performed on the ordering
// columns.
func compareRows(
func distinctRows(
lTypes []sqlbase.ColumnType,
l, r sqlbase.EncDatumRow,
e *tree.EvalContext,
d *sqlbase.DatumAlloc,
ordering sqlbase.ColumnOrdering,
) (int, error) {
) (bool, error) {
for _, orderInfo := range ordering {
col := orderInfo.ColIdx
cmp, err := l[col].Compare(&lTypes[col], d, e, &r[orderInfo.ColIdx])
cmp, err := l[col].Distinct(&lTypes[col], d, e, &r[orderInfo.ColIdx])
if err != nil {
return 0, err
return false, err
}
if cmp != 0 {
if orderInfo.Direction == encoding.Descending {
cmp = -cmp
}
return cmp, nil
if cmp {
return true, nil
}
}
return 0, nil
return false, nil
}

func TestDiskRowContainer(t *testing.T) {
@@ -160,9 +157,9 @@ func TestDiskRowContainer(t *testing.T) {

// Check equality of the row we wrote and the row we read.
for i := range row {
if cmp, err := readRow[i].Compare(&types[i], &d.datumAlloc, &evalCtx, &row[i]); err != nil {
if cmp, err := readRow[i].Distinct(&types[i], &d.datumAlloc, &evalCtx, &row[i]); err != nil {
t.Fatal(err)
} else if cmp != 0 {
} else if cmp {
t.Fatalf("encoded %s but decoded %s", row.String(types), readRow.String(types))
}
}
@@ -222,13 +219,13 @@ }
}

// Check sorted order.
if cmp, err := compareRows(
if cmp, err := distinctRows(
types, sortedRows.EncRow(numKeysRead), row, &evalCtx, &d.datumAlloc, ordering,
); err != nil {
t.Fatal(err)
} else if cmp != 0 {
} else if cmp {
t.Fatalf(
"expected %s to be equal to %s",
"expected %s to be not distinct from %s",
row.String(types),
sortedRows.EncRow(numKeysRead).String(types),
)
@@ -322,9 +319,9 @@ func TestDiskRowContainerFinalIterator(t *testing.T) {
// checkEqual checks that the given row is equal to otherRow.
checkEqual := func(row sqlbase.EncDatumRow, otherRow sqlbase.EncDatumRow) error {
for j, c := range row {
if cmp, err := c.Compare(&intType, alloc, &evalCtx, &otherRow[j]); err != nil {
if cmp, err := c.Distinct(&intType, alloc, &evalCtx, &otherRow[j]); err != nil {
return err
} else if cmp != 0 {
} else if cmp {
return fmt.Errorf(
"unexpected row %v, expected %v",
row.String(oneIntCol),
6 changes: 3 additions & 3 deletions pkg/sql/distsqlrun/distinct.go
@@ -26,7 +26,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/mon"
"github.com/cockroachdb/cockroach/pkg/util/stringarena"
"github.com/cockroachdb/cockroach/pkg/util/tracing"
"github.com/opentracing/opentracing-go"
opentracing "github.com/opentracing/opentracing-go"
"github.com/pkg/errors"
)

@@ -163,10 +163,10 @@ func (d *Distinct) matchLastGroupKey(row sqlbase.EncDatumRow) (bool, error) {
return false, nil
}
for _, colIdx := range d.orderedCols {
res, err := d.lastGroupKey[colIdx].Compare(
cmp, err := d.lastGroupKey[colIdx].Distinct(
&d.types[colIdx], &d.datumAlloc, d.evalCtx, &row[colIdx],
)
if res != 0 || err != nil {
if cmp || err != nil {
return false, err
}
}
4 changes: 2 additions & 2 deletions pkg/sql/distsqlrun/input_sync.go
@@ -108,7 +108,7 @@ func (s *orderedSynchronizer) Len() int {
func (s *orderedSynchronizer) Less(i, j int) bool {
si := &s.sources[s.heap[i]]
sj := &s.sources[s.heap[j]]
cmp, err := si.row.Compare(s.types, &s.alloc, s.ordering, s.evalCtx, sj.row)
cmp, err := si.row.TotalOrderCompare(s.types, &s.alloc, s.ordering, s.evalCtx, sj.row)
if err != nil {
s.err = err
return false
@@ -241,7 +241,7 @@ func (s *orderedSynchronizer) advanceRoot() error {
} else {
heap.Fix(s, 0)
// TODO(radu): this check may be costly, we could disable it in production
if cmp, err := oldRow.Compare(s.types, &s.alloc, s.ordering, s.evalCtx, src.row); err != nil {
if cmp, err := oldRow.TotalOrderCompare(s.types, &s.alloc, s.ordering, s.evalCtx, src.row); err != nil {
return err
} else if cmp > 0 {
return errors.Errorf(
14 changes: 7 additions & 7 deletions pkg/sql/distsqlrun/routers_test.go
@@ -39,7 +39,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/mon"
"github.com/cockroachdb/cockroach/pkg/util/randutil"
"github.com/cockroachdb/cockroach/pkg/util/tracing"
"github.com/opentracing/opentracing-go"
opentracing "github.com/opentracing/opentracing-go"
)

// setupRouter creates and starts a router. Returns the router and a WaitGroup
@@ -172,11 +172,11 @@ func TestRouters(t *testing.T) {
for _, row2 := range r2 {
equal := true
for _, c := range tc.spec.HashColumns {
cmp, err := row[c].Compare(&types[c], alloc, evalCtx, &row2[c])
cmp, err := row[c].Distinct(&types[c], alloc, evalCtx, &row2[c])
if err != nil {
t.Fatal(err)
}
if cmp != 0 {
if cmp {
equal = false
break
}
@@ -207,11 +207,11 @@

equal := true
for j, c := range row {
cmp, err := c.Compare(&types[j], alloc, evalCtx, &row2[j])
cmp, err := c.Distinct(&types[j], alloc, evalCtx, &row2[j])
if err != nil {
t.Fatal(err)
}
if cmp != 0 {
if cmp {
equal = false
break
}
@@ -801,9 +801,9 @@ func TestRouterDiskSpill(t *testing.T) {
}
// Verify correct order (should be the order in which we added rows).
for j, c := range row {
if cmp, err := c.Compare(&intType, alloc, &flowCtx.EvalCtx, &rows[i][j]); err != nil {
if cmp, err := c.Distinct(&intType, alloc, &flowCtx.EvalCtx, &rows[i][j]); err != nil {
t.Fatal(err)
} else if cmp != 0 {
} else if cmp {
t.Fatalf(
"order violated on row %d, expected %v got %v",
i,
11 changes: 4 additions & 7 deletions pkg/sql/distsqlrun/row_container.go
@@ -140,11 +140,8 @@ func (mc *memRowContainer) initWithMon(

// Less is part of heap.Interface and is only meant to be used internally.
func (mc *memRowContainer) Less(i, j int) bool {
cmp := sqlbase.CompareDatums(mc.ordering, mc.evalCtx, mc.At(i), mc.At(j))
if mc.invertSorting {
cmp = -cmp
}
return cmp < 0
ra, rb := mc.At(i), mc.At(j)
return sqlbase.LessDatums(mc.ordering, mc.invertSorting, mc.evalCtx, ra, rb)
}

// EncRow returns the idx-th row as an EncDatumRow. The slice itself is reused
@@ -189,11 +186,11 @@ func (mc *memRowContainer) Pop() interface{} { panic("unimplemented") }
// smaller. Assumes InitTopK was called.
func (mc *memRowContainer) MaybeReplaceMax(ctx context.Context, row sqlbase.EncDatumRow) error {
max := mc.At(0)
cmp, err := row.CompareToDatums(mc.types, &mc.datumAlloc, mc.ordering, mc.evalCtx, max)
cmp, err := row.LessThanDatums(mc.types, &mc.datumAlloc, mc.ordering, mc.evalCtx, max)
if err != nil {
return err
}
if cmp < 0 {
if cmp {
// row is smaller than the max; replace.
for i := range row {
if err := row[i].EnsureDecoded(&mc.types[i], &mc.datumAlloc); err != nil {
4 changes: 2 additions & 2 deletions pkg/sql/distsqlrun/row_container_test.go
@@ -52,11 +52,11 @@ func verifyRows(
if err != nil {
return err
}
if cmp, err := compareRows(
if cmp, err := distinctRows(
oneIntCol, row, expectedRows[0], evalCtx, &sqlbase.DatumAlloc{}, ordering,
); err != nil {
return err
} else if cmp != 0 {
} else if cmp {
return fmt.Errorf("unexpected row %v, expected %v", row, expectedRows[0])
}
expectedRows = expectedRows[1:]
6 changes: 3 additions & 3 deletions pkg/sql/distsqlrun/sorter.go
@@ -23,7 +23,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/humanizeutil"
"github.com/cockroachdb/cockroach/pkg/util/mon"
"github.com/cockroachdb/cockroach/pkg/util/tracing"
"github.com/opentracing/opentracing-go"
opentracing "github.com/opentracing/opentracing-go"
)

// sorter sorts the input rows according to the specified ordering.
@@ -501,8 +501,8 @@ func (s *sortChunksProcessor) chunkCompleted(
types := s.input.OutputTypes()
for _, ord := range s.ordering[:s.matchLen] {
col := ord.ColIdx
cmp, err := nextChunkRow[col].Compare(&types[col], &s.alloc, s.evalCtx, &prefix[col])
if cmp != 0 || err != nil {
cmp, err := nextChunkRow[col].Distinct(&types[col], &s.alloc, s.evalCtx, &prefix[col])
if cmp || err != nil {
return true, err
}
}
2 changes: 1 addition & 1 deletion pkg/sql/distsqlrun/stream_group_accumulator.go
@@ -101,7 +101,7 @@ func (s *streamGroupAccumulator) nextGroup(
continue
}

cmp, err := s.curGroup[0].Compare(s.types, &s.datumAlloc, s.ordering, evalCtx, row)
cmp, err := s.curGroup[0].TotalOrderCompare(s.types, &s.datumAlloc, s.ordering, evalCtx, row)
if err != nil {
return nil, &ProducerMetadata{Err: err}
}
2 changes: 1 addition & 1 deletion pkg/sql/distsqlrun/stream_merger.go
@@ -143,7 +143,7 @@ func CompareEncDatumRowForMerge(
}
continue
}
cmp, err := lhs[lIdx].Compare(&lhsTypes[lIdx], da, evalCtx, &rhs[rIdx])
cmp, err := lhs[lIdx].TotalOrderCompare(&lhsTypes[lIdx], da, evalCtx, &rhs[rIdx])
if err != nil {
return 0, err
}
4 changes: 2 additions & 2 deletions pkg/sql/distsqlrun/values_test.go
@@ -105,11 +105,11 @@ func TestValuesProcessor(t *testing.T) {
t.Fatalf("row %d incorrect length %d, expected %d", i, len(res[i]), numCols)
}
for j, val := range res[i] {
cmp, err := val.Compare(&colTypes[j], &a, evalCtx, &inRows[i][j])
cmp, err := val.Distinct(&colTypes[j], &a, evalCtx, &inRows[i][j])
if err != nil {
t.Fatal(err)
}
if cmp != 0 {
if cmp {
t.Errorf(
"row %d, column %d: received %s, expected %s",
i, j, val.String(&colTypes[j]), inRows[i][j].String(&colTypes[j]),
15 changes: 7 additions & 8 deletions pkg/sql/distsqlrun/zigzagjoiner.go
@@ -245,6 +245,8 @@ type zigzagJoiner struct {
// TODO(andrei): get rid of this field and move the actions it gates into the
// Start() method.
started bool

datumAlloc sqlbase.DatumAlloc
}

// Batch size is a parameter which determines how many rows should be fetched
@@ -604,12 +606,9 @@ func (z *zigzagJoiner) matchBase(curRow sqlbase.EncDatumRow, side int) (bool, er
}

// Compare the equality columns of the baseRow to that of the curRow.
da := &sqlbase.DatumAlloc{}
cmp, err := prevEqDatums.Compare(eqColTypes, da, ordering, &z.flowCtx.EvalCtx, curEqDatums)
if err != nil {
return false, err
}
return cmp == 0, nil
cmp, err := prevEqDatums.Distinct(
eqColTypes, &z.datumAlloc, ordering, &z.flowCtx.EvalCtx, curEqDatums)
return !cmp, err
}

// emitFromContainers returns the next row that is to be emitted from those
@@ -753,8 +752,8 @@ func (z *zigzagJoiner) nextRow(
if err != nil {
return nil, z.producerMeta(err)
}
da := &sqlbase.DatumAlloc{}
cmp, err := prevEqCols.Compare(eqColTypes, da, ordering, &z.flowCtx.EvalCtx, currentEqCols)
cmp, err := prevEqCols.TotalOrderCompare(
eqColTypes, &z.datumAlloc, ordering, &z.flowCtx.EvalCtx, currentEqCols)
if err != nil {
return nil, z.producerMeta(err)
}
2 changes: 1 addition & 1 deletion pkg/sql/group.go
@@ -331,7 +331,7 @@ type groupRun struct {
// grouping columns, and false otherwise.
func (n *groupNode) matchLastGroupKey(ctx *tree.EvalContext, row tree.Datums) bool {
for _, i := range n.orderedGroupCols {
if n.run.lastOrderedGroupKey[i].Compare(ctx, row[i]) != 0 {
if tree.Distinct(ctx, n.run.lastOrderedGroupKey[i], row[i]) {
return false
}
}
6 changes: 3 additions & 3 deletions pkg/sql/opt/constraint/constraint.go
@@ -436,12 +436,12 @@ func (c *Constraint) ExactPrefix(evalCtx *tree.EvalContext) int {
return col
}
startVal := sp.start.Value(col)
if startVal.Compare(evalCtx, sp.end.Value(col)) != 0 {
if tree.Distinct(evalCtx, startVal, sp.end.Value(col)) {
return col
}
if i == 0 {
val = startVal
} else if startVal.Compare(evalCtx, val) != 0 {
} else if tree.Distinct(evalCtx, startVal, val) {
return col
}
}
@@ -465,7 +465,7 @@ func (c *Constraint) Prefix(evalCtx *tree.EvalContext) int {
start := sp.StartKey()
end := sp.EndKey()
if start.Length() <= prefix || end.Length() <= prefix ||
start.Value(prefix).Compare(evalCtx, end.Value(prefix)) != 0 {
tree.Distinct(evalCtx, start.Value(prefix), end.Value(prefix)) {
return prefix
}
}
2 changes: 1 addition & 1 deletion pkg/sql/opt/constraint/key.go
@@ -291,7 +291,7 @@ func (c *KeyContext) Compare(colIdx int, a, b tree.Datum) int {
if a == b {
return 0
}
cmp := a.Compare(c.EvalCtx, b)
cmp := tree.TotalOrderCompare(c.EvalCtx, a, b)
if c.Columns.Get(colIdx).Descending() {
cmp = -cmp
}
4 changes: 2 additions & 2 deletions pkg/sql/opt/idxconstraint/index_constraints.go
@@ -883,9 +883,9 @@ func (c *indexConstraintCtx) getMaxSimplifyPrefix(idxConstraint *constraint.Cons
for i := 0; i < idxConstraint.Spans.Count(); i++ {
sp := idxConstraint.Spans.Get(i)
j := 0
// Find the longest prefix of equal values.
// Find the longest prefix of non-distinct values.
for ; j < sp.StartKey().Length() && j < sp.EndKey().Length(); j++ {
if sp.StartKey().Value(j).Compare(c.evalCtx, sp.EndKey().Value(j)) != 0 {
if tree.Distinct(c.evalCtx, sp.StartKey().Value(j), sp.EndKey().Value(j)) {
break
}
}
4 changes: 2 additions & 2 deletions pkg/sql/opt/memo/statistics_builder.go
@@ -1189,7 +1189,7 @@ func (sb *statisticsBuilder) updateDistinctCountsFromConstraint(
}
startVal := sp.StartKey().Value(col)
endVal := sp.EndKey().Value(col)
if startVal.Compare(sb.evalCtx, endVal) != 0 {
if tree.Distinct(sb.evalCtx, startVal, endVal) {
// TODO(rytaft): are there other types we should handle here
// besides int?
if startVal.ResolvedType() == types.Int && endVal.ResolvedType() == types.Int {
@@ -1211,7 +1211,7 @@
}
}
if i != 0 {
compare := startVal.Compare(sb.evalCtx, val)
compare := tree.TotalOrderCompare(sb.evalCtx, startVal, val)
ascending := c.Columns.Get(col).Ascending()
if (compare > 0 && ascending) || (compare < 0 && !ascending) {
// This check is needed to ensure that we calculate the correct distinct
2 changes: 1 addition & 1 deletion pkg/sql/opt/xfunc/custom_funcs.go
@@ -81,7 +81,7 @@ func (c *CustomFuncs) ConstructSortedUniqueList(list memo.ListID) (memo.ListID,
// Remove duplicates from the list.
n := 0
for i := 0; i < int(list.Length); i++ {
if i == 0 || ls.compare(i-1, i) < 0 {
if i == 0 || ls.less(i-1, i) {
lb.items[n] = lb.items[i]
n++
}