Skip to content

Commit

Permalink
slices: initial implementation of sorting functions
Browse files Browse the repository at this point in the history
Implements golang/go#47619 in the exp/slices package as a
testing ground prior to inclusion in the standard library.

Relies on the modified sorting function code generator proposed
in https://go-review.googlesource.com/c/go/+/353069 to
automatically generate the code of the sorting functions.

Benchmark comparing sort.Ints with the generic Sort function
added in this CL to sort a slice of int:

name           old time/op  new time/op  delta
Sort-8         12.0ms ± 1%   6.5ms ± 1%  -46.02%  (p=0.000 n=9+10)

Benchmark comparing sort.Sort with SortFunc to sort a slice of
struct pointers based on one field in the struct:

name           old time/op  new time/op  delta
SortStructs-8  18.6ms ± 2%  15.9ms ± 3%  -14.43%  (p=0.000 n=10+10)

Change-Id: Ic301aae7e5b8f99144e39b8a77fde897779588ed
Reviewed-on: https://go-review.googlesource.com/c/exp/+/378134
Reviewed-by: Ian Lance Taylor <[email protected]>
Trust: Cody Oss <[email protected]>
Trust: Jeremy Faller <[email protected]>
  • Loading branch information
eliben authored and ianlancetaylor committed Jan 21, 2022
1 parent 2c358f7 commit 7b334a1
Show file tree
Hide file tree
Showing 5 changed files with 1,079 additions and 0 deletions.
95 changes: 95 additions & 0 deletions slices/sort.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package slices

import "constraints"

// Sort sorts x, a slice of any ordered element type, in ascending order.
// The slice is sorted in place.
func Sort[Elem constraints.Ordered](x []Elem) {
	// Sort the full range [0, len(x)); maxDepth supplies the depth budget
	// handed to the quicksort helper.
	quickSortOrdered(x, 0, len(x), maxDepth(len(x)))
}

// SortFunc sorts the slice x in ascending order as determined by the less
// function. The slice is sorted in place.
// This sort is not guaranteed to be stable.
func SortFunc[Elem any](x []Elem, less func(a, b Elem) bool) {
	// Sort the full range [0, len(x)); maxDepth supplies the depth budget
	// handed to the quicksort helper.
	quickSortLessFunc(x, 0, len(x), maxDepth(len(x)), less)
}

// SortStableFunc sorts the slice x in place, using less to compare elements,
// while keeping the original relative order of elements that compare equal.
func SortStableFunc[Elem any](x []Elem, less func(a, b Elem) bool) {
	n := len(x)
	stableLessFunc(x, n, less)
}

// IsSorted reports whether x is sorted in ascending order.
// Slices with fewer than two elements are always reported as sorted.
func IsSorted[Elem constraints.Ordered](x []Elem) bool {
	// Scan adjacent pairs from the tail of the slice toward its head.
	for hi := len(x) - 1; hi >= 1; hi-- {
		if x[hi-1] > x[hi] {
			return false
		}
	}
	return true
}

// IsSortedFunc reports whether x is sorted in ascending order, with less as
// the comparison function. Slices with fewer than two elements are always
// reported as sorted, and less is not called for them.
func IsSortedFunc[Elem any](x []Elem, less func(a, b Elem) bool) bool {
	// Scan adjacent pairs from the tail of the slice toward its head; a pair
	// is out of order when the later element is less than the earlier one.
	for hi := len(x) - 1; hi >= 1; hi-- {
		later, earlier := x[hi], x[hi-1]
		if less(later, earlier) {
			return false
		}
	}
	return true
}

// BinarySearch searches for target in a sorted slice and returns the smallest
// index at which target is found. If target is not present, the returned index
// is the position at which it could be inserted (which may be len(x)).
// Callers that need to know whether target itself was found must therefore
// compare the element at the returned index with target themselves.
func BinarySearch[Elem constraints.Ordered](x []Elem, target Elem) int {
	// The predicate is true from the first element that is >= target onward.
	notBefore := func(i int) bool { return x[i] >= target }
	return search(len(x), notBefore)
}

// BinarySearchFunc uses binary search to find and return the smallest index i
// in [0, len(x)) at which ok(x[i]) is true, assuming that ok(x[i]) == true
// implies ok(x[i+1]) == true. That is, BinarySearchFunc requires that ok is
// false for some (possibly empty) prefix of x and then true for the (possibly
// empty) remainder; BinarySearchFunc returns the index of the first element
// for which ok is true. If there is no such element, BinarySearchFunc returns
// len(x). (Note that the "not found" return value is not -1 as in, for
// instance, strings.Index.) ok is only ever called with elements of x.
func BinarySearchFunc[Elem any](x []Elem, ok func(Elem) bool) int {
	// Adapt the element predicate to the index predicate that search expects.
	byIndex := func(i int) bool { return ok(x[i]) }
	return search(len(x), byIndex)
}

// maxDepth returns a threshold at which quicksort should switch
// to heapsort. It returns 2*ceil(lg(n+1)); for n <= 0 the result is 0.
func maxDepth(n int) int {
	depth := 0
	// Each halving of n that still leaves a positive value accounts for one
	// bit of n, and every bit contributes two levels to the threshold.
	for rest := n; rest > 0; rest >>= 1 {
		depth += 2
	}
	return depth
}

// search returns the smallest index i in [0, n) for which f(i) is true,
// assuming f is false on a (possibly empty) prefix of [0, n) and true on the
// rest. It returns n when f is false everywhere. f is only called with
// arguments in [0, n).
func search(n int, f func(int) bool) int {
	// Conceptually f(-1) == false and f(n) == true.
	// Loop invariant: f(lo-1) == false and f(hi) == true.
	lo, hi := 0, n
	for lo < hi {
		// Midpoint computed through uint so that lo+hi cannot overflow.
		mid := int(uint(lo+hi) >> 1)
		// lo ≤ mid < hi
		if f(mid) {
			hi = mid // keeps f(hi) == true
			continue
		}
		lo = mid + 1 // keeps f(lo-1) == false
	}
	// lo == hi, f(lo-1) == false and f(hi) == true, so lo is the answer.
	return lo
}
116 changes: 116 additions & 0 deletions slices/sort_benchmark_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package slices

import (
"math/rand"
"sort"
"testing"
)

// These benchmarks compare sorting a large slice of int with sort.Ints vs.
// slices.Sort

// makeRandomInts returns a slice of n pseudo-random ints, each in [0, n).
// It draws from a private generator with a fixed seed, so every call with
// the same n returns the same values and the process-wide math/rand
// generator (used by other tests in this package) is left untouched.
func makeRandomInts(n int) []int {
	rng := rand.New(rand.NewSource(42))
	ints := make([]int, n)
	for i := range ints {
		ints[i] = rng.Intn(n)
	}
	return ints
}

// N is the number of elements the benchmarks in this file pass to
// makeRandomInts and makeRandomStructs for each iteration.
const N = 100_000

// BenchmarkSortInts measures sort.Ints on a fresh slice of N pseudo-random
// ints per iteration.
func BenchmarkSortInts(b *testing.B) {
	for iter := 0; iter < b.N; iter++ {
		// Build the input with the timer paused so only sorting is measured.
		b.StopTimer()
		input := makeRandomInts(N)
		b.StartTimer()

		sort.Ints(input)
	}
}

// BenchmarkSlicesSort measures the generic Sort on a fresh slice of N
// pseudo-random ints per iteration.
func BenchmarkSlicesSort(b *testing.B) {
	for iter := 0; iter < b.N; iter++ {
		// Build the input with the timer paused so only sorting is measured.
		b.StopTimer()
		input := makeRandomInts(N)
		b.StartTimer()

		Sort(input)
	}
}

// TestIntSorts checks that sort.Ints and Sort produce the same result for the
// same input, since the benchmarks in this file compare the two.
func TestIntSorts(t *testing.T) {
	want := makeRandomInts(200)
	got := Clone(want)

	sort.Ints(want)
	Sort(got)

	for i, w := range want {
		if g := got[i]; w != g {
			t.Fatalf("ints2 mismatch at %d; %d != %d", i, w, g)
		}
	}
}

// These benchmarks compare sorting a slice of structs with sort.Sort vs.
// slices.SortFunc.

// myStruct is the element type used by the struct-sorting benchmarks.
// Only the n field takes part in ordering.
type myStruct struct {
	a, b, c, d string
	n          int
}

// myStructs is a slice of myStruct pointers that can be passed to sort.Sort;
// elements are ordered by their n field.
type myStructs []*myStruct

// Len returns the number of elements.
func (ms myStructs) Len() int {
	return len(ms)
}

// Less reports whether element i has a smaller n than element j.
func (ms myStructs) Less(i, j int) bool {
	left, right := ms[i], ms[j]
	return left.n < right.n
}

// Swap exchanges the elements at indices i and j.
func (ms myStructs) Swap(i, j int) {
	ms[j], ms[i] = ms[i], ms[j]
}

// makeRandomStructs returns n newly allocated structs whose n field is a
// pseudo-random int in [0, n); all other fields are left at their zero value.
// It draws from a private generator with a fixed seed, so every call with
// the same n returns the same values and the process-wide math/rand
// generator (used by other tests in this package) is left untouched.
func makeRandomStructs(n int) myStructs {
	rng := rand.New(rand.NewSource(42))
	structs := make([]*myStruct, n)
	for i := range structs {
		structs[i] = &myStruct{n: rng.Intn(n)}
	}
	return structs
}

// TestStructSorts checks that sort.Sort and SortFunc order the same input
// identically, since the benchmarks in this file compare the two.
func TestStructSorts(t *testing.T) {
	ss := makeRandomStructs(200)
	// Build an independent copy so the two sorts do not share elements.
	ss2 := make([]*myStruct, len(ss))
	for i := range ss {
		ss2[i] = &myStruct{n: ss[i].n}
	}

	sort.Sort(ss)
	SortFunc(ss2, func(a, b *myStruct) bool { return a.n < b.n })

	// Compare the pointed-to values, not the pointers. Only n is populated,
	// so elements with equal keys are indistinguishable and neither sort
	// needs to be stable for the results to match.
	for i := range ss {
		if *ss[i] != *ss2[i] {
			t.Fatalf("structs mismatch at %d; %v != %v", i, *ss[i], *ss2[i])
		}
	}
}

// BenchmarkSortStructs measures sort.Sort on a fresh slice of N struct
// pointers per iteration.
func BenchmarkSortStructs(b *testing.B) {
	for iter := 0; iter < b.N; iter++ {
		// Build the input with the timer paused so only sorting is measured.
		b.StopTimer()
		input := makeRandomStructs(N)
		b.StartTimer()

		sort.Sort(input)
	}
}

// BenchmarkSortFuncStructs measures SortFunc on a fresh slice of N struct
// pointers per iteration, ordering by the n field.
func BenchmarkSortFuncStructs(b *testing.B) {
	// The comparison function is created once, outside the loop.
	byN := func(x, y *myStruct) bool { return x.n < y.n }
	for iter := 0; iter < b.N; iter++ {
		// Build the input with the timer paused so only sorting is measured.
		b.StopTimer()
		input := makeRandomStructs(N)
		b.StartTimer()

		SortFunc(input, byN)
	}
}
182 changes: 182 additions & 0 deletions slices/sort_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package slices

import (
"math"
"math/rand"
"testing"
)

// Unsorted fixtures shared by the sorting tests in this file. They are
// arrays, so slicing one with [:] aliases the package-level data rather than
// copying it.

// ints holds positive, negative, zero and duplicate values.
var ints = [...]int{74, 59, 238, -784, 9845, 959, 905, 0, 0, 42, 7586, -5467984, 7586}
// float64s additionally includes +Inf, -Inf and two NaN values.
var float64s = [...]float64{74.3, 59.0, math.Inf(1), 238.2, -784.0, 2.3, math.NaN(), math.NaN(), math.Inf(-1), 9845.768, -959.7485, 905, 7.8, 7.8}
// strs includes the empty string and a duplicate entry.
var strs = [...]string{"", "Hello", "foo", "bar", "foo", "f00", "%*&^*&^&", "***"}

// TestSortIntSlice checks that Sort orders the ints fixture.
func TestSortIntSlice(t *testing.T) {
	// Sort a copy of the fixture array: slicing ints directly would sort the
	// package-level data in place, handing later tests pre-sorted input and
	// making the diagnostic below print the sorted data as the "input".
	fixture := ints
	data := fixture[:]
	Sort(data)
	if !IsSorted(data) {
		t.Errorf("sorted %v", ints)
		t.Errorf(" got %v", data)
	}
}

// TestSortFuncIntSlice checks that SortFunc with a plain < comparison orders
// the ints fixture.
func TestSortFuncIntSlice(t *testing.T) {
	// Sort a copy of the fixture array: slicing ints directly would sort the
	// package-level data in place, handing later tests pre-sorted input and
	// making the diagnostic below print the sorted data as the "input".
	fixture := ints
	data := fixture[:]
	SortFunc(data, func(a, b int) bool { return a < b })
	if !IsSorted(data) {
		t.Errorf("sorted %v", ints)
		t.Errorf(" got %v", data)
	}
}

// TestSortFloat64Slice checks that Sort orders the float64s fixture.
func TestSortFloat64Slice(t *testing.T) {
	// Sort a copy of the fixture array: slicing float64s directly would sort
	// the package-level data in place and make the diagnostic below print the
	// sorted data as the "input".
	fixture := float64s
	data := fixture[:]
	Sort(data)
	// NOTE(review): the fixture contains NaN, and IsSorted compares with <,
	// so a pair involving NaN is never reported as out of order.
	if !IsSorted(data) {
		t.Errorf("sorted %v", float64s)
		t.Errorf(" got %v", data)
	}
}

// TestSortStringSlice checks that Sort orders the strs fixture.
func TestSortStringSlice(t *testing.T) {
	// Sort a copy of the fixture array: slicing strs directly would sort the
	// package-level data in place and make the diagnostic below print the
	// sorted data as the "input".
	fixture := strs
	data := fixture[:]
	Sort(data)
	if !IsSorted(data) {
		t.Errorf("sorted %v", strs)
		t.Errorf(" got %v", data)
	}
}

// TestSortLarge_Random sorts a large slice of random ints drawn from
// [0, 100) and checks the result with IsSorted. In -short mode the slice is
// 100 times smaller.
func TestSortLarge_Random(t *testing.T) {
	size := 1000000
	if testing.Short() {
		size /= 100
	}

	data := make([]int, size)
	for i := range data {
		data[i] = rand.Intn(100)
	}
	// Sanity check: the random input must not already be sorted, otherwise
	// the test proves nothing.
	if IsSorted(data) {
		t.Fatalf("terrible rand.rand")
	}

	Sort(data)
	if sorted := IsSorted(data); !sorted {
		t.Errorf("sort didn't sort - 1M ints")
	}
}

// intPair is a sort key (a) paired with a tag (b) that the stability test
// uses to remember each element's position before sorting.
type intPair struct {
	a, b int
}

// intPairs is a slice of intPair with helpers for the stability test.
type intPairs []intPair

// intPairLess orders pairs by their a field alone; b is ignored.
func intPairLess(x, y intPair) bool {
	return x.a < y.a
}

// initB stores every element's current index in its b field.
func (d intPairs) initB() {
	for idx := range d {
		d[idx].b = idx
	}
}

// inOrder reports whether, within each run of consecutive elements sharing
// the same a, the b values are strictly increasing, i.e. a-equal elements
// were not reordered relative to the positions recorded by initB.
func (d intPairs) inOrder() bool {
	prevA, prevB := -1, 0
	for _, p := range d {
		switch {
		case p.a != prevA:
			// A new run of a-values starts here; nothing to compare yet.
			prevA = p.a
		case p.b <= prevB:
			// Same a as the previous element but not a later original index.
			return false
		}
		prevB = p.b
	}
	return true
}

// TestStability exercises SortStableFunc on three inputs (random keys,
// already sorted data, and strictly decreasing keys) and checks after each
// sort that the data is ordered and that a-equal elements kept their
// relative order. Sizes are reduced in -short mode.
func TestStability(t *testing.T) {
	size, keyRange := 100000, 1000
	if testing.Short() {
		size, keyRange = 1000, 100
	}
	data := make(intPairs, size)

	// Phase 1: random keys drawn from [0, keyRange).
	for i := range data {
		data[i].a = rand.Intn(keyRange)
	}
	if IsSortedFunc(data, intPairLess) {
		t.Fatalf("terrible rand.rand")
	}
	data.initB()
	SortStableFunc(data, intPairLess)
	if sorted := IsSortedFunc(data, intPairLess); !sorted {
		t.Errorf("Stable didn't sort %d ints", size)
	}
	if stable := data.inOrder(); !stable {
		t.Errorf("Stable wasn't stable on %d ints", size)
	}

	// Phase 2: the data is now sorted; re-tag positions and sort again.
	data.initB()
	SortStableFunc(data, intPairLess)
	if sorted := IsSortedFunc(data, intPairLess); !sorted {
		t.Errorf("Stable shuffled sorted %d ints (order)", size)
	}
	if stable := data.inOrder(); !stable {
		t.Errorf("Stable shuffled sorted %d ints (stability)", size)
	}

	// Phase 3: strictly decreasing keys, i.e. reverse-sorted input.
	for i := range data {
		data[i].a = len(data) - i
	}
	data.initB()
	SortStableFunc(data, intPairLess)
	if sorted := IsSortedFunc(data, intPairLess); !sorted {
		t.Errorf("Stable didn't sort %d ints", size)
	}
	if stable := data.inOrder(); !stable {
		t.Errorf("Stable wasn't stable on %d ints", size)
	}
}

// TestBinarySearch checks BinarySearch and BinarySearchFunc against a small
// sorted slice, covering targets that are present, absent (insertion point
// expected), and larger than every element (len(data) expected).
func TestBinarySearch(t *testing.T) {
	data := []string{"aa", "ad", "ca", "xy"}
	cases := []struct {
		target string
		want   int
	}{
		{target: "aa", want: 0},
		{target: "ab", want: 1},
		{target: "ad", want: 1},
		{target: "ax", want: 2},
		{target: "ca", want: 2},
		{target: "cc", want: 3},
		{target: "dd", want: 3},
		{target: "xy", want: 3},
		{target: "zz", want: 4},
	}
	for _, tc := range cases {
		// The subtest runs synchronously, so capturing tc in the closures
		// below is safe.
		t.Run(tc.target, func(t *testing.T) {
			if got := BinarySearch(data, tc.target); got != tc.want {
				t.Errorf("BinarySearch want %d, got %d", tc.want, got)
			}

			atOrAfter := func(s string) bool { return s >= tc.target }
			if got := BinarySearchFunc(data, atOrAfter); got != tc.want {
				t.Errorf("BinarySearchFunc want %d, got %d", tc.want, got)
			}
		})
	}
}
Loading

0 comments on commit 7b334a1

Please sign in to comment.