-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmath.go
318 lines (301 loc) · 9.68 KB
/
math.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
// chris 052315
package swar
import (
"math"
)
// Abs32 returns the absolute value of a 32-bit float.
//
// Special cases are:
// Abs32(±Inf) = Inf
// Abs32(NaN) = NaN
func Abs32(x float32) float32 {
xb := math.Float32bits(x)
return math.Float32frombits(xb & 0x7fffffff)
}
// Abs64 returns the absolute value of a 64-bit float.
//
// Special cases are:
// Abs64(±Inf) = Inf
// Abs64(NaN) = NaN
func Abs64(x float64) float64 {
xb := math.Float64bits(x)
return math.Float64frombits(xb & 0x7fffffffffffffff)
}
// Average returns the (floor of the) average of x and y.
//
// Commentary from The Aggregate:
//
// This is actually an extension of the "well known" fact that for
// binary integer values x and y, x + y equals (x & y) + (x | y) equals
// (x ^ y) + 2 * (x & y).
//
// Given two integer values x and y, the (floor of the) average normally
// would be computed by (x + y) / 2; unfortunately, this can yield
// incorrect results due to overflow. A very sneaky alternative is to
// use (x & y) + ((x ^ y) / 2). The benefit is that this code sequence
// cannot overflow.
//
// Shifts in Go are signed, so this can be simplified to
// (x & y) + ((x ^ y) >> 1).
func Average(x, y uint) uint {
return (x & y) + ((x ^ y) >> 1)
}
// DivCeil32 returns the ceiling of the quotient of a and b.
//
// Uses 64-bit integers. We take the simple approach here and avoid
// integer overflow by using twice as many bits as are necessary.
//
// Commentary from The Aggregate:
//
// This trick also works if divide is implemented in less obvious ways,
// such as shifts or shift-and-subtract sequences.
func DivCeil32(a, b uint32) uint32 {
a64 := uint64(a)
b64 := uint64(b)
return uint32((a64 + b64 - 1) / b64)
}
// DivRoundNearest32 returns the quotient of a and b, rounded to the
// nearest integer.
//
// Uses 64-bit integers. We take the simple approach here and avoid
// integer overflow by using twice as many bits as are necessary.
//
// Commentary from The Aggregate:
//
// This trick also works if divide is implemented in less obvious ways,
// such as shifts or shift-and-subtract sequences.
func DivRoundNearest32(a, b uint32) uint32 {
a64 := uint64(a)
b64 := uint64(b)
return uint32((a64 + (b64 / 2)) / b64)
}
// Min32 returns the minimum of x and y.
//
// Uses 64-bit integers. We take the simple approach here and avoid
// integer over- and underflow by using twice as many bits are are
// necessary.
//
// Commentary from The Aggregate:
//
// Logically, this works because the shift by (WORDBITS-1) replicates
// the sign bit to create a mask
func Min32(x, y int32) int32 {
x64 := int64(x)
y64 := int64(y)
return int32(x64 + (((y64 - x64) >> (64 - 1)) & (y64 - x64)))
}
// Max32 returns the maximum of x and y.
//
// Uses 64-bit integers. We take the simple approach here and avoid
// integer over- and underflow by using twice as many bits are are
// necessary.
//
// Commentary from The Aggregate:
//
// Logically, this works because the shift by (WORDBITS-1) replicates
// the sign bit to create a mask
func Max32(x, y int32) int32 {
x64 := int64(x)
y64 := int64(y)
return int32(x64 - (((x64 - y64) >> (64 - 1)) & (x64 - y64)))
}
// IsPow2 returns whether x is a power of 2.
func IsPow2(x uint) bool {
return x&(x-1) == 0
}
// Nlpo232 returns the next largest power of 2 from 32-bit x.
//
// Special cases are:
//
// Nlpo232(x) = 0 // where x has the high bit set
//
// Commentary from The Aggregate:
//
// Given a binary integer value x, the next largest power of 2 can be
// computed by a SWAR algorithm that recursively "folds" the upper bits
// into the lower bits. This process yields a bit vector with the same
// most significant 1 as x, but all 1's below it. Adding 1 to that
// value yields the next largest power of 2.
func Nlpo232(x uint32) uint32 {
x |= x >> 1
x |= x >> 2
x |= x >> 4
x |= x >> 8
x |= x >> 16
return x + 1
}
// Nlpo264 returns the next largest power of 2 from 64-bit x.
//
// Special cases are:
//
// Nlpo264(x) = 0 // where x has the high bit set
//
// Commentary from The Aggregate:
//
// Given a binary integer value x, the next largest power of 2 can be
// computed by a SWAR algorithm that recursively "folds" the upper bits
// into the lower bits. This process yields a bit vector with the same
// most significant 1 as x, but all 1's below it. Adding 1 to that
// value yields the next largest power of 2.
func Nlpo264(x uint64) uint64 {
x |= x >> 1
x |= x >> 2
x |= x >> 4
x |= x >> 8
x |= x >> 16
x |= x >> 32
return x + 1
}
// SameWithinTolerance32 determines if 32-bit integers a and b have the
// same value within the given 32-bit tolerance c.
//
// Uses 64-bit integers. We take the simple approach here and avoid
// integer over- and underflow by using twice as many bits are are
// necessary.
//
// Commentary from The Aggregate:
//
// The obvious test would be something like
// (a > b) ? (a - b) : (b - a) < c, which isn't horrifically
// inefficient, but does involve a conditional branch. Alternatively,
// abs(a-b) < c would do... but again, it takes some cleverness to
// implement abs() without a conditional jump. Here's a branchless
// alternative.
//
// If a - b > 0, then b - a < 0; similarly, if a - b < 0, then
// b - a > 0. Both can't be greater than 0 simultaneously. Suppose
// that a - b > 0. Subtracting (a - b) - c will produce a negative
// result iff a and b are within c of each other. Of course, our
// assumption requires b - a < 0, so (b - a) - c simply becomes more
// negative (assuming the value doesn't wrap around). Generalizing, if
// either (a - b) - c > 0 or (b - a) - c > 0 then the values of a and b
// are not the same within tolerance c. In other words, they are within
// tolerance if:
//
// (a - b - c) & (b - a - c) < 0
//
// This test can be rewritten a variety of ways. The < 0 part is really
// just examining the sign bit, so a mask or shift could be used to
// extract the bit value instead. For example, using 32-bit words,
// (a - b - c) & (b - a - c) >> 31 using unsigned >> will produce the
// value 1 for true or 0 for false. It is also possible to factor-out
// t = a - b, giving:
//
// (t - c) & (-t - c) < 0
//
// Which is really equivalent to abs(t) < c.
func SameWithinTolerance32(a, b, c int32) bool {
a64 := int64(a)
b64 := int64(b)
c64 := int64(c)
t64 := a64 - b64
return ((t64-c64)&(-t64-c64)>>(64-1))&1 == 1
}
// Log2Floor32 returns the floor of the base 2 logarithm of 32-bit x.
//
// Special case is:
//
// Log2Floor32(0) = 0
//
// Commentary from The Aggregate:
//
// Given a binary integer value x, the floor of the base 2 log of that
// number efficiently can be computed by the application of two
// variable-precision SWAR algorithms. The first "folds" the upper bits
// into the lower bits to construct a bit vector with the same most
// significant 1 as x, but all 1's below it. The second SWAR algorithm
// is population count, defined elsewhere in this package.
func Log2Floor32(x uint32) uint32 {
x |= x >> 1
x |= x >> 2
x |= x >> 4
x |= x >> 8
x |= x >> 16
return Ones32(x >> 1)
}
// Log2Floor64 returns the floor of the base 2 logarithm of 64-bit x.
//
// Special case is:
//
// Log2Floor64(0) = 0
//
// Commentary from The Aggregate:
//
// Given a binary integer value x, the floor of the base 2 log of that
// number efficiently can be computed by the application of two
// variable-precision SWAR algorithms. The first "folds" the upper bits
// into the lower bits to construct a bit vector with the same most
// significant 1 as x, but all 1's below it. The second SWAR algorithm
// is population count, defined elsewhere in this package.
func Log2Floor64(x uint64) uint64 {
x |= x >> 1
x |= x >> 2
x |= x >> 4
x |= x >> 8
x |= x >> 16
x |= x >> 32
return Ones64(x >> 1)
}
// Log2Ceil32 returns the ceiling of the base 2 logarithm of 32-bit x.
//
// Special case is:
//
// Log2Ceil32(0) = 0
//
// Commentary from The Aggregate:
//
// Given a binary integer value x, the floor of the base 2 log of that
// number efficiently can be computed by the application of two
// variable-precision SWAR algorithms. The first "folds" the upper bits
// into the lower bits to construct a bit vector with the same most
// significant 1 as x, but all 1's below it. The second SWAR algorithm
// is population count, defined elsewhere in this package.
//
// Suppose instead that you want the ceiling of the base 2 log. The
// floor and ceiling are identical if x is a power of two; otherwise,
// the result is 1 too small. This can be corrected using the power of
// 2 test followed with the comparison-to-mask shift used in integer
// minimum/maximum.
func Log2Ceil32(x uint32) uint32 {
y := x & (x - 1) // Like IsPow2.
y |= -y
y >>= 32 - 1
x |= x >> 1
x |= x >> 2
x |= x >> 4
x |= x >> 8
x |= x >> 16
return Ones32(x>>1) + y
}
// Log2Ceil64 returns the ceiling of the base 2 logarithm of 64-bit x.
//
// Special case is:
//
// Log2Ceil64(0) = 0
//
// Commentary from The Aggregate:
//
// Given a binary integer value x, the floor of the base 2 log of that
// number efficiently can be computed by the application of two
// variable-precision SWAR algorithms. The first "folds" the upper bits
// into the lower bits to construct a bit vector with the same most
// significant 1 as x, but all 1's below it. The second SWAR algorithm
// is population count, defined elsewhere in this package.
//
// Suppose instead that you want the ceiling of the base 2 log. The
// floor and ceiling are identical if x is a power of two; otherwise,
// the result is 1 too small. This can be corrected using the power of
// 2 test followed with the comparison-to-mask shift used in integer
// minimum/maximum.
func Log2Ceil64(x uint64) uint64 {
y := x & (x - 1) // Like IsPow2.
y |= -y
y >>= 64 - 1
x |= x >> 1
x |= x >> 2
x |= x >> 4
x |= x >> 8
x |= x >> 16
x |= x >> 32
return Ones64(x>>1) + y
}