Skip to content

Commit 4fa2036

Browse files
authored
Add unsafe little endian loaders (#1036)
Benchmarks without assembly (may be a bit noisy) deflate: ``` BEFORE: github-june-2days-2019.json gzkp 1 6273951764 1073607045 17441 343.04 github-june-2days-2019.json gzkp 2 6273951764 1045461954 24258 246.65 github-june-2days-2019.json gzkp 3 6273951764 1030139729 21752 275.06 github-june-2days-2019.json gzkp 4 6273951764 992526317 25868 231.29 github-june-2days-2019.json gzkp 5 6273951764 938015731 28992 206.38 github-june-2days-2019.json gzkp 6 6273951764 918717756 32863 182.07 github-june-2days-2019.json gzkp 7 6273951764 924473679 42332 141.34 github-june-2days-2019.json gzkp 8 6273951764 905294390 53014 112.86 github-june-2days-2019.json gzkp 9 6273951764 895561157 100686 59.43 github-june-2days-2019.json gzkp -2 6273951764 4097019597 12499 478.70 github-june-2days-2019.json gzkp -3 6273951764 1175153215 24140 247.85 AFTER: github-june-2days-2019.json gzkp 1 6273951764 1073607045 16584 360.79 github-june-2days-2019.json gzkp 2 6273951764 1045461954 19113 313.04 github-june-2days-2019.json gzkp 3 6273951764 1030139729 20420 293.00 github-june-2days-2019.json gzkp 4 6273951764 992526317 23619 253.32 github-june-2days-2019.json gzkp 5 6273951764 938015731 26842 222.90 github-june-2days-2019.json gzkp 6 6273951764 918717756 30541 195.90 github-june-2days-2019.json gzkp 7 6273951764 924473679 43810 136.57 github-june-2days-2019.json gzkp 8 6273951764 905294390 73933 80.93 github-june-2days-2019.json gzkp 9 6273951764 895561157 98379 60.82 github-june-2days-2019.json gzkp -2 6273951764 4097019597 13439 445.20 github-june-2days-2019.json gzkp -3 6273951764 1175153215 22819 262.20 ``` zstd: ``` github-june-2days-2019.json zskp 1 6273951764 697439481 9378 637.96 github-june-2days-2019.json zskp 2 6273951764 610876538 12416 481.87 github-june-2days-2019.json zskp 3 6273951764 545382443 40775 146.74 github-june-2days-2019.json zskp 4 6273951764 522934301 114291 52.35 github-june-2days-2019.json zskp 1 6273951764 697439481 8325 718.69 github-june-2days-2019.json zskp 2 
6273951764 610876538 9905 604.04 github-june-2days-2019.json zskp 3 6273951764 545382443 29954 199.74 github-june-2days-2019.json zskp 4 6273951764 522934301 111174 53.82 ``` s2: ``` github-june-2days-2019.json s2 1 6273951764 1041705230 522 11443.55 github-june-2days-2019.json s2 2 6273951764 944873043 1248 4793.24 github-june-2days-2019.json s2 3 6273951764 826384742 9999 598.37 github-june-2days-2019.json s2 1 6273951764 1041705230 464 12868.90 github-june-2days-2019.json s2 2 6273951764 944873043 861 6947.69 github-june-2days-2019.json s2 3 6273951764 826384742 9335 640.94 ``` <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit - **New Features** - Introduced a new `le` package for flexible integer type handling. - Added new functions for loading and storing binary data in little-endian format. - Enhanced test coverage with `nounsafe` build tag in GitHub Actions workflow. - **Refactor** - Updated byte loading mechanisms across multiple packages. - Replaced `encoding/binary` imports with custom `internal/le` package. - Modified bit reader and decoder offset handling. - Adjusted decoding logic to utilize cursor for state management. - Removed outdated comments regarding bounds checks in code. - **Chores** - Updated build constraints and import statements. - Refined error handling in decoding processes. - Adjusted assembly code offsets for improved performance. - Updated Go version from 1.19 to 1.21 in module file. - **Tests** - Simplified error reporting in decompression tests. <!-- end of auto-generated comment: release notes by coderabbit.ai -->
1 parent 7d9f61a commit 4fa2036

26 files changed

+276
-193
lines changed

.github/workflows/go.yml

+42-51
Original file line numberDiff line numberDiff line change
@@ -32,18 +32,24 @@ jobs:
3232
- name: Test
3333
run: go test ./...
3434

35-
- name: Test Noasm
35+
- name: Test No-asm
3636
run: go test -tags=noasm ./...
3737

38+
- name: Test No-unsafe
39+
run: go test -tags=nounsafe ./...
40+
41+
- name: Test No-unsafe, noasm
42+
run: go test -tags="nounsafe,noasm" ./...
43+
3844
- name: Test Race 1 CPU
3945
env:
4046
CGO_ENABLED: 1
41-
run: go test -cpu=1 -short -race -v ./...
47+
run: go test -cpu=1 -short -race -tags=nounsafe -v ./...
4248

4349
- name: Test Race 4 CPU
4450
env:
4551
CGO_ENABLED: 1
46-
run: go test -cpu=4 -short -race -v ./...
52+
run: go test -cpu=4 -short -race -tags=nounsafe -v ./...
4753

4854
generate:
4955
strategy:
@@ -112,6 +118,9 @@ jobs:
112118
env:
113119
CGO_ENABLED: 0
114120
runs-on: ubuntu-latest
121+
strategy:
122+
matrix:
123+
tags: [ 'nounsafe', '"noasm,nounsafe"' ]
115124
steps:
116125
- name: Set up Go
117126
uses: actions/[email protected]
@@ -121,28 +130,23 @@ jobs:
121130
- name: Checkout code
122131
uses: actions/checkout@v4
123132

124-
- name: S2/FuzzDictBlocks
125-
run: go test -run=none -fuzz=FuzzDictBlocks -fuzztime=100000x -test.fuzzminimizetime=10ms ./s2/.
133+
- name: S2/FuzzDictBlocks/${{ matrix.tags }}
134+
run: go test -tags=${{ matrix.tags }} -run=none -fuzz=FuzzDictBlocks -fuzztime=100000x -test.fuzzminimizetime=10ms ./s2/.
126135

127-
- name: S2/FuzzEncodingBlocks
128-
run: go test -run=none -fuzz=FuzzEncodingBlocks -fuzztime=500000x -test.fuzzminimizetime=10ms ./s2/.
136+
- name: S2/FuzzEncodingBlocks/${{ matrix.tags }}
137+
run: go test -tags=${{ matrix.tags }} -run=none -fuzz=FuzzEncodingBlocks -fuzztime=500000x -test.fuzzminimizetime=10ms ./s2/.
129138

130-
- name: S2/FuzzLZ4Block
131-
run: go test -run=none -fuzz=FuzzLZ4Block -fuzztime=500000x -test.fuzzminimizetime=10ms ./s2/.
139+
- name: S2/FuzzLZ4Block/${{ matrix.tags }}
140+
run: go test -tags=${{ matrix.tags }} -run=none -fuzz=FuzzLZ4Block -fuzztime=500000x -test.fuzzminimizetime=10ms ./s2/.
132141

133-
- name: S2/FuzzDictBlocks/noasm
134-
run: go test -tags=noasm -run=none -fuzz=FuzzDictBlocks -fuzztime=100000x -test.fuzzminimizetime=10ms ./s2/.
135-
136-
- name: S2/FuzzEncodingBlocks/noasm
137-
run: go test -tags=noasm -run=none -fuzz=FuzzEncodingBlocks -fuzztime=500000x -test.fuzzminimizetime=10ms ./s2/.
138-
139-
- name: S2/FuzzLZ4Block/noasm
140-
run: go test -tags=noasm -run=none -fuzz=FuzzLZ4Block -fuzztime=500000x -test.fuzzminimizetime=10ms ./s2/.
141142

142143
fuzz-zstd:
143144
env:
144145
CGO_ENABLED: 0
145146
runs-on: ubuntu-latest
147+
strategy:
148+
matrix:
149+
tags: [ 'nounsafe', '"noasm,nounsafe"' ]
146150
steps:
147151
- name: Set up Go
148152
uses: actions/[email protected]
@@ -152,57 +156,44 @@ jobs:
152156
- name: Checkout code
153157
uses: actions/checkout@v4
154158

155-
- name: zstd/FuzzDecodeAll
156-
run: go test -run=none -fuzz=FuzzDecodeAll -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/.
159+
- name: zstd/FuzzDecodeAll/${{ matrix.tags }}
160+
run: go test -tags=${{ matrix.tags }} -run=none -fuzz=FuzzDecodeAll -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/.
157161

158-
- name: zstd/FuzzDecAllNoBMI2
159-
run: go test -run=none -fuzz=FuzzDecAllNoBMI2 -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/.
162+
- name: zstd/FuzzDecAllNoBMI2/${{ matrix.tags }}
163+
run: go test -tags=${{ matrix.tags }} -run=none -fuzz=FuzzDecAllNoBMI2 -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/.
160164

161-
- name: zstd/FuzzDecoder
162-
run: go test -run=none -fuzz=FuzzDecoder -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/.
165+
- name: zstd/FuzzDecoder/${{ matrix.tags }}
166+
run: go test -tags=${{ matrix.tags }} -run=none -fuzz=FuzzDecoder -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/.
163167

164-
- name: zstd/FuzzNoBMI2Dec
165-
run: go test -run=none -fuzz=FuzzNoBMI2Dec -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/.
168+
- name: zstd/FuzzNoBMI2Dec/${{ matrix.tags }}
169+
run: go test -tags=${{ matrix.tags }} -run=none -fuzz=FuzzNoBMI2Dec -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/.
166170

167-
- name: zstd/FuzzEncoding
168-
run: cd zstd&&go test -run=none -fuzz=FuzzEncoding -fuzztime=250000x -test.fuzzminimizetime=10ms -fuzz-end=3&&cd ..
169-
170-
- name: zstd/FuzzDecodeAll/noasm
171-
run: go test -tags=noasm -run=none -fuzz=FuzzDecodeAll -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/.
172-
173-
- name: zstd/FuzzDecoder/noasm
174-
run: go test -tags=noasm -run=none -fuzz=FuzzDecoder -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/.
175-
176-
- name: zstd/FuzzEncoding/noasm
177-
run: cd zstd&&go test -tags=noasm -run=none -fuzz=FuzzEncoding -fuzztime=250000x -test.fuzzminimizetime=10ms -fuzz-end=3&&cd ..
178-
179-
- name: zstd/FuzzEncodingBest
180-
run: cd zstd&&go test -run=none -fuzz=FuzzEncoding -fuzztime=25000x -test.fuzzminimizetime=10ms -fuzz-start=4&&cd ..
171+
- name: zstd/FuzzEncoding/${{ matrix.tags }}
172+
run: cd zstd&&go test -tags=${{ matrix.tags }} -run=none -fuzz=FuzzEncoding -fuzztime=250000x -test.fuzzminimizetime=10ms -fuzz-end=3&&cd ..
181173

182174
fuzz-other:
183175
env:
184176
CGO_ENABLED: 0
185177
runs-on: ubuntu-latest
178+
strategy:
179+
matrix:
180+
tags: [ 'nounsafe', '"noasm,nounsafe"' ]
186181
steps:
187182
- name: Set up Go
188183
uses: actions/[email protected]
189184
with:
190185
go-version: 1.23.x
191-
192186
- name: Checkout code
193187
uses: actions/checkout@v4
194188

195-
- name: flate/FuzzEncoding
196-
run: go test -run=none -fuzz=FuzzEncoding -fuzztime=100000x -test.fuzzminimizetime=10ms ./flate/.
197-
198-
- name: flate/FuzzEncoding/noasm
199-
run: go test -run=none -tags=noasm -fuzz=FuzzEncoding -fuzztime=100000x -test.fuzzminimizetime=10ms ./flate/.
189+
- name: flate/FuzzEncoding/${{ matrix.tags }}
190+
run: go test -tags=${{ matrix.tags }} -run=none -fuzz=FuzzEncoding -fuzztime=100000x -test.fuzzminimizetime=10ms ./flate/.
200191

201-
- name: zip/FuzzReader
202-
run: go test -run=none -fuzz=FuzzReader -fuzztime=500000x -test.fuzzminimizetime=10ms ./zip/.
192+
- name: zip/FuzzReader/${{ matrix.tags }}
193+
run: go test -tags=${{ matrix.tags }} -run=none -fuzz=FuzzReader -fuzztime=500000x -test.fuzzminimizetime=10ms ./zip/.
203194

204-
- name: fse/FuzzCompress
205-
run: go test -run=none -fuzz=FuzzCompress -fuzztime=1000000x -test.fuzzminimizetime=10ms ./fse/.
195+
- name: fse/FuzzCompress/${{ matrix.tags }}
196+
run: go test -tags=${{ matrix.tags }} -run=none -fuzz=FuzzCompress -fuzztime=1000000x -test.fuzzminimizetime=10ms ./fse/.
206197

207-
- name: fse/FuzzDecompress
208-
run: go test -run=none -fuzz=FuzzDecompress -fuzztime=1000000x -test.fuzzminimizetime=10ms ./fse/.
198+
- name: fse/FuzzDecompress/${{ matrix.tags }}
199+
run: go test -tags=${{ matrix.tags }} -run=none -fuzz=FuzzDecompress -fuzztime=1000000x -test.fuzzminimizetime=10ms ./fse/.

README.md

+11
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,17 @@ This package provides various compression algorithms.
1414
[![Go](https://github.com/klauspost/compress/actions/workflows/go.yml/badge.svg)](https://github.com/klauspost/compress/actions/workflows/go.yml)
1515
[![Sourcegraph Badge](https://sourcegraph.com/github.com/klauspost/compress/-/badge.svg)](https://sourcegraph.com/github.com/klauspost/compress?badge)
1616

17+
# package usage
18+
19+
Use `go get github.com/klauspost/compress@latest` to add it to your project.
20+
21+
This package supports the current Go version and the two versions before it.
22+
23+
* Use the `nounsafe` tag to disable all use of the "unsafe" package.
24+
* Use the `noasm` tag to disable all assembly across packages.
25+
26+
Use the links above for more information on each package.
27+
1728
# changelog
1829

1930
* Sep 23rd, 2024 - [1.17.10](https://github.com/klauspost/compress/releases/tag/v1.17.10)

flate/fast_encoder.go

+4-3
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,9 @@
66
package flate
77

88
import (
9-
"encoding/binary"
109
"fmt"
10+
11+
"github.com/klauspost/compress/internal/le"
1112
)
1213

1314
type fastEnc interface {
@@ -58,11 +59,11 @@ const (
5859
)
5960

6061
func load3232(b []byte, i int32) uint32 {
61-
return binary.LittleEndian.Uint32(b[i:])
62+
return le.Load32(b, i)
6263
}
6364

6465
func load6432(b []byte, i int32) uint64 {
65-
return binary.LittleEndian.Uint64(b[i:])
66+
return le.Load64(b, i)
6667
}
6768

6869
type tableEntry struct {

flate/fuzz_test.go

-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
//go:build go1.18
2-
// +build go1.18
32

43
package flate
54

flate/huffman_bit_writer.go

+10-9
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,11 @@
55
package flate
66

77
import (
8-
"encoding/binary"
98
"fmt"
109
"io"
1110
"math"
11+
12+
"github.com/klauspost/compress/internal/le"
1213
)
1314

1415
const (
@@ -438,7 +439,7 @@ func (w *huffmanBitWriter) writeOutBits() {
438439
n := w.nbytes
439440

440441
// We store a full 8 bytes but only advance by 6; over-writing is faster than an exact-size store.
441-
binary.LittleEndian.PutUint64(w.bytes[n:], bits)
442+
le.Store64(w.bytes[n:], bits)
442443
n += 6
443444

444445
if n >= bufferFlushSize {
@@ -854,7 +855,7 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
854855
bits |= c.code64() << (nbits & 63)
855856
nbits += c.len()
856857
if nbits >= 48 {
857-
binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
858+
le.Store64(w.bytes[nbytes:], bits)
858859
//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
859860
bits >>= 48
860861
nbits -= 48
@@ -882,7 +883,7 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
882883
bits |= c.code64() << (nbits & 63)
883884
nbits += c.len()
884885
if nbits >= 48 {
885-
binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
886+
le.Store64(w.bytes[nbytes:], bits)
886887
//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
887888
bits >>= 48
888889
nbits -= 48
@@ -905,7 +906,7 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
905906
bits |= uint64(extraLength) << (nbits & 63)
906907
nbits += extraLengthBits
907908
if nbits >= 48 {
908-
binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
909+
le.Store64(w.bytes[nbytes:], bits)
909910
//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
910911
bits >>= 48
911912
nbits -= 48
@@ -931,7 +932,7 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
931932
bits |= c.code64() << (nbits & 63)
932933
nbits += c.len()
933934
if nbits >= 48 {
934-
binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
935+
le.Store64(w.bytes[nbytes:], bits)
935936
//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
936937
bits >>= 48
937938
nbits -= 48
@@ -953,7 +954,7 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
953954
bits |= uint64((offset-(offsetComb>>8))&matchOffsetOnlyMask) << (nbits & 63)
954955
nbits += uint8(offsetComb)
955956
if nbits >= 48 {
956-
binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
957+
le.Store64(w.bytes[nbytes:], bits)
957958
//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
958959
bits >>= 48
959960
nbits -= 48
@@ -1107,7 +1108,7 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
11071108
// We must have at least 48 bits free.
11081109
if nbits >= 8 {
11091110
n := nbits >> 3
1110-
binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
1111+
le.Store64(w.bytes[nbytes:], bits)
11111112
bits >>= (n * 8) & 63
11121113
nbits -= n * 8
11131114
nbytes += n
@@ -1136,7 +1137,7 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
11361137
// Remaining...
11371138
for _, t := range input {
11381139
if nbits >= 48 {
1139-
binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
1140+
le.Store64(w.bytes[nbytes:], bits)
11401141
//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
11411142
bits >>= 48
11421143
nbits -= 48

flate/level1.go

+16-15
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
package flate
22

33
import (
4-
"encoding/binary"
54
"fmt"
65
"math/bits"
6+
7+
"github.com/klauspost/compress/internal/le"
78
)
89

910
// fastGen maintains the table for matches,
@@ -126,26 +127,26 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
126127
l = e.matchlenLong(s+4, t+4, src) + 4
127128
} else {
128129
// inlined:
129-
a := src[s+4:]
130-
b := src[t+4:]
131-
for len(a) >= 8 {
132-
if diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b); diff != 0 {
130+
a := src[s:]
131+
b := src[t:]
132+
left := len(a) - 4
133+
for left >= 8 {
134+
if diff := le.Load64(a, l) ^ le.Load64(b, l); diff != 0 {
133135
l += int32(bits.TrailingZeros64(diff) >> 3)
134-
break
136+
goto endMatch
135137
}
136138
l += 8
137-
a = a[8:]
138-
b = b[8:]
139+
left -= 8
139140
}
140-
if len(a) < 8 {
141-
b = b[:len(a)]
142-
for i := range a {
143-
if a[i] != b[i] {
144-
break
145-
}
146-
l++
141+
a = a[l:]
142+
b = b[l:]
143+
for i := range a {
144+
if a[i] != b[i] {
145+
break
147146
}
147+
l++
148148
}
149+
endMatch:
149150
}
150151

151152
// Extend backwards

flate/matchlen_generic.go

+8-4
Original file line numberDiff line numberDiff line change
@@ -7,27 +7,31 @@
77
package flate
88

99
import (
10-
"encoding/binary"
1110
"math/bits"
11+
12+
"github.com/klauspost/compress/internal/le"
1213
)
1314

1415
// matchLen returns the maximum common prefix length of a and b.
1516
// a must be the shorter of the two.
1617
func matchLen(a, b []byte) (n int) {
17-
for ; len(a) >= 8 && len(b) >= 8; a, b = a[8:], b[8:] {
18-
diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b)
18+
left := len(a)
19+
for left >= 8 {
20+
diff := le.Load64(a, n) ^ le.Load64(b, n)
1921
if diff != 0 {
2022
return n + bits.TrailingZeros64(diff)>>3
2123
}
2224
n += 8
25+
left -= 8
2326
}
2427

28+
a = a[n:]
29+
b = b[n:]
2530
for i := range a {
2631
if a[i] != b[i] {
2732
break
2833
}
2934
n++
3035
}
3136
return n
32-
3337
}

0 commit comments

Comments
 (0)