Skip to content

Commit

Permalink
Add unsafe little endian loaders (#1036)
Browse files Browse the repository at this point in the history
Benchmarks without assembly (results may be a bit noisy):

deflate:
```
BEFORE:
github-june-2days-2019.json     gzkp    1       6273951764      1073607045      17441   343.04
github-june-2days-2019.json     gzkp    2       6273951764      1045461954      24258   246.65
github-june-2days-2019.json     gzkp    3       6273951764      1030139729      21752   275.06
github-june-2days-2019.json     gzkp    4       6273951764      992526317       25868   231.29
github-june-2days-2019.json     gzkp    5       6273951764      938015731       28992   206.38
github-june-2days-2019.json     gzkp    6       6273951764      918717756       32863   182.07
github-june-2days-2019.json     gzkp    7       6273951764      924473679       42332   141.34
github-june-2days-2019.json     gzkp    8       6273951764      905294390       53014   112.86
github-june-2days-2019.json     gzkp    9       6273951764      895561157       100686  59.43
github-june-2days-2019.json     gzkp    -2      6273951764      4097019597      12499   478.70
github-june-2days-2019.json     gzkp    -3      6273951764      1175153215      24140   247.85

AFTER:
github-june-2days-2019.json     gzkp    1       6273951764      1073607045      16584   360.79
github-june-2days-2019.json     gzkp    2       6273951764      1045461954      19113   313.04
github-june-2days-2019.json     gzkp    3       6273951764      1030139729      20420   293.00
github-june-2days-2019.json     gzkp    4       6273951764      992526317       23619   253.32
github-june-2days-2019.json     gzkp    5       6273951764      938015731       26842   222.90
github-june-2days-2019.json     gzkp    6       6273951764      918717756       30541   195.90
github-june-2days-2019.json     gzkp    7       6273951764      924473679       43810   136.57
github-june-2days-2019.json     gzkp    8       6273951764      905294390       73933   80.93
github-june-2days-2019.json     gzkp    9       6273951764      895561157       98379   60.82
github-june-2days-2019.json     gzkp    -2      6273951764      4097019597      13439   445.20
github-june-2days-2019.json     gzkp    -3      6273951764      1175153215      22819   262.20
```

zstd:
```
BEFORE:
github-june-2days-2019.json     zskp    1       6273951764      697439481       9378    637.96
github-june-2days-2019.json     zskp    2       6273951764      610876538       12416   481.87
github-june-2days-2019.json     zskp    3       6273951764      545382443       40775   146.74
github-june-2days-2019.json     zskp    4       6273951764      522934301       114291  52.35

AFTER:
github-june-2days-2019.json     zskp    1       6273951764      697439481       8325    718.69
github-june-2days-2019.json     zskp    2       6273951764      610876538       9905    604.04
github-june-2days-2019.json     zskp    3       6273951764      545382443       29954   199.74
github-june-2days-2019.json     zskp    4       6273951764      522934301       111174  53.82
```

s2:
```
BEFORE:
github-june-2days-2019.json     s2      1       6273951764      1041705230      522     11443.55
github-june-2days-2019.json     s2      2       6273951764      944873043       1248    4793.24
github-june-2days-2019.json     s2      3       6273951764      826384742       9999    598.37

AFTER:
github-june-2days-2019.json     s2      1       6273951764      1041705230      464     12868.90
github-june-2days-2019.json     s2      2       6273951764      944873043       861     6947.69
github-june-2days-2019.json     s2      3       6273951764      826384742       9335    640.94
```

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->
## Summary by CodeRabbit

- **New Features**
	- Introduced a new internal `le` package whose little-endian load/store helpers accept any integer type as the index.
	- Added new functions for loading and storing binary data in little-endian format.
	- Enhanced test coverage with `nounsafe` build tag in GitHub Actions workflow.

- **Refactor**
	- Updated byte loading mechanisms across multiple packages.
	- Replaced `encoding/binary` imports with custom `internal/le` package.
	- Modified bit reader and decoder offset handling.
	- Adjusted decoding logic to utilize cursor for state management.
	- Removed outdated comments regarding bounds checks in code.

- **Chores**
	- Updated build constraints and import statements.
	- Refined error handling in decoding processes.
	- Adjusted assembly code offsets for improved performance.
	- Updated Go version from 1.19 to 1.21 in module file.

- **Tests**
	- Simplified error reporting in decompression tests.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
  • Loading branch information
klauspost authored Jan 15, 2025
1 parent 7d9f61a commit 4fa2036
Show file tree
Hide file tree
Showing 26 changed files with 276 additions and 193 deletions.
93 changes: 42 additions & 51 deletions .github/workflows/go.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,18 +32,24 @@ jobs:
- name: Test
run: go test ./...

- name: Test Noasm
- name: Test No-asm
run: go test -tags=noasm ./...

- name: Test No-unsafe
run: go test -tags=nounsafe ./...

- name: Test No-unsafe, noasm
run: go test -tags="nounsafe,noasm" ./...

- name: Test Race 1 CPU
env:
CGO_ENABLED: 1
run: go test -cpu=1 -short -race -v ./...
run: go test -cpu=1 -short -race -tags=nounsafe -v ./...

- name: Test Race 4 CPU
env:
CGO_ENABLED: 1
run: go test -cpu=4 -short -race -v ./...
run: go test -cpu=4 -short -race -tags=nounsafe -v ./...

generate:
strategy:
Expand Down Expand Up @@ -112,6 +118,9 @@ jobs:
env:
CGO_ENABLED: 0
runs-on: ubuntu-latest
strategy:
matrix:
tags: [ 'nounsafe', '"noasm,nounsafe"' ]
steps:
- name: Set up Go
uses: actions/setup-go@v5.2.0
Expand All @@ -121,28 +130,23 @@ jobs:
- name: Checkout code
uses: actions/checkout@v4

- name: S2/FuzzDictBlocks
run: go test -run=none -fuzz=FuzzDictBlocks -fuzztime=100000x -test.fuzzminimizetime=10ms ./s2/.
- name: S2/FuzzDictBlocks/${{ matrix.tags }}
run: go test -tags=${{ matrix.tags }} -run=none -fuzz=FuzzDictBlocks -fuzztime=100000x -test.fuzzminimizetime=10ms ./s2/.

- name: S2/FuzzEncodingBlocks
run: go test -run=none -fuzz=FuzzEncodingBlocks -fuzztime=500000x -test.fuzzminimizetime=10ms ./s2/.
- name: S2/FuzzEncodingBlocks/${{ matrix.tags }}
run: go test -tags=${{ matrix.tags }} -run=none -fuzz=FuzzEncodingBlocks -fuzztime=500000x -test.fuzzminimizetime=10ms ./s2/.

- name: S2/FuzzLZ4Block
run: go test -run=none -fuzz=FuzzLZ4Block -fuzztime=500000x -test.fuzzminimizetime=10ms ./s2/.
- name: S2/FuzzLZ4Block/${{ matrix.tags }}
run: go test -tags=${{ matrix.tags }} -run=none -fuzz=FuzzLZ4Block -fuzztime=500000x -test.fuzzminimizetime=10ms ./s2/.

- name: S2/FuzzDictBlocks/noasm
run: go test -tags=noasm -run=none -fuzz=FuzzDictBlocks -fuzztime=100000x -test.fuzzminimizetime=10ms ./s2/.

- name: S2/FuzzEncodingBlocks/noasm
run: go test -tags=noasm -run=none -fuzz=FuzzEncodingBlocks -fuzztime=500000x -test.fuzzminimizetime=10ms ./s2/.

- name: S2/FuzzLZ4Block/noasm
run: go test -tags=noasm -run=none -fuzz=FuzzLZ4Block -fuzztime=500000x -test.fuzzminimizetime=10ms ./s2/.

fuzz-zstd:
env:
CGO_ENABLED: 0
runs-on: ubuntu-latest
strategy:
matrix:
tags: [ 'nounsafe', '"noasm,nounsafe"' ]
steps:
- name: Set up Go
uses: actions/setup-go@v5.2.0
Expand All @@ -152,57 +156,44 @@ jobs:
- name: Checkout code
uses: actions/checkout@v4

- name: zstd/FuzzDecodeAll
run: go test -run=none -fuzz=FuzzDecodeAll -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/.
- name: zstd/FuzzDecodeAll/${{ matrix.tags }}
run: go test -tags=${{ matrix.tags }} -run=none -fuzz=FuzzDecodeAll -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/.

- name: zstd/FuzzDecAllNoBMI2
run: go test -run=none -fuzz=FuzzDecAllNoBMI2 -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/.
- name: zstd/FuzzDecAllNoBMI2/${{ matrix.tags }}
run: go test -tags=${{ matrix.tags }} -run=none -fuzz=FuzzDecAllNoBMI2 -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/.

- name: zstd/FuzzDecoder
run: go test -run=none -fuzz=FuzzDecoder -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/.
- name: zstd/FuzzDecoder/${{ matrix.tags }}
run: go test -tags=${{ matrix.tags }} -run=none -fuzz=FuzzDecoder -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/.

- name: zstd/FuzzNoBMI2Dec
run: go test -run=none -fuzz=FuzzNoBMI2Dec -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/.
- name: zstd/FuzzNoBMI2Dec/${{ matrix.tags }}
run: go test -tags=${{ matrix.tags }} -run=none -fuzz=FuzzNoBMI2Dec -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/.

- name: zstd/FuzzEncoding
run: cd zstd&&go test -run=none -fuzz=FuzzEncoding -fuzztime=250000x -test.fuzzminimizetime=10ms -fuzz-end=3&&cd ..

- name: zstd/FuzzDecodeAll/noasm
run: go test -tags=noasm -run=none -fuzz=FuzzDecodeAll -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/.

- name: zstd/FuzzDecoder/noasm
run: go test -tags=noasm -run=none -fuzz=FuzzDecoder -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/.

- name: zstd/FuzzEncoding/noasm
run: cd zstd&&go test -tags=noasm -run=none -fuzz=FuzzEncoding -fuzztime=250000x -test.fuzzminimizetime=10ms -fuzz-end=3&&cd ..

- name: zstd/FuzzEncodingBest
run: cd zstd&&go test -run=none -fuzz=FuzzEncoding -fuzztime=25000x -test.fuzzminimizetime=10ms -fuzz-start=4&&cd ..
- name: zstd/FuzzEncoding/${{ matrix.tags }}
run: cd zstd&&go test -tags=${{ matrix.tags }} -run=none -fuzz=FuzzEncoding -fuzztime=250000x -test.fuzzminimizetime=10ms -fuzz-end=3&&cd ..

fuzz-other:
env:
CGO_ENABLED: 0
runs-on: ubuntu-latest
strategy:
matrix:
tags: [ 'nounsafe', '"noasm,nounsafe"' ]
steps:
- name: Set up Go
uses: actions/setup-go@v5.2.0
with:
go-version: 1.23.x

- name: Checkout code
uses: actions/checkout@v4

- name: flate/FuzzEncoding
run: go test -run=none -fuzz=FuzzEncoding -fuzztime=100000x -test.fuzzminimizetime=10ms ./flate/.

- name: flate/FuzzEncoding/noasm
run: go test -run=none -tags=noasm -fuzz=FuzzEncoding -fuzztime=100000x -test.fuzzminimizetime=10ms ./flate/.
- name: flate/FuzzEncoding/${{ matrix.tags }}
run: go test -tags=${{ matrix.tags }} -run=none -fuzz=FuzzEncoding -fuzztime=100000x -test.fuzzminimizetime=10ms ./flate/.

- name: zip/FuzzReader
run: go test -run=none -fuzz=FuzzReader -fuzztime=500000x -test.fuzzminimizetime=10ms ./zip/.
- name: zip/FuzzReader/${{ matrix.tags }}
run: go test -tags=${{ matrix.tags }} -run=none -fuzz=FuzzReader -fuzztime=500000x -test.fuzzminimizetime=10ms ./zip/.

- name: fse/FuzzCompress
run: go test -run=none -fuzz=FuzzCompress -fuzztime=1000000x -test.fuzzminimizetime=10ms ./fse/.
- name: fse/FuzzCompress/${{ matrix.tags }}
run: go test -tags=${{ matrix.tags }} -run=none -fuzz=FuzzCompress -fuzztime=1000000x -test.fuzzminimizetime=10ms ./fse/.

- name: fse/FuzzDecompress
run: go test -run=none -fuzz=FuzzDecompress -fuzztime=1000000x -test.fuzzminimizetime=10ms ./fse/.
- name: fse/FuzzDecompress/${{ matrix.tags }}
run: go test -tags=${{ matrix.tags }} -run=none -fuzz=FuzzDecompress -fuzztime=1000000x -test.fuzzminimizetime=10ms ./fse/.
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,17 @@ This package provides various compression algorithms.
[![Go](/~https://github.com/klauspost/compress/actions/workflows/go.yml/badge.svg)](/~https://github.com/klauspost/compress/actions/workflows/go.yml)
[![Sourcegraph Badge](https://sourcegraph.com/github.com/klauspost/compress/-/badge.svg)](https://sourcegraph.com/github.com/klauspost/compress?badge)

# package usage

Use `go get github.com/klauspost/compress@latest` to add it to your project.

This package will support the current Go version and 2 versions back.

* Use the `nounsafe` tag to disable all use of the "unsafe" package.
* Use the `noasm` tag to disable all assembly across packages.

Use the links above for more information on each.

# changelog

* Sep 23rd, 2024 - [1.17.10](/~https://github.com/klauspost/compress/releases/tag/v1.17.10)
Expand Down
7 changes: 4 additions & 3 deletions flate/fast_encoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@
package flate

import (
"encoding/binary"
"fmt"

"github.com/klauspost/compress/internal/le"
)

type fastEnc interface {
Expand Down Expand Up @@ -58,11 +59,11 @@ const (
)

func load3232(b []byte, i int32) uint32 {
return binary.LittleEndian.Uint32(b[i:])
return le.Load32(b, i)
}

func load6432(b []byte, i int32) uint64 {
return binary.LittleEndian.Uint64(b[i:])
return le.Load64(b, i)
}

type tableEntry struct {
Expand Down
1 change: 0 additions & 1 deletion flate/fuzz_test.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
//go:build go1.18
// +build go1.18

package flate

Expand Down
19 changes: 10 additions & 9 deletions flate/huffman_bit_writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@
package flate

import (
"encoding/binary"
"fmt"
"io"
"math"

"github.com/klauspost/compress/internal/le"
)

const (
Expand Down Expand Up @@ -438,7 +439,7 @@ func (w *huffmanBitWriter) writeOutBits() {
n := w.nbytes

// We store a full 8 bytes but only advance by 6; over-writing is faster than storing exactly 6 bytes.
binary.LittleEndian.PutUint64(w.bytes[n:], bits)
le.Store64(w.bytes[n:], bits)
n += 6

if n >= bufferFlushSize {
Expand Down Expand Up @@ -854,7 +855,7 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
bits |= c.code64() << (nbits & 63)
nbits += c.len()
if nbits >= 48 {
binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
le.Store64(w.bytes[nbytes:], bits)
//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
bits >>= 48
nbits -= 48
Expand Down Expand Up @@ -882,7 +883,7 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
bits |= c.code64() << (nbits & 63)
nbits += c.len()
if nbits >= 48 {
binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
le.Store64(w.bytes[nbytes:], bits)
//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
bits >>= 48
nbits -= 48
Expand All @@ -905,7 +906,7 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
bits |= uint64(extraLength) << (nbits & 63)
nbits += extraLengthBits
if nbits >= 48 {
binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
le.Store64(w.bytes[nbytes:], bits)
//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
bits >>= 48
nbits -= 48
Expand All @@ -931,7 +932,7 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
bits |= c.code64() << (nbits & 63)
nbits += c.len()
if nbits >= 48 {
binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
le.Store64(w.bytes[nbytes:], bits)
//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
bits >>= 48
nbits -= 48
Expand All @@ -953,7 +954,7 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
bits |= uint64((offset-(offsetComb>>8))&matchOffsetOnlyMask) << (nbits & 63)
nbits += uint8(offsetComb)
if nbits >= 48 {
binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
le.Store64(w.bytes[nbytes:], bits)
//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
bits >>= 48
nbits -= 48
Expand Down Expand Up @@ -1107,7 +1108,7 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
// We must have at least 48 bits free.
if nbits >= 8 {
n := nbits >> 3
binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
le.Store64(w.bytes[nbytes:], bits)
bits >>= (n * 8) & 63
nbits -= n * 8
nbytes += n
Expand Down Expand Up @@ -1136,7 +1137,7 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
// Remaining...
for _, t := range input {
if nbits >= 48 {
binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
le.Store64(w.bytes[nbytes:], bits)
//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
bits >>= 48
nbits -= 48
Expand Down
31 changes: 16 additions & 15 deletions flate/level1.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
package flate

import (
"encoding/binary"
"fmt"
"math/bits"

"github.com/klauspost/compress/internal/le"
)

// fastGen maintains the table for matches,
Expand Down Expand Up @@ -126,26 +127,26 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
l = e.matchlenLong(s+4, t+4, src) + 4
} else {
// inlined:
a := src[s+4:]
b := src[t+4:]
for len(a) >= 8 {
if diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b); diff != 0 {
a := src[s:]
b := src[t:]
left := len(a) - 4
for left >= 8 {
if diff := le.Load64(a, l) ^ le.Load64(b, l); diff != 0 {
l += int32(bits.TrailingZeros64(diff) >> 3)
break
goto endMatch
}
l += 8
a = a[8:]
b = b[8:]
left -= 8
}
if len(a) < 8 {
b = b[:len(a)]
for i := range a {
if a[i] != b[i] {
break
}
l++
a = a[l:]
b = b[l:]
for i := range a {
if a[i] != b[i] {
break
}
l++
}
endMatch:
}

// Extend backwards
Expand Down
12 changes: 8 additions & 4 deletions flate/matchlen_generic.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,27 +7,31 @@
package flate

import (
"encoding/binary"
"math/bits"

"github.com/klauspost/compress/internal/le"
)

// matchLen returns the maximum common prefix length of a and b.
// a must be the shorter of the two.
func matchLen(a, b []byte) (n int) {
for ; len(a) >= 8 && len(b) >= 8; a, b = a[8:], b[8:] {
diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b)
left := len(a)
for left >= 8 {
diff := le.Load64(a, n) ^ le.Load64(b, n)
if diff != 0 {
return n + bits.TrailingZeros64(diff)>>3
}
n += 8
left -= 8
}

a = a[n:]
b = b[n:]
for i := range a {
if a[i] != b[i] {
break
}
n++
}
return n

}
Loading

0 comments on commit 4fa2036

Please sign in to comment.