librsync-go
librsync-go copied to clipboard
[WIP] Less Garbage
Benchmarks on master
:
lmb@Ahab ~/balena/balena-os/librsync-go (master)]go test -run ^$ -bench Benchmark
goos: linux
goarch: amd64
pkg: github.com/balena-os/librsync-go
cpu: Intel(R) Core(TM) i7-10750H CPU @ 2.60GHz
BenchmarkSignature1MB-12 385 3092974 ns/op 323.31 MB/s 403036 B/op 3990 allocs/op
BenchmarkSignature1GB-12 1 3343824747 ns/op 299.06 MB/s 473541576 B/op 3953816 allocs/op
BenchmarkDeltaChangeTail1GB-12 1 17628730864 ns/op 56.73 MB/s 384167872 B/op 2295191 allocs/op
--- BENCH: BenchmarkDeltaChangeTail1GB-12
benchmark_test.go:55: raw size: 1000000000 bytes
benchmark_test.go:56: delta size: 102514043 bytes (10.25%)
BenchmarkDeltaChangeTail1MB-12 100 11353717 ns/op 88.08 MB/s 16960032 B/op 1804 allocs/op
--- BENCH: BenchmarkDeltaChangeTail1MB-12
benchmark_test.go:55: raw size: 1000000 bytes
benchmark_test.go:56: delta size: 100432 bytes (10.04%)
benchmark_test.go:55: raw size: 1000000 bytes
benchmark_test.go:56: delta size: 100432 bytes (10.04%)
benchmark_test.go:55: raw size: 1000000 bytes
benchmark_test.go:56: delta size: 100432 bytes (10.04%)
benchmark_test.go:55: raw size: 1000000 bytes
benchmark_test.go:56: delta size: 100432 bytes (10.04%)
benchmark_test.go:55: raw size: 1000000 bytes
benchmark_test.go:56: delta size: 100432 bytes (10.04%)
... [output truncated]
BenchmarkDeltaAppend1GB-12 1 19001528513 ns/op 52.63 MB/s 391444096 B/op 2492307 allocs/op
--- BENCH: BenchmarkDeltaAppend1GB-12
benchmark_test.go:96: raw size: 1000000000 bytes
benchmark_test.go:97: delta size: 102572947 bytes (10.26%)
BenchmarkDeltaAppend1MB-12 100 12778517 ns/op 78.26 MB/s 16966296 B/op 2000 allocs/op
--- BENCH: BenchmarkDeltaAppend1MB-12
benchmark_test.go:96: raw size: 1000000 bytes
benchmark_test.go:97: delta size: 100080 bytes (10.01%)
benchmark_test.go:96: raw size: 1000000 bytes
benchmark_test.go:97: delta size: 100080 bytes (10.01%)
benchmark_test.go:96: raw size: 1000000 bytes
benchmark_test.go:97: delta size: 100080 bytes (10.01%)
benchmark_test.go:96: raw size: 1000000 bytes
benchmark_test.go:97: delta size: 100080 bytes (10.01%)
benchmark_test.go:96: raw size: 1000000 bytes
benchmark_test.go:97: delta size: 100080 bytes (10.01%)
... [output truncated]
BenchmarkDeltaPrepend1GB-12 1 18723547755 ns/op 53.41 MB/s 347557904 B/op 2490182 allocs/op
--- BENCH: BenchmarkDeltaPrepend1GB-12
benchmark_test.go:132: raw size: 1000000000 bytes
benchmark_test.go:133: delta size: 102542669 bytes (10.25%)
BenchmarkDeltaPrepend1MB-12 98 12320118 ns/op 81.17 MB/s 16966316 B/op 2002 allocs/op
--- BENCH: BenchmarkDeltaPrepend1MB-12
benchmark_test.go:132: raw size: 1000000 bytes
benchmark_test.go:133: delta size: 100082 bytes (10.01%)
benchmark_test.go:132: raw size: 1000000 bytes
benchmark_test.go:133: delta size: 100082 bytes (10.01%)
benchmark_test.go:132: raw size: 1000000 bytes
benchmark_test.go:133: delta size: 100082 bytes (10.01%)
benchmark_test.go:132: raw size: 1000000 bytes
benchmark_test.go:133: delta size: 100082 bytes (10.01%)
benchmark_test.go:132: raw size: 1000000 bytes
benchmark_test.go:133: delta size: 100082 bytes (10.01%)
... [output truncated]
BenchmarkDeltaInpend1GB-12 1 18683009675 ns/op 53.52 MB/s 373965968 B/op 2490953 allocs/op
--- BENCH: BenchmarkDeltaInpend1GB-12
benchmark_test.go:175: raw size: 1000000000 bytes
benchmark_test.go:176: delta size: 102517056 bytes (10.25%)
BenchmarkDeltaInpend1MB-12 98 12491894 ns/op 80.05 MB/s 16966335 B/op 2006 allocs/op
--- BENCH: BenchmarkDeltaInpend1MB-12
benchmark_test.go:175: raw size: 1000000 bytes
benchmark_test.go:176: delta size: 100603 bytes (10.06%)
benchmark_test.go:175: raw size: 1000000 bytes
benchmark_test.go:176: delta size: 100603 bytes (10.06%)
benchmark_test.go:175: raw size: 1000000 bytes
benchmark_test.go:176: delta size: 100603 bytes (10.06%)
benchmark_test.go:175: raw size: 1000000 bytes
benchmark_test.go:176: delta size: 100603 bytes (10.06%)
benchmark_test.go:175: raw size: 1000000 bytes
benchmark_test.go:176: delta size: 100603 bytes (10.06%)
... [output truncated]
BenchmarkDeltaCutTail1GB-12 1 8613747738 ns/op 116.09 MB/s 78779640 B/op 1795619 allocs/op
--- BENCH: BenchmarkDeltaCutTail1GB-12
benchmark_test.go:213: raw size: 1000000000 bytes
benchmark_test.go:214: delta size: 2571460 bytes (0.26%)
BenchmarkDeltaCutTail1MB-12 100 10593485 ns/op 94.40 MB/s 16848463 B/op 1800 allocs/op
--- BENCH: BenchmarkDeltaCutTail1MB-12
benchmark_test.go:213: raw size: 1000000 bytes
benchmark_test.go:214: delta size: 430 bytes (0.04%)
benchmark_test.go:213: raw size: 1000000 bytes
benchmark_test.go:214: delta size: 430 bytes (0.04%)
benchmark_test.go:213: raw size: 1000000 bytes
benchmark_test.go:214: delta size: 430 bytes (0.04%)
benchmark_test.go:213: raw size: 1000000 bytes
benchmark_test.go:214: delta size: 430 bytes (0.04%)
benchmark_test.go:213: raw size: 1000000 bytes
benchmark_test.go:214: delta size: 430 bytes (0.04%)
... [output truncated]
BenchmarkDeltaCutHead1GB-12 1 7807276551 ns/op 128.09 MB/s 72435296 B/op 1593249 allocs/op
--- BENCH: BenchmarkDeltaCutHead1GB-12
benchmark_test.go:255: raw size: 1000000000 bytes
benchmark_test.go:256: delta size: 2100307 bytes (0.21%)
BenchmarkDeltaCutHead1MB-12 100 10680150 ns/op 93.63 MB/s 16843209 B/op 1604 allocs/op
--- BENCH: BenchmarkDeltaCutHead1MB-12
benchmark_test.go:255: raw size: 1000000 bytes
benchmark_test.go:256: delta size: 433 bytes (0.04%)
benchmark_test.go:255: raw size: 1000000 bytes
benchmark_test.go:256: delta size: 433 bytes (0.04%)
benchmark_test.go:255: raw size: 1000000 bytes
benchmark_test.go:256: delta size: 433 bytes (0.04%)
benchmark_test.go:255: raw size: 1000000 bytes
benchmark_test.go:256: delta size: 433 bytes (0.04%)
benchmark_test.go:255: raw size: 1000000 bytes
benchmark_test.go:256: delta size: 433 bytes (0.04%)
... [output truncated]
PASS
ok github.com/balena-os/librsync-go 122.522s
Benchmarks on the branch:
lmb@Ahab ~/balena/balena-os/librsync-go (lmb/less-garbage)]go test -run ^$ -bench Benchmark
goos: linux
goarch: amd64
pkg: github.com/balena-os/librsync-go
cpu: Intel(R) Core(TM) i7-10750H CPU @ 2.60GHz
BenchmarkSignature1MB-12 394 3000603 ns/op 333.27 MB/s 293196 B/op 1977 allocs/op
BenchmarkSignature1GB-12 1 3200420741 ns/op 312.46 MB/s 133039280 B/op 1953161 allocs/op
BenchmarkDeltaChangeTail1GB-12 1 17771930008 ns/op 56.27 MB/s 312229776 B/op 24437 allocs/op
--- BENCH: BenchmarkDeltaChangeTail1GB-12
benchmark_test.go:54: raw size: 1000000000 bytes
benchmark_test.go:55: delta size: 102557525 bytes (10.26%)
BenchmarkDeltaChangeTail1MB-12 100 11572596 ns/op 86.41 MB/s 16904337 B/op 50 allocs/op
--- BENCH: BenchmarkDeltaChangeTail1MB-12
benchmark_test.go:54: raw size: 1000000 bytes
benchmark_test.go:55: delta size: 100432 bytes (10.04%)
benchmark_test.go:54: raw size: 1000000 bytes
benchmark_test.go:55: delta size: 100432 bytes (10.04%)
benchmark_test.go:54: raw size: 1000000 bytes
benchmark_test.go:55: delta size: 100432 bytes (10.04%)
benchmark_test.go:54: raw size: 1000000 bytes
benchmark_test.go:55: delta size: 100432 bytes (10.04%)
benchmark_test.go:54: raw size: 1000000 bytes
benchmark_test.go:55: delta size: 100432 bytes (10.04%)
... [output truncated]
BenchmarkDeltaAppend1GB-12 1 18839059064 ns/op 53.08 MB/s 311737568 B/op 24152 allocs/op
--- BENCH: BenchmarkDeltaAppend1GB-12
benchmark_test.go:95: raw size: 1000000000 bytes
benchmark_test.go:96: delta size: 102529053 bytes (10.25%)
BenchmarkDeltaAppend1MB-12 94 12629384 ns/op 79.18 MB/s 16904318 B/op 50 allocs/op
--- BENCH: BenchmarkDeltaAppend1MB-12
benchmark_test.go:95: raw size: 1000000 bytes
benchmark_test.go:96: delta size: 100080 bytes (10.01%)
benchmark_test.go:95: raw size: 1000000 bytes
benchmark_test.go:96: delta size: 100080 bytes (10.01%)
benchmark_test.go:95: raw size: 1000000 bytes
benchmark_test.go:96: delta size: 100080 bytes (10.01%)
benchmark_test.go:95: raw size: 1000000 bytes
benchmark_test.go:96: delta size: 100080 bytes (10.01%)
benchmark_test.go:95: raw size: 1000000 bytes
benchmark_test.go:96: delta size: 100080 bytes (10.01%)
... [output truncated]
BenchmarkDeltaPrepend1GB-12 1 18874080312 ns/op 52.98 MB/s 268650944 B/op 24684 allocs/op
--- BENCH: BenchmarkDeltaPrepend1GB-12
benchmark_test.go:131: raw size: 1000000000 bytes
benchmark_test.go:132: delta size: 102586533 bytes (10.26%)
BenchmarkDeltaPrepend1MB-12 94 12454396 ns/op 80.29 MB/s 16904352 B/op 57 allocs/op
--- BENCH: BenchmarkDeltaPrepend1MB-12
benchmark_test.go:131: raw size: 1000000 bytes
benchmark_test.go:132: delta size: 100082 bytes (10.01%)
benchmark_test.go:131: raw size: 1000000 bytes
benchmark_test.go:132: delta size: 100606 bytes (10.06%)
benchmark_test.go:131: raw size: 1000000 bytes
benchmark_test.go:132: delta size: 100606 bytes (10.06%)
benchmark_test.go:131: raw size: 1000000 bytes
benchmark_test.go:132: delta size: 100606 bytes (10.06%)
benchmark_test.go:131: raw size: 1000000 bytes
benchmark_test.go:132: delta size: 100606 bytes (10.06%)
... [output truncated]
BenchmarkDeltaInpend1GB-12 1 18906083387 ns/op 52.89 MB/s 296135328 B/op 25407 allocs/op
--- BENCH: BenchmarkDeltaInpend1GB-12
benchmark_test.go:174: raw size: 1000000000 bytes
benchmark_test.go:175: delta size: 102661316 bytes (10.27%)
BenchmarkDeltaInpend1MB-12 97 12975511 ns/op 77.07 MB/s 16904417 B/op 57 allocs/op
--- BENCH: BenchmarkDeltaInpend1MB-12
benchmark_test.go:174: raw size: 1000000 bytes
benchmark_test.go:175: delta size: 100603 bytes (10.06%)
benchmark_test.go:174: raw size: 1000000 bytes
benchmark_test.go:175: delta size: 100603 bytes (10.06%)
benchmark_test.go:174: raw size: 1000000 bytes
benchmark_test.go:175: delta size: 100603 bytes (10.06%)
benchmark_test.go:174: raw size: 1000000 bytes
benchmark_test.go:175: delta size: 100603 bytes (10.06%)
benchmark_test.go:174: raw size: 1000000 bytes
benchmark_test.go:175: delta size: 100603 bytes (10.06%)
... [output truncated]
BenchmarkDeltaCutTail1GB-12 1 8750044978 ns/op 114.29 MB/s 22103672 B/op 24085 allocs/op
--- BENCH: BenchmarkDeltaCutTail1GB-12
benchmark_test.go:212: raw size: 1000000000 bytes
benchmark_test.go:213: delta size: 2521970 bytes (0.25%)
BenchmarkDeltaCutTail1MB-12 100 10134102 ns/op 98.68 MB/s 16792740 B/op 47 allocs/op
--- BENCH: BenchmarkDeltaCutTail1MB-12
benchmark_test.go:212: raw size: 1000000 bytes
benchmark_test.go:213: delta size: 430 bytes (0.04%)
benchmark_test.go:212: raw size: 1000000 bytes
benchmark_test.go:213: delta size: 430 bytes (0.04%)
benchmark_test.go:212: raw size: 1000000 bytes
benchmark_test.go:213: delta size: 430 bytes (0.04%)
benchmark_test.go:212: raw size: 1000000 bytes
benchmark_test.go:213: delta size: 430 bytes (0.04%)
benchmark_test.go:212: raw size: 1000000 bytes
benchmark_test.go:213: delta size: 430 bytes (0.04%)
... [output truncated]
BenchmarkDeltaCutHead1GB-12 1 7891398297 ns/op 126.72 MB/s 22092368 B/op 19749 allocs/op
--- BENCH: BenchmarkDeltaCutHead1GB-12
benchmark_test.go:254: raw size: 1000000000 bytes
benchmark_test.go:255: delta size: 2069481 bytes (0.21%)
BenchmarkDeltaCutHead1MB-12 140 10038620 ns/op 99.62 MB/s 16793752 B/op 45 allocs/op
--- BENCH: BenchmarkDeltaCutHead1MB-12
benchmark_test.go:254: raw size: 1000000 bytes
benchmark_test.go:255: delta size: 433 bytes (0.04%)
benchmark_test.go:254: raw size: 1000000 bytes
benchmark_test.go:255: delta size: 433 bytes (0.04%)
benchmark_test.go:254: raw size: 1000000 bytes
benchmark_test.go:255: delta size: 433 bytes (0.04%)
benchmark_test.go:254: raw size: 1000000 bytes
benchmark_test.go:255: delta size: 433 bytes (0.04%)
benchmark_test.go:254: raw size: 1000000 bytes
benchmark_test.go:255: delta size: 433 bytes (0.04%)
... [output truncated]
PASS
ok github.com/balena-os/librsync-go 123.219s
For reference, an attempt that didn't help:
commit 09800c1303cf82049e62a6c97d9ceaf1ea29461b
Author: Leandro Motta Barros <[email protected]>
Date: Thu Jan 4 16:41:12 2024 -0300
[TENTATIVE] Avoid copying data
Signed-off-by: Leandro Motta Barros <[email protected]>
Change-type: patch
diff --git a/signature.go b/signature.go
index bfa1f81..6d33a69 100644
--- a/signature.go
+++ b/signature.go
@@ -65,10 +65,16 @@ func SignatureWithBlockCount(input io.Reader, output io.Writer, blockLen, strong
var ret SignatureType
ret.weak2block = make(map[uint32]int, blockCount)
- ret.strongSigs = make([]byte, 0, blockCount*int(strongLen))
ret.sigType = sigType
ret.strongLen = strongLen
ret.blockLen = blockLen
+ // These 32 extra bytes we allocate here are to play nice with the
+ // SumAppending() call below. This call (at first) appends the whole strong
+ // sum to the slice, disregarding strongLen. So, if we allocate only
+ // blockCount*strongLen bytes here, the last call to SumAppending() will
+ // need to re-allocate and memcopy the whole thing. (This only happens when
+ // strongLen is smaller than the full strong sum size.)
+ ret.strongSigs = make([]byte, 0, blockCount*int(strongLen)+32)
summer, err := NewSummer(sigType, strongLen)
if err != nil {
@@ -100,11 +106,11 @@ func SignatureWithBlockCount(input io.Reader, output io.Writer, blockLen, strong
return nil, err
}
- strong := summer.Sum(data)
- output.Write(strong)
-
+ lenBefore := len(ret.strongSigs)
ret.weak2block[weak] = len(ret.strongSigs) / int(strongLen)
- ret.strongSigs = append(ret.strongSigs, strong...)
+ ret.strongSigs = summer.SumAppending(data, ret.strongSigs)
+ strong := ret.strongSigs[lenBefore:]
+ output.Write(strong)
}
return &ret, nil
diff --git a/summer.go b/summer.go
index 51488c8..70e6059 100644
--- a/summer.go
+++ b/summer.go
@@ -53,3 +53,19 @@ func (s *Summer) Sum(data []byte) []byte {
s.hasher.Write(data)
return s.hasher.Sum(s.buffer[:0])[:s.strongLen]
}
+
+// SumAppending appends the strong checksum of data to dst, and returns the
+// resulting slice.
+//
+// There's one implementation detail worth noting, as it "leaks" to the point
+// that it can impact performance: Initially, this method appends the whole
+// strong checksum to dst, disregarding s.strongLen. Only after that it will
+// re-slice the result to take s.strongLen into account.
+func (s *Summer) SumAppending(data []byte, dst []byte) []byte {
+ s.hasher.Reset()
+ s.hasher.Write(data)
+
+ lenBefore := len(dst)
+ dst = s.hasher.Sum(dst)
+ return dst[:lenBefore+int(s.strongLen)]
+}