librsync-go icon indicating copy to clipboard operation
librsync-go copied to clipboard

[WIP] Less Garbage

Open lmbarros opened this issue 1 year ago • 1 comments

Benchmarks on master:

lmb@Ahab ~/balena/balena-os/librsync-go (master)]go test -run ^$ -bench Benchmark                      
goos: linux
goarch: amd64
pkg: github.com/balena-os/librsync-go
cpu: Intel(R) Core(TM) i7-10750H CPU @ 2.60GHz
BenchmarkSignature1MB-12          	     385	   3092974 ns/op	 323.31 MB/s	  403036 B/op	    3990 allocs/op
BenchmarkSignature1GB-12          	       1	3343824747 ns/op	 299.06 MB/s	473541576 B/op	 3953816 allocs/op
BenchmarkDeltaChangeTail1GB-12    	       1	17628730864 ns/op	  56.73 MB/s	384167872 B/op	 2295191 allocs/op
--- BENCH: BenchmarkDeltaChangeTail1GB-12
    benchmark_test.go:55: raw   size:    1000000000 bytes
    benchmark_test.go:56: delta size:    102514043 bytes (10.25%)
BenchmarkDeltaChangeTail1MB-12    	     100	  11353717 ns/op	  88.08 MB/s	16960032 B/op	    1804 allocs/op
--- BENCH: BenchmarkDeltaChangeTail1MB-12
    benchmark_test.go:55: raw   size:    1000000 bytes
    benchmark_test.go:56: delta size:    100432 bytes (10.04%)
    benchmark_test.go:55: raw   size:    1000000 bytes
    benchmark_test.go:56: delta size:    100432 bytes (10.04%)
    benchmark_test.go:55: raw   size:    1000000 bytes
    benchmark_test.go:56: delta size:    100432 bytes (10.04%)
    benchmark_test.go:55: raw   size:    1000000 bytes
    benchmark_test.go:56: delta size:    100432 bytes (10.04%)
    benchmark_test.go:55: raw   size:    1000000 bytes
    benchmark_test.go:56: delta size:    100432 bytes (10.04%)
	... [output truncated]
BenchmarkDeltaAppend1GB-12        	       1	19001528513 ns/op	  52.63 MB/s	391444096 B/op	 2492307 allocs/op
--- BENCH: BenchmarkDeltaAppend1GB-12
    benchmark_test.go:96: raw   size:    1000000000 bytes
    benchmark_test.go:97: delta size:    102572947 bytes (10.26%)
BenchmarkDeltaAppend1MB-12        	     100	  12778517 ns/op	  78.26 MB/s	16966296 B/op	    2000 allocs/op
--- BENCH: BenchmarkDeltaAppend1MB-12
    benchmark_test.go:96: raw   size:    1000000 bytes
    benchmark_test.go:97: delta size:    100080 bytes (10.01%)
    benchmark_test.go:96: raw   size:    1000000 bytes
    benchmark_test.go:97: delta size:    100080 bytes (10.01%)
    benchmark_test.go:96: raw   size:    1000000 bytes
    benchmark_test.go:97: delta size:    100080 bytes (10.01%)
    benchmark_test.go:96: raw   size:    1000000 bytes
    benchmark_test.go:97: delta size:    100080 bytes (10.01%)
    benchmark_test.go:96: raw   size:    1000000 bytes
    benchmark_test.go:97: delta size:    100080 bytes (10.01%)
	... [output truncated]
BenchmarkDeltaPrepend1GB-12       	       1	18723547755 ns/op	  53.41 MB/s	347557904 B/op	 2490182 allocs/op
--- BENCH: BenchmarkDeltaPrepend1GB-12
    benchmark_test.go:132: raw   size:    1000000000 bytes
    benchmark_test.go:133: delta size:    102542669 bytes (10.25%)
BenchmarkDeltaPrepend1MB-12       	      98	  12320118 ns/op	  81.17 MB/s	16966316 B/op	    2002 allocs/op
--- BENCH: BenchmarkDeltaPrepend1MB-12
    benchmark_test.go:132: raw   size:    1000000 bytes
    benchmark_test.go:133: delta size:    100082 bytes (10.01%)
    benchmark_test.go:132: raw   size:    1000000 bytes
    benchmark_test.go:133: delta size:    100082 bytes (10.01%)
    benchmark_test.go:132: raw   size:    1000000 bytes
    benchmark_test.go:133: delta size:    100082 bytes (10.01%)
    benchmark_test.go:132: raw   size:    1000000 bytes
    benchmark_test.go:133: delta size:    100082 bytes (10.01%)
    benchmark_test.go:132: raw   size:    1000000 bytes
    benchmark_test.go:133: delta size:    100082 bytes (10.01%)
	... [output truncated]
BenchmarkDeltaInpend1GB-12        	       1	18683009675 ns/op	  53.52 MB/s	373965968 B/op	 2490953 allocs/op
--- BENCH: BenchmarkDeltaInpend1GB-12
    benchmark_test.go:175: raw   size:    1000000000 bytes
    benchmark_test.go:176: delta size:    102517056 bytes (10.25%)
BenchmarkDeltaInpend1MB-12        	      98	  12491894 ns/op	  80.05 MB/s	16966335 B/op	    2006 allocs/op
--- BENCH: BenchmarkDeltaInpend1MB-12
    benchmark_test.go:175: raw   size:    1000000 bytes
    benchmark_test.go:176: delta size:    100603 bytes (10.06%)
    benchmark_test.go:175: raw   size:    1000000 bytes
    benchmark_test.go:176: delta size:    100603 bytes (10.06%)
    benchmark_test.go:175: raw   size:    1000000 bytes
    benchmark_test.go:176: delta size:    100603 bytes (10.06%)
    benchmark_test.go:175: raw   size:    1000000 bytes
    benchmark_test.go:176: delta size:    100603 bytes (10.06%)
    benchmark_test.go:175: raw   size:    1000000 bytes
    benchmark_test.go:176: delta size:    100603 bytes (10.06%)
	... [output truncated]
BenchmarkDeltaCutTail1GB-12       	       1	8613747738 ns/op	 116.09 MB/s	78779640 B/op	 1795619 allocs/op
--- BENCH: BenchmarkDeltaCutTail1GB-12
    benchmark_test.go:213: raw   size:    1000000000 bytes
    benchmark_test.go:214: delta size:    2571460 bytes (0.26%)
BenchmarkDeltaCutTail1MB-12       	     100	  10593485 ns/op	  94.40 MB/s	16848463 B/op	    1800 allocs/op
--- BENCH: BenchmarkDeltaCutTail1MB-12
    benchmark_test.go:213: raw   size:    1000000 bytes
    benchmark_test.go:214: delta size:    430 bytes (0.04%)
    benchmark_test.go:213: raw   size:    1000000 bytes
    benchmark_test.go:214: delta size:    430 bytes (0.04%)
    benchmark_test.go:213: raw   size:    1000000 bytes
    benchmark_test.go:214: delta size:    430 bytes (0.04%)
    benchmark_test.go:213: raw   size:    1000000 bytes
    benchmark_test.go:214: delta size:    430 bytes (0.04%)
    benchmark_test.go:213: raw   size:    1000000 bytes
    benchmark_test.go:214: delta size:    430 bytes (0.04%)
	... [output truncated]
BenchmarkDeltaCutHead1GB-12       	       1	7807276551 ns/op	 128.09 MB/s	72435296 B/op	 1593249 allocs/op
--- BENCH: BenchmarkDeltaCutHead1GB-12
    benchmark_test.go:255: raw   size:    1000000000 bytes
    benchmark_test.go:256: delta size:    2100307 bytes (0.21%)
BenchmarkDeltaCutHead1MB-12       	     100	  10680150 ns/op	  93.63 MB/s	16843209 B/op	    1604 allocs/op
--- BENCH: BenchmarkDeltaCutHead1MB-12
    benchmark_test.go:255: raw   size:    1000000 bytes
    benchmark_test.go:256: delta size:    433 bytes (0.04%)
    benchmark_test.go:255: raw   size:    1000000 bytes
    benchmark_test.go:256: delta size:    433 bytes (0.04%)
    benchmark_test.go:255: raw   size:    1000000 bytes
    benchmark_test.go:256: delta size:    433 bytes (0.04%)
    benchmark_test.go:255: raw   size:    1000000 bytes
    benchmark_test.go:256: delta size:    433 bytes (0.04%)
    benchmark_test.go:255: raw   size:    1000000 bytes
    benchmark_test.go:256: delta size:    433 bytes (0.04%)
	... [output truncated]
PASS
ok  	github.com/balena-os/librsync-go	122.522s

Benchmarks on the branch:

lmb@Ahab ~/balena/balena-os/librsync-go (lmb/less-garbage)]go test -run ^$ -bench Benchmark
goos: linux
goarch: amd64
pkg: github.com/balena-os/librsync-go
cpu: Intel(R) Core(TM) i7-10750H CPU @ 2.60GHz
BenchmarkSignature1MB-12          	     394	   3000603 ns/op	 333.27 MB/s	  293196 B/op	    1977 allocs/op
BenchmarkSignature1GB-12          	       1	3200420741 ns/op	 312.46 MB/s	133039280 B/op	 1953161 allocs/op
BenchmarkDeltaChangeTail1GB-12    	       1	17771930008 ns/op	  56.27 MB/s	312229776 B/op	   24437 allocs/op
--- BENCH: BenchmarkDeltaChangeTail1GB-12
    benchmark_test.go:54: raw   size:    1000000000 bytes
    benchmark_test.go:55: delta size:    102557525 bytes (10.26%)
BenchmarkDeltaChangeTail1MB-12    	     100	  11572596 ns/op	  86.41 MB/s	16904337 B/op	      50 allocs/op
--- BENCH: BenchmarkDeltaChangeTail1MB-12
    benchmark_test.go:54: raw   size:    1000000 bytes
    benchmark_test.go:55: delta size:    100432 bytes (10.04%)
    benchmark_test.go:54: raw   size:    1000000 bytes
    benchmark_test.go:55: delta size:    100432 bytes (10.04%)
    benchmark_test.go:54: raw   size:    1000000 bytes
    benchmark_test.go:55: delta size:    100432 bytes (10.04%)
    benchmark_test.go:54: raw   size:    1000000 bytes
    benchmark_test.go:55: delta size:    100432 bytes (10.04%)
    benchmark_test.go:54: raw   size:    1000000 bytes
    benchmark_test.go:55: delta size:    100432 bytes (10.04%)
	... [output truncated]
BenchmarkDeltaAppend1GB-12        	       1	18839059064 ns/op	  53.08 MB/s	311737568 B/op	   24152 allocs/op
--- BENCH: BenchmarkDeltaAppend1GB-12
    benchmark_test.go:95: raw   size:    1000000000 bytes
    benchmark_test.go:96: delta size:    102529053 bytes (10.25%)
BenchmarkDeltaAppend1MB-12        	      94	  12629384 ns/op	  79.18 MB/s	16904318 B/op	      50 allocs/op
--- BENCH: BenchmarkDeltaAppend1MB-12
    benchmark_test.go:95: raw   size:    1000000 bytes
    benchmark_test.go:96: delta size:    100080 bytes (10.01%)
    benchmark_test.go:95: raw   size:    1000000 bytes
    benchmark_test.go:96: delta size:    100080 bytes (10.01%)
    benchmark_test.go:95: raw   size:    1000000 bytes
    benchmark_test.go:96: delta size:    100080 bytes (10.01%)
    benchmark_test.go:95: raw   size:    1000000 bytes
    benchmark_test.go:96: delta size:    100080 bytes (10.01%)
    benchmark_test.go:95: raw   size:    1000000 bytes
    benchmark_test.go:96: delta size:    100080 bytes (10.01%)
	... [output truncated]
BenchmarkDeltaPrepend1GB-12       	       1	18874080312 ns/op	  52.98 MB/s	268650944 B/op	   24684 allocs/op
--- BENCH: BenchmarkDeltaPrepend1GB-12
    benchmark_test.go:131: raw   size:    1000000000 bytes
    benchmark_test.go:132: delta size:    102586533 bytes (10.26%)
BenchmarkDeltaPrepend1MB-12       	      94	  12454396 ns/op	  80.29 MB/s	16904352 B/op	      57 allocs/op
--- BENCH: BenchmarkDeltaPrepend1MB-12
    benchmark_test.go:131: raw   size:    1000000 bytes
    benchmark_test.go:132: delta size:    100082 bytes (10.01%)
    benchmark_test.go:131: raw   size:    1000000 bytes
    benchmark_test.go:132: delta size:    100606 bytes (10.06%)
    benchmark_test.go:131: raw   size:    1000000 bytes
    benchmark_test.go:132: delta size:    100606 bytes (10.06%)
    benchmark_test.go:131: raw   size:    1000000 bytes
    benchmark_test.go:132: delta size:    100606 bytes (10.06%)
    benchmark_test.go:131: raw   size:    1000000 bytes
    benchmark_test.go:132: delta size:    100606 bytes (10.06%)
	... [output truncated]
BenchmarkDeltaInpend1GB-12        	       1	18906083387 ns/op	  52.89 MB/s	296135328 B/op	   25407 allocs/op
--- BENCH: BenchmarkDeltaInpend1GB-12
    benchmark_test.go:174: raw   size:    1000000000 bytes
    benchmark_test.go:175: delta size:    102661316 bytes (10.27%)
BenchmarkDeltaInpend1MB-12        	      97	  12975511 ns/op	  77.07 MB/s	16904417 B/op	      57 allocs/op
--- BENCH: BenchmarkDeltaInpend1MB-12
    benchmark_test.go:174: raw   size:    1000000 bytes
    benchmark_test.go:175: delta size:    100603 bytes (10.06%)
    benchmark_test.go:174: raw   size:    1000000 bytes
    benchmark_test.go:175: delta size:    100603 bytes (10.06%)
    benchmark_test.go:174: raw   size:    1000000 bytes
    benchmark_test.go:175: delta size:    100603 bytes (10.06%)
    benchmark_test.go:174: raw   size:    1000000 bytes
    benchmark_test.go:175: delta size:    100603 bytes (10.06%)
    benchmark_test.go:174: raw   size:    1000000 bytes
    benchmark_test.go:175: delta size:    100603 bytes (10.06%)
	... [output truncated]
BenchmarkDeltaCutTail1GB-12       	       1	8750044978 ns/op	 114.29 MB/s	22103672 B/op	   24085 allocs/op
--- BENCH: BenchmarkDeltaCutTail1GB-12
    benchmark_test.go:212: raw   size:    1000000000 bytes
    benchmark_test.go:213: delta size:    2521970 bytes (0.25%)
BenchmarkDeltaCutTail1MB-12       	     100	  10134102 ns/op	  98.68 MB/s	16792740 B/op	      47 allocs/op
--- BENCH: BenchmarkDeltaCutTail1MB-12
    benchmark_test.go:212: raw   size:    1000000 bytes
    benchmark_test.go:213: delta size:    430 bytes (0.04%)
    benchmark_test.go:212: raw   size:    1000000 bytes
    benchmark_test.go:213: delta size:    430 bytes (0.04%)
    benchmark_test.go:212: raw   size:    1000000 bytes
    benchmark_test.go:213: delta size:    430 bytes (0.04%)
    benchmark_test.go:212: raw   size:    1000000 bytes
    benchmark_test.go:213: delta size:    430 bytes (0.04%)
    benchmark_test.go:212: raw   size:    1000000 bytes
    benchmark_test.go:213: delta size:    430 bytes (0.04%)
	... [output truncated]
BenchmarkDeltaCutHead1GB-12       	       1	7891398297 ns/op	 126.72 MB/s	22092368 B/op	   19749 allocs/op
--- BENCH: BenchmarkDeltaCutHead1GB-12
    benchmark_test.go:254: raw   size:    1000000000 bytes
    benchmark_test.go:255: delta size:    2069481 bytes (0.21%)
BenchmarkDeltaCutHead1MB-12       	     140	  10038620 ns/op	  99.62 MB/s	16793752 B/op	      45 allocs/op
--- BENCH: BenchmarkDeltaCutHead1MB-12
    benchmark_test.go:254: raw   size:    1000000 bytes
    benchmark_test.go:255: delta size:    433 bytes (0.04%)
    benchmark_test.go:254: raw   size:    1000000 bytes
    benchmark_test.go:255: delta size:    433 bytes (0.04%)
    benchmark_test.go:254: raw   size:    1000000 bytes
    benchmark_test.go:255: delta size:    433 bytes (0.04%)
    benchmark_test.go:254: raw   size:    1000000 bytes
    benchmark_test.go:255: delta size:    433 bytes (0.04%)
    benchmark_test.go:254: raw   size:    1000000 bytes
    benchmark_test.go:255: delta size:    433 bytes (0.04%)
	... [output truncated]
PASS
ok  	github.com/balena-os/librsync-go	123.219s

lmbarros avatar Jan 04 '24 20:01 lmbarros

For reference, an attempt that didn't help:

commit 09800c1303cf82049e62a6c97d9ceaf1ea29461b
Author: Leandro Motta Barros <[email protected]>
Date:   Thu Jan 4 16:41:12 2024 -0300

    [TENTATIVE] Avoid copying data
    
    Signed-off-by: Leandro Motta Barros <[email protected]>
    Change-type: patch

diff --git a/signature.go b/signature.go
index bfa1f81..6d33a69 100644
--- a/signature.go
+++ b/signature.go
@@ -65,10 +65,16 @@ func SignatureWithBlockCount(input io.Reader, output io.Writer, blockLen, strong
 
 	var ret SignatureType
 	ret.weak2block = make(map[uint32]int, blockCount)
-	ret.strongSigs = make([]byte, 0, blockCount*int(strongLen))
 	ret.sigType = sigType
 	ret.strongLen = strongLen
 	ret.blockLen = blockLen
+	// These 32 extra bytes we allocate here are to play nice with the
+	// SumAppending() call below. This call (at first) appends the whole strong
+	// sum to the slice, disregarding strongLen. So, if we allocate only
+	// blockCount*strongLen bytes here, the last call to SumAppending() will
+	// need to re-allocate and memcopy the whole thing. (This only happens when
+	// strongLen is smaller than the full strong sum size.)
+	ret.strongSigs = make([]byte, 0, blockCount*int(strongLen)+32)
 
 	summer, err := NewSummer(sigType, strongLen)
 	if err != nil {
@@ -100,11 +106,11 @@ func SignatureWithBlockCount(input io.Reader, output io.Writer, blockLen, strong
 			return nil, err
 		}
 
-		strong := summer.Sum(data)
-		output.Write(strong)
-
+		lenBefore := len(ret.strongSigs)
 		ret.weak2block[weak] = len(ret.strongSigs) / int(strongLen)
-		ret.strongSigs = append(ret.strongSigs, strong...)
+		ret.strongSigs = summer.SumAppending(data, ret.strongSigs)
+		strong := ret.strongSigs[lenBefore:]
+		output.Write(strong)
 	}
 
 	return &ret, nil
diff --git a/summer.go b/summer.go
index 51488c8..70e6059 100644
--- a/summer.go
+++ b/summer.go
@@ -53,3 +53,19 @@ func (s *Summer) Sum(data []byte) []byte {
 	s.hasher.Write(data)
 	return s.hasher.Sum(s.buffer[:0])[:s.strongLen]
 }
+
+// SumAppending appends the strong checksum of data to dst, and returns the
+// resulting slice.
+//
+// There's one implementation detail worth noting, as it "leaks" to the point
+// that it can impact performance: Initially, this method appends the whole
+// strong checksum to dst, disregarding s.strongLen. Only after that it will
+// re-slice the result to take s.strongLen into account.
+func (s *Summer) SumAppending(data []byte, dst []byte) []byte {
+	s.hasher.Reset()
+	s.hasher.Write(data)
+
+	lenBefore := len(dst)
+	dst = s.hasher.Sum(dst)
+	return dst[:lenBefore+int(s.strongLen)]
+}

lmbarros avatar Jan 04 '24 20:01 lmbarros