git-sizer icon indicating copy to clipboard operation
git-sizer copied to clipboard

Introduced sha256 support for git-sizer

Open fcharlie opened this issue 2 years ago • 2 comments

image

fcharlie avatar Apr 17 '23 13:04 fcharlie

@fcharlie I rebased this onto #117 to resolve merge conflicts, and also changed a few things on the way (please double-check!):

  • The dependencies were updated in a separate commit, to make reviewing this PR easier.
  • The hex size is double the byte size of the SHAs, I modified the code in NewObjectIter() accordingly.
  • The hex size of SHA-256 is not 32, but 64. I modified the code comment of the NewOID() function.
  • Most importantly, I added a test to verify that this thing now does support SHA-256.
Here is the range-diff
  • -: ------- > 1: 69418a9 Re-format some comments

  • -: ------- > 2: 16b2fa2 Update dependencies

  • 1: e8687bb ! 3: 69ac7d8 Introduced sha256 support for git-sizer

    @@ Metadata
     Author: Force Charlie <[email protected]>
    
      ## Commit message ##
    -    Introduced sha256 support for git-sizer
    +    Introduce sha256 support for git-sizer
    
      ## git/git.go ##
     @@ git/git.go: type Repository struct {
    @@ git/git.go: type Repository struct {
     +	hashAlgo HashAlgo
      }
    
    - // smartJoin returns the path that can be described as `relPath`
    -@@ git/git.go: func NewRepository(path string) (*Repository, error) {
    - 	if err == nil {
    - 		return nil, errors.New("this appears to be a shallow clone; full clone required")
    + // smartJoin returns `relPath` if it is an absolute path. If not, it
    +@@ git/git.go: func NewRepositoryFromGitDir(gitDir string) (*Repository, error) {
    + 		)
      	}
    + 
     +	hashAlgo := HashSHA1
    -+	cmd = exec.Command(gitBin, "rev-parse", "--show-object-format")
    -+	if out, err = cmd.Output(); err == nil {
    ++	//nolint:gosec // `gitDir` is the path we need Git to access.
    ++	cmd := exec.Command(gitBin, "--git-dir", gitDir, "rev-parse", "--show-object-format")
    ++	if out, err := cmd.Output(); err == nil {
     +		if string(bytes.TrimSpace(out)) == "sha256" {
     +			hashAlgo = HashSHA256
     +		}
     +	}
    - 
    - 	return &Repository{
    --		path:   gitDir,
    ++
    + 	repo := Repository{
    +-		gitDir: gitDir,
     -		gitBin: gitBin,
    -+		path:     gitDir,
    ++		gitDir:   gitDir,
     +		gitBin:   gitBin,
     +		hashAlgo: hashAlgo,
    - 	}, nil
    - }
    + 	}
    
    -@@ git/git.go: func (repo *Repository) GitCommand(callerArgs ...string) *exec.Cmd {
    - func (repo *Repository) Path() string {
    - 	return repo.path
    + 	full, err := repo.IsFull()
    +@@ git/git.go: func (repo *Repository) GitPath(relPath string) (string, error) {
    + 	// current directory, we can use it as-is:
    + 	return string(bytes.TrimSpace(out)), nil
      }
     +
     +func (repo *Repository) HashAlgo() HashAlgo {
    @@ git/obj_iter.go: func (repo *Repository) NewObjectIter(ctx context.Context) (*Ob
      		headerCh: make(chan BatchHeader),
      	}
     -
    -+	hashSize := repo.HashSize()
    ++	hashHexSize := repo.HashSize() * 2
      	iter.p.Add(
      		// Read OIDs from `iter.oidCh` and write them to `git
      		// rev-list`:
    @@ git/obj_iter.go: func (repo *Repository) NewObjectIter(ctx context.Context) (*Ob
      			"copy-oids",
      			func(_ context.Context, _ pipe.Env, line []byte, stdout *bufio.Writer) error {
     -				if len(line) < 40 {
    -+				if len(line) < hashSize {
    ++				if len(line) < hashHexSize {
      					return fmt.Errorf("line too short: '%s'", line)
      				}
     -				if _, err := stdout.Write(line[:40]); err != nil {
    -+				if _, err := stdout.Write(line[:hashSize]); err != nil {
    ++				if _, err := stdout.Write(line[:hashHexSize]); err != nil {
      					return fmt.Errorf("writing OID to 'git cat-file': %w", err)
      				}
      				if err := stdout.WriteByte('\n'); err != nil {
    
    + ## git/obj_resolver.go ##
    +@@ git/obj_resolver.go: func (repo *Repository) ResolveObject(name string) (OID, error) {
    + 	cmd := repo.GitCommand("rev-parse", "--verify", "--end-of-options", name)
    + 	output, err := cmd.Output()
    + 	if err != nil {
    +-		return NullOID, fmt.Errorf("resolving object %q: %w", name, err)
    ++		return repo.HashAlgo().NullOID(), fmt.Errorf("resolving object %q: %w", name, err)
    + 	}
    + 	oidString := string(bytes.TrimSpace(output))
    + 	oid, err := NewOID(oidString)
    + 	if err != nil {
    +-		return NullOID, fmt.Errorf("parsing output %q from 'rev-parse': %w", oidString, err)
    ++		return repo.HashAlgo().NullOID(), fmt.Errorf("parsing output %q from 'rev-parse': %w", oidString, err)
    + 	}
    + 	return oid, nil
    + }
    +
      ## git/oid.go ##
     @@
      package git
    
      import (
     +	"bytes"
    ++	//nolint:gosec // Git indeed does use SHA-1, still
     +	"crypto/sha1"
     +	"crypto/sha256"
      	"encoding/hex"
    @@ git/oid.go
      }
    
     -// NewOID converts an object ID in hex format (i.e., `[0-9a-f]{40}`)
    -+// NewOID converts an object ID in hex format (i.e., `[0-9a-f]{40,32}`)
    ++// NewOID converts an object ID in hex format (i.e., `[0-9a-f]{40,64}`)
      // into an `OID`.
      func NewOID(s string) (OID, error) {
      	oidBytes, err := hex.DecodeString(s)
    @@ git/oid.go: func NewOID(s string) (OID, error) {
      	dst[0] = '"'
      	dst[len(dst)-1] = '"'
    
    - ## git/ref_filter.go ##
    -@@ git/ref_filter.go: func (f intersection) Filter(refname string) bool {
    - // If `f1` is `nil`, it is treated as including nothing.
    - type include struct{}
    - 
    --func (_ include) Combine(f1, f2 ReferenceFilter) ReferenceFilter {
    -+func (include) Combine(f1, f2 ReferenceFilter) ReferenceFilter {
    - 	if f1 == nil {
    - 		return f2
    - 	}
    - 	return union{f1, f2}
    - }
    - 
    --func (_ include) Inverted() Combiner {
    -+func (include) Inverted() Combiner {
    - 	return Exclude
    - }
    - 
    -@@ git/ref_filter.go: func (f union) Filter(refname string) bool {
    - // If `f1` is `nil`, it is treated as including everything.
    - type exclude struct{}
    - 
    --func (_ exclude) Combine(f1, f2 ReferenceFilter) ReferenceFilter {
    -+func (exclude) Combine(f1, f2 ReferenceFilter) ReferenceFilter {
    - 	if f1 == nil {
    - 		return inverse{f2}
    - 	}
    -@@ git/ref_filter.go: func (_ exclude) Combine(f1, f2 ReferenceFilter) ReferenceFilter {
    - 
    - }
    - 
    --func (_ exclude) Inverted() Combiner {
    -+func (exclude) Inverted() Combiner {
    - 	return include{}
    - }
    - 
    -@@ git/ref_filter.go: var Exclude exclude
    - 
    - type allReferencesFilter struct{}
    - 
    --func (_ allReferencesFilter) Filter(_ string) bool {
    -+func (allReferencesFilter) Filter(_ string) bool {
    - 	return true
    - }
    - 
    -@@ git/ref_filter.go: var AllReferencesFilter allReferencesFilter
    - // whose names start with the specified `prefix`, which must match at
    - // a component boundary. For example,
    - //
    --// * Prefix "refs/foo" matches "refs/foo" and "refs/foo/bar" but not
    --//   "refs/foobar".
    -+//   - Prefix "refs/foo" matches "refs/foo" and "refs/foo/bar" but not
    -+//     "refs/foobar".
    - //
    --// * Prefix "refs/foo/" matches "refs/foo/bar" but not "refs/foo" or
    --//   "refs/foobar".
    -+//   - Prefix "refs/foo/" matches "refs/foo/bar" but not "refs/foo" or
    -+//     "refs/foobar".
    - func PrefixFilter(prefix string) ReferenceFilter {
    - 	if prefix == "" {
    - 		return AllReferencesFilter
    -
      ## git/tree.go ##
     @@ git/tree.go: import (
    
    @@ git/tree.go: func (iter *TreeIter) NextEntry() (TreeEntry, bool, error) {
      	return entry, true, nil
      }
    
    - ## go.mod ##
    -@@ go.mod: module github.com/github/git-sizer
    - go 1.17
    - 
    - require (
    --	github.com/cli/safeexec v1.0.0
    --	github.com/davecgh/go-spew v1.1.1 // indirect
    -+	github.com/cli/safeexec v1.0.1
    -+	github.com/github/go-pipe v1.0.2
    - 	github.com/spf13/pflag v1.0.5
    --	github.com/stretchr/testify v1.8.1
    --	golang.org/x/sync v0.1.0 // indirect
    -+	github.com/stretchr/testify v1.8.2
    - )
    - 
    --require github.com/github/go-pipe v1.0.2
    --
    - require (
    --	github.com/kr/pretty v0.1.0 // indirect
    -+	github.com/davecgh/go-spew v1.1.1 // indirect
    -+	github.com/kr/pretty v0.3.1 // indirect
    - 	github.com/pmezard/go-difflib v1.0.0 // indirect
    --	gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 // indirect
    -+	golang.org/x/sync v0.1.0 // indirect
    -+	gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect
    - 	gopkg.in/yaml.v3 v3.0.1 // indirect
    - )
    -
    - ## go.sum ##
    -@@
    --github.com/cli/safeexec v1.0.0 h1:0VngyaIyqACHdcMNWfo6+KdUYnqEr2Sg+bSP1pdF+dI=
    --github.com/cli/safeexec v1.0.0/go.mod h1:Z/D4tTN8Vs5gXYHDCbaM1S/anmEDnJb1iW0+EJ5zx3Q=
    -+github.com/cli/safeexec v1.0.1 h1:e/C79PbXF4yYTN/wauC4tviMxEV13BwljGj0N9j+N00=
    -+github.com/cli/safeexec v1.0.1/go.mod h1:Z/D4tTN8Vs5gXYHDCbaM1S/anmEDnJb1iW0+EJ5zx3Q=
    - github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
    - github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
    - github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
    - github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
    - github.com/github/go-pipe v1.0.2 h1:befTXflsc6ir/h9f6Q7QCDmfojoBswD1MfQrPhmmSoA=
    - github.com/github/go-pipe v1.0.2/go.mod h1:/GvNLA516QlfGGMtfv4PC/5/CdzL9X4af/AJYhmLD54=
    --github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
    - github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
    -+github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
    -+github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
    -+github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
    - github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
    - github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
    - github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
    - github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
    -+github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=
    - github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
    - github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
    -+github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
    -+github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
    - github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
    - github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
    - github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
    -@@ go.sum: github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSS
    - github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
    - github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
    - github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
    --github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
    - github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
    -+github.com/stretchr/testify v1.8.2 h1:+h33VjcLVPDHtOdpUCuF+7gSuG3yGIftsP1YvFihtJ8=
    -+github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
    - github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
    - go.uber.org/goleak v1.2.0 h1:xqgm/S+aQvhWFTtR0XK3Jvg7z8kGV8P4X14IzwN3Eqk=
    - go.uber.org/goleak v1.2.0/go.mod h1:XJYK+MuIchqpmGmUSAzotztawfKvYLUIgg7guXrwVUo=
    -@@ go.sum: golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8T
    - golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
    - golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
    - gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
    --gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
    - gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
    -+gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
    -+gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
    - gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
    - gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
    - gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
    -
      ## internal/testutils/repoutils.go ##
     @@ internal/testutils/repoutils.go: func (repo *TestRepo) UpdateRef(t *testing.T, refname string, oid git.OID) {
    
    @@ internal/testutils/repoutils.go: func (repo *TestRepo) UpdateRef(t *testing.T, r
    
      ## sizes/graph.go ##
     @@ sizes/graph.go: func ScanRepositoryUsingGraph(
    + 	nameStyle NameStyle,
    + 	progressMeter meter.Progress,
      ) (HistorySize, error) {
    - 	ctx, cancel := context.WithCancel(context.TODO())
    - 	defer cancel()
    --
     +	nullOID := repo.HashAlgo().NullOID()
    - 	graph := NewGraph(rg, nameStyle)
    + 	graph := NewGraph(nameStyle)
    
    - 	refIter, err := repo.NewReferenceIter(ctx)
    + 	objIter, err := repo.NewObjectIter(ctx)
     @@ sizes/graph.go: func ScanRepositoryUsingGraph(
      		case "tree":
      			trees = append(trees, ObjectHeader{obj.OID, obj.ObjectSize})
    @@ sizes/output.go: func (i *item) MarshalJSON() ([]byte, error) {
      		stat.ObjectName = i.path.OID.String()
      		stat.ObjectDescription = i.path.Path()
      	}
    -@@ sizes/output.go: func (t *Threshold) Type() string {
    - // A `pflag.Value` that can be used as a boolean option that sets a
    - // `Threshold` variable to a fixed value. For example,
    - //
    --//		pflag.Var(
    --//			sizes.NewThresholdFlagValue(&threshold, 30),
    --//			"critical", "only report critical statistics",
    --//		)
    -+//	pflag.Var(
    -+//		sizes.NewThresholdFlagValue(&threshold, 30),
    -+//		"critical", "only report critical statistics",
    -+//	)
    - //
    - // adds a `--critical` flag that sets `threshold` to 30.
    - type thresholdFlagValue struct {
    
  • -: ------- > 4: 0fc1aa3 Add a test case for SHA-256 support

dscho avatar Dec 14 '23 09:12 dscho

@dscho Great job, I used in-house tools to convert git-sizer to a sha256 repository, then ran a git-sizer scan and everything worked.

image image

fcharlie avatar Dec 23 '23 06:12 fcharlie