gnark MIMC collision problem

MIMC is based on finite field, Suppose there is a uint256 number a, and the fr.Element modulus is q, then this equation will hold: MIMC(a) = MIMC(a + q). I want to use MIMC function in circuit:

import (
	"bytes"
	"crypto/rand"
	"fmt"
	"github.com/consensys/gnark-crypto/ecc"
	"github.com/consensys/gnark-crypto/hash"
	"github.com/consensys/gnark/frontend"
	"github.com/consensys/gnark/std/hash/mimc"
	"github.com/consensys/gnark/test"
	"math/big"
	"testing"
	 mimc2 "github.com/consensys/gnark-crypto/ecc/bn254/fr/mimc"
	"github.com/consensys/gnark/backend"
)

type FooConstraint struct {
	A1 frontend.Variable
	A2 frontend.Variable
	A3 frontend.Variable
	A4 frontend.Variable
}

func CollectDataFromFoo(api frontend.API, foo FooConstraint,
						hFunc *mimc.MiMC) {
	A1Bits := api.ToBinary(foo.A1, 64)
	A2Bits := api.ToBinary(foo.A2, 64)
	A3Bits := api.ToBinary(foo.A3, 64)
	A4Bits := api.ToBinary(foo.A4, 64)

	ABits := append(A1Bits, A2Bits...)
	ABits = append(ABits, A3Bits...)
	ABits = append(ABits, A4Bits...)
	A := api.FromBinary(ABits...)
	hFunc.Write(A)
}

func SetFooConstraints(a1 uint64, a2 uint64, a3 uint64, a4 uint64) (w FooConstraint) {
	w = FooConstraint{
		A1: a1,
		A2: a2,
		A3: a3,
		A4: a4,
	}
	return w
}
type MimcDemoCircuit struct {
    A            frontend.Variable
    FinalHash    frontend.Variable `gnark:",public"`
}

func (circuit MimcDemoCircuit) Define(api frontend.API) error {
	// mimc
	hFunc, err := mimc.NewMiMC(api)
	if err != nil {
		return err
	}
	CollectDataFromFoo(api, circuit.Foo, &hFunc)
	hash := hFunc.Sum()
	api.AssertIsEqual(hash, circuit.FinalHash)
	return nil
}

func Test4(t *testing.T) {
	var circuit, witness MimcDemoCircuit
	witness.Foo = SetFooConstraints(1, 1, 1, 1)
	hFunc := mimc2.NewMiMC()
	var buf bytes.Buffer

	// 2^192 + 2^128 + 2^64 + 1 + q
	// equal to FooConstraint {A1: 3486998266802970666, A2: 13281191951274694750, A3: 2896914383306846354, A4: 4891460686036598786}
	buf.Write(new(big.Int).SetUint64(3486998266802970666).Bytes())
	buf.Write(new(big.Int).SetUint64(13281191951274694750).Bytes())
	buf.Write(new(big.Int).SetUint64(2896914383306846354).Bytes())
	buf.Write(new(big.Int).SetUint64(4891460686036598786).Bytes())

	hFunc.Write(buf.Bytes())
	hashVal := hFunc.Sum(nil)
	witness.FinalHash = hashVal

	assert := test.NewAssert(t)
	assert.SolvingSucceeded(&circuit, &witness, test.WithBackends(backend.GROTH16), test.WithCurves(ecc.BN254), test.WithCompileOpts(frontend.IgnoreUnconstrainedInputs()))
}

This test passed, it shows that if prover provide verifier FooConstraint {A1: 3486998266802970666, A2: 13281191951274694750, A3: 2896914383306846354, A4: 4891460686036598786}, verifier use mimc calculate this hash, use it as circuit public input, the circuit verify can pass.

What should I do to make sure that verifier is convinced by the fact prover use FooConstraint {A1: 1, A2: 1, A3: 1, A4: 1} not FooConstraint {A1: 3486998266802970666, A2: 13281191951274694750, A3: 2896914383306846354, A4: 4891460686036598786} in circuit? One way I can think of is change CollectDataFromFoo to

func CollectDataFromFoo(api frontend.API, foo FooConstraint,
						hFunc *mimc.MiMC) {
	A1Bits := api.ToBinary(foo.A1, 64)
	A2Bits := api.ToBinary(foo.A2, 64)
	A3Bits := api.ToBinary(foo.A3, 64)

	ABits := append(A1Bits, A2Bits...)
	ABits = append(ABits, A3Bits...)
	A := api.FromBinary(ABits...)
	hFunc.Write(A)

        A4Bits := api.ToBinary(foo.A4, 64)
        B := api.FromBinary(A4Bits...)
        hFunc.Write(B)
}

And Verifier use the same way to calculate mimc hash. Is this a recommend way? Is there a better way?

May 19 '22 12:05 lightning-li

In the circuit, you truncate and pack A1..A4 into one element A1(64)||A2(64)||A3(64)||A4(64) why not write 4 elements into hash instead?

May 19 '22 17:05 yycen

In the circuit, you truncate and pack A1..A4 into one element A1(64)||A2(64)||A3(64)||A4(64) why not write 4 elements into hash instead?

There are many elements needed to be hashed, I want to reduce the number of constraints in circuit, and the overhead for verifier (L1 smart contract) to generate the same hash

May 20 '22 02:05 lightning-li

Hi, so I think the solution to the issue is to decompose the []byte slice buffer in the "real" (=non snark) version of mimc in base p (so the slice is interpreted as a number a0 + a1*r + a2*r^2 + .. + an*r^n where r is the size of the snark field), and use as blocks the digits ai. The r-basis decomposition ensures there is a one-to-one correspondence between sequences of blocks and byte slices.

Currently the implementation in gnark-crypto does not do that, but rather decomposes the []byte slice as blocks of size 256bits for instance for bn254. It's a security bug as a matter of fact, I raised the corresponding issue in gnark-crypto.

May 23 '22 16:05 ThomasPiellard

gnark gnark copied to clipboard

MIMC collision problem

gnark
gnark copied to clipboard