foundry icon indicating copy to clipboard operation
foundry copied to clipboard

feat(invariant): scrape logs and return values and collect samples

Open grandizzy opened this issue 10 months ago • 4 comments

Motivation

Ref #51 https://forum.openzeppelin.com/t/using-automatic-analysis-tools-with-makerdao-contracts/1021/2

Solution

  • do not collect state from call if call revert
  • add BasicTxDetails and CallTargetDetails structs - contains fuzzed function (to be able too to decode result and for generating counterexample) and target abi (to decode events)
  • decode call result (if function has outputs) and logs (if any) and insert samples collected in dictionary, per var type. Samples are stored up to configured test run depth and are persisted / reused between test runs
  • when fuzzing a param from state dict, use save samples of param type with a weight of 50
  • some code cleanup and tests for collecting values from result and event log

Tests

forge test --mt invariant_check_dschief_with_return_value
contract SimpleDSChief {
    mapping(bytes32 => address) public slates;
    mapping(address => bytes32) public votes;
    mapping(address => uint256) public approvals;
    mapping(address => uint256) public deposits;
    bool public hacked = false;

    function lock(uint256 wad) public {
        deposits[msg.sender] = add(deposits[msg.sender], wad);
        addWeight(wad, votes[msg.sender]);
    }

    function free(uint256 wad) public {
        deposits[msg.sender] = sub(deposits[msg.sender], wad);
        subWeight(wad, votes[msg.sender]);
    }

    function voteYays(address yay) public returns (bytes32) {
        bytes32 slate = etch(yay);
        voteSlate(slate);
        return slate;
    }

    function etch(address yay) public returns (bytes32 slate) {
        bytes32 hash = keccak256(abi.encodePacked(yay));
        slates[hash] = yay;
        return hash;
    }

    function voteSlate(bytes32 slate) public {
        uint256 weight = deposits[msg.sender];
        subWeight(weight, votes[msg.sender]);
        votes[msg.sender] = slate;
        addWeight(weight, votes[msg.sender]);
    }

    function addWeight(uint256 weight, bytes32 slate) internal {
        address yay = slates[slate];
        approvals[yay] = add(approvals[yay], weight);
    }

    function subWeight(uint256 weight, bytes32 slate) internal {
        address yay = slates[slate];
        approvals[yay] = sub(approvals[yay], weight);
    }

    function add(uint256 x, uint256 y) internal pure returns (uint256 z) {
        require((z = x + y) >= x);
    }

    function sub(uint256 x, uint256 y) internal pure returns (uint256 z) {
        require((z = x - y) <= x);
    }

    function checkInvariant() public {
        bytes32 senderSlate = votes[msg.sender];
        address option = slates[senderSlate];
        uint256 senderDeposit = deposits[msg.sender];
        if (approvals[option] < senderDeposit) {
            hacked = true;
        }
    }
}

contract SimpleDSChiefTest is Test {
    SimpleDSChief dsChief;

    function setUp() public {
        dsChief = new SimpleDSChief();
        targetContract(address(dsChief));
        targetSender(address(0x10000));
        targetSender(address(0x20000));
        targetSender(address(0x30000));
    }

    /// forge-config: default.invariant.runs = 500
    /// forge-config: default.invariant.depth = 500
    function invariant_check_dschief_with_return_value() public view {
        assertFalse(dsChief.hacked());
    }
}
forge test --mt invariant_check_dschief_with_event
contract SimpleDSChiefWithEvent {
    event Slate(bytes32 indexed slate);
    mapping(bytes32 => address) public slates;
    mapping(address => bytes32) public votes;
    mapping(address => uint256) public approvals;
    mapping(address => uint256) public deposits;

    bool public hacked = false;

    function lock(uint256 wad) public {
        deposits[msg.sender] = add(deposits[msg.sender], wad);
        addWeight(wad, votes[msg.sender]);
    }

    function free(uint256 wad) public {
        deposits[msg.sender] = sub(deposits[msg.sender], wad);
        subWeight(wad, votes[msg.sender]);
    }

    function voteYays(address yay) public {
        bytes32 hash = keccak256(abi.encodePacked(yay));
        slates[hash] = yay;
        voteSlate(hash);
    }

    function etch(address yay) public {
        bytes32 hash = keccak256(abi.encodePacked(yay));
        slates[hash] = yay;
        emit Slate(hash);
    }

    function voteSlate(bytes32 slate) public {
        uint256 weight = deposits[msg.sender];
        subWeight(weight, votes[msg.sender]);
        votes[msg.sender] = slate;
        addWeight(weight, votes[msg.sender]);
    }

    function addWeight(uint256 weight, bytes32 slate) internal {
        address yay = slates[slate];
        approvals[yay] = add(approvals[yay], weight);
    }

    function subWeight(uint256 weight, bytes32 slate) internal {
        address yay = slates[slate];
        approvals[yay] = sub(approvals[yay], weight);
    }

    function add(uint256 x, uint256 y) internal pure returns (uint256 z) {
        require((z = x + y) >= x);
    }

    function sub(uint256 x, uint256 y) internal pure returns (uint256 z) {
        require((z = x - y) <= x);
    }

    function checkInvariant() public {
        bytes32 senderSlate = votes[msg.sender];
        address option = slates[senderSlate];
        uint256 senderDeposit = deposits[msg.sender];

        if (approvals[option] < senderDeposit) {
            hacked = true;
        }
    }
}

contract SimpleDSChiefWithEventTest is Test {
    SimpleDSChiefWithEvent dsChief;

    function setUp() public {
        dsChief = new SimpleDSChiefWithEvent();
        targetContract(address(dsChief));
        targetSender(address(0x10000));
        targetSender(address(0x20000));
        targetSender(address(0x30000));
    }

    /// forge-config: default.invariant.runs = 500
    /// forge-config: default.invariant.depth = 500
    function invariant_check_dschief_with_event() public view {
        assertFalse(dsChief.hacked());
    }
}
CC @klkvr @mds1

grandizzy avatar Apr 15 '24 05:04 grandizzy

With latest PR changes foundry can catch DSChief bug in about 30 seconds (500 runs / 500 depth, missing it ~ 2 out of 10 times) and never missed in ~ 110 seconds (2000 runs with depth of 500)

Ran 1 test suite in 28.02s (28.02s CPU time): 0 tests passed, 1 failed, 0 skipped (1 total tests)
Failing tests:
Encountered 1 failing test in test/research/vera_dschief.t.sol:SimpleDSChiefTest
[FAIL. Reason: assertion failed]
        [Sequence]
                sender=0x0000000000000000000000000000000000030000 addr=[test/research/vera_dschief.t.sol:SimpleDSChief]0x5615dEB798BB3E4dFa0139dFa1b3D433Cc23b72f calldata=voteSlate(bytes32) args=[0xe100d6d47cca6bc77fdd20fbcbbfc6c8db6b9e81a47e4bda6afca133036d0ab5]
                sender=0x0000000000000000000000000000000000030000 addr=[test/research/vera_dschief.t.sol:SimpleDSChief]0x5615dEB798BB3E4dFa0139dFa1b3D433Cc23b72f calldata=lock(uint256) args=[1]
                sender=0x0000000000000000000000000000000000010000 addr=[test/research/vera_dschief.t.sol:SimpleDSChief]0x5615dEB798BB3E4dFa0139dFa1b3D433Cc23b72f calldata=etch(address) args=[0x00000000000000000000000000000000000008DA]
                sender=0x0000000000000000000000000000000000030000 addr=[test/research/vera_dschief.t.sol:SimpleDSChief]0x5615dEB798BB3E4dFa0139dFa1b3D433Cc23b72f calldata=checkInvariant() args=[]
 invariant_check_dschief() (runs: 500, calls: 249548, reverts: 32928)

The proposed solution is to:

  • collect a number of samples from target selectors return values and from events. These samples are limited to configured test run depth.
  • samples are persisted and applied across runs with a weight of 40 (so when fuzzing from state there'll be 60% values generated from unique values collected during run and 40% from values reused across runs)
  • solution favours multiple runs with average depth configured (rather than testing with less runs with big depth) - so issues like the DSChief one likely won't be caught if running let's say 10 times with depth of thousands
  • the number of samples and their randomness weight can be made configurable options (with defaults described above) but this will increase the UX complexity, so not sure about

Further improvement that can be considered is to make samples more efficient by collecting and applying them per type. However this introduce code complexity as we'll have to decode results / events with proper target abi and also to update fuzz strategies to take the fuzzed types into account.

@mds1 @klkvr would love to hear your thoughts re this approach, thanks

grandizzy avatar Apr 17 '24 15:04 grandizzy

I went ahead and committed a change to collect and apply samples per result type and with this approach was able to reproduce the DSChief bug consistently in preliminary testing, with same 500 / 500. Also, since these samples are now collected and targeted applied (so if you fuzz an address from state fuzzer makes sure value used from collected samples is of address type) I don't think making configurable options for number of samples and randomness would make sense anymore.

grandizzy avatar Apr 18 '24 13:04 grandizzy

this makes a lot of sense, the next step should probably be to improve the way we handle logs and storage

I added logs decoding in https://github.com/foundry-rs/foundry/commit/d9d8619e7e45077dc7850d47f7609380bd175cc6 , will track storage handle in follow up PR (to keep scope limited for this one) if you OK with

grandizzy avatar Apr 21 '24 11:04 grandizzy

In general the approach described here makes a lot of sense to me, definitely supportive. Regarding:

the number of samples and their randomness weight can be made configurable options (with defaults described above) but this will increase the UX complexity, so not sure about

I think your defaults seem pretty reasonable, so we should avoid increasing UX complexity or spending too much time debating param defaults until we have confidence that the changes matter or that the added complexity is worth it. And we can gain that confidence once we have good benchmarks to compare against. So my suggestion would be to stick what we have here, but document all the defaults/assumptions somewhere (I'm indifferent as to where), that way we can easily revisit these decisions and adjust based on data later

mds1 avatar Apr 26 '24 01:04 mds1