evmole icon indicating copy to clipboard operation
evmole copied to clipboard

Generate cfg output using Dot notation

Open zerjioang opened this issue 3 months ago • 5 comments

I see that evmole outperforms many of the existing tools for building CFGs from EVM bytecode, and I wanted to compare its results with other tools in a more deterministic way. To do that, I was thinking of generating a very basic graph image and then comparing the shape of the graph with the output of other tools. However, the current version offers no way to generate such a graph file.

I want to create a DOT-notation graph using your Python SDK, but I'm not able to generate a proper result. Could you provide a script to generate such output? I attempted a basic implementation, but I'm unable to create the proper connections. Here is my Python script, which is meant to create a .dot file in Graphviz notation for any input bytecode string.

import sys
from evmole import Contract
from evmole import contract_info

# to convert from list to dot
from dataclasses import dataclass
from typing import List, Union

@dataclass
class BlockType:
    """Empty base class for the block-type records declared in this script.

    NOTE(review): this is a local class defined in this file. It is not the
    ``BlockType`` that ``main`` imports from ``evmole``, so ``isinstance``
    checks against it (or its subclasses) presumably never match the objects
    evmole returns -- confirm.
    """
    pass

@dataclass
class Jumpi(BlockType):
    """Block-type record holding the two successors of a conditional branch."""
    # Key looked up in the start-offset -> label map for the "true" edge.
    true_to: int
    # Key looked up in the start-offset -> label map for the "false" edge.
    false_to: int

@dataclass
class Terminate(BlockType):
    """Block-type record for a block that gets no outgoing edges in the graph."""
    # Rendered as "Success" when true and "Failure" otherwise.
    success: bool

@dataclass
class Block:
    """One graph node: a pair of offsets plus a block-type record."""
    # Lookup key for this block and first number in its "Block_<start>_<end>" label.
    start: int
    # Second number in the "Block_<start>_<end>" label.
    end: int
    # Decides which node/edge lines blocks_to_dot emits for this block.
    btype: Union[Jumpi, Terminate]

def blocks_to_dot(blocks: List[Block]) -> str:
    """Render ``blocks`` as Graphviz DOT text.

    Every block becomes one node named ``Block_<start>_<end>``. A ``Jumpi``
    block additionally gets a "true" and a "false" edge, a ``Terminate``
    block is drawn as a box tagged Success/Failure, and a block of any other
    type is tagged "Unknown" and gets no edges.

    NOTE(review): the ``isinstance`` checks use the ``Jumpi`` / ``Terminate``
    names visible in this module; objects of unrelated classes (presumably
    including the block types evmole itself returns) end up as "Unknown".

    Args:
        blocks: Blocks exposing ``start``, ``end`` and ``btype``.

    Returns:
        The complete ``digraph G { ... }`` document as one string.
    """
    names = {}
    for blk in blocks:
        names[blk.start] = f"Block_{blk.start}_{blk.end}"

    def name_of(offset):
        # A target that is not the start of a known block still gets a
        # placeholder node name instead of raising.
        return names.get(offset, f"Block_{offset}")

    out = ["digraph G {"]
    for blk in blocks:
        node = names[blk.start]
        kind = blk.btype

        if isinstance(kind, Jumpi):
            on_true = name_of(kind.true_to)
            on_false = name_of(kind.false_to)
            out.extend(
                (
                    f'    "{node}" [label="{node}\\nJumpi"];',
                    f'    "{node}" -> "{on_true}" [label="true"];',
                    f'    "{node}" -> "{on_false}" [label="false"];',
                )
            )
            continue

        if isinstance(kind, Terminate):
            outcome = "Success" if kind.success else "Failure"
            out.append(f'    "{node}" [label="{node}\\nTerminate({outcome})", shape=box];')
            continue

        out.append(f'    "{node}" [label="{node}\\nUnknown"];')

    return "\n".join(out + ["}"])


def main():
    """Command-line entry point: print the DOT graph for one bytecode argument.

    Expects exactly one argument, the contract bytecode as a hex string (a
    leading ``0x`` is accepted). With any other argument count a usage line
    is written to stderr and the process exits with status 1.
    """
    argv = sys.argv
    if len(argv) != 2:
        print(f"Usage: {argv[0]} <evm_bytecode_hex>", file=sys.stderr)
        sys.exit(1)

    hex_code = argv[1].strip().lower()
    hex_code = hex_code[2:] if hex_code.startswith("0x") else hex_code

    # These names are not referenced anywhere in the rest of this function.
    from evmole import ControlFlowGraph, Block, BlockType, DynamicJump

    info = contract_info(
        hex_code,
        basic_blocks=True,
        control_flow_graph=True,
        selectors=True,
    )
    print(blocks_to_dot(info.control_flow_graph.blocks))


if __name__ == "__main__":
    main()

Any help is appreciated

zerjioang avatar Sep 19 '25 16:09 zerjioang

Example:

Command executed:

python3 /opt/evmole/run.py 0x60606040526000543411601157600080fd5b60015473ffffffffffffffffffffffffffffffffffffffff1663d7bb99ba6247b760346040518363ffffffff167c0100000000000000000000000000000000000000000000000000000000028152600401600060405180830381858988f15050505050500000a165627a7a72305820d43c494964b14aa56c5eee3c28da8f5049cd2c382d21ee39a116116b5c1253db0029

Current Output

digraph G {
    "Block_0_12" [label="Block_0_12\nUnknown"];
    "Block_13_16" [label="Block_13_16\nUnknown"];
    "Block_17_118" [label="Block_17_118\nUnknown"];
}

Expected Kind of output

digraph G {
bgcolor=transparent rankdir=UD;
node [shape=box style=filled color=black fillcolor=white fontname=arial fontcolor=black];
0 [label="0: PUSH1 0x60\l2: PUSH1 0x40\l4: MSTORE\l5: PUSH1 0x00\l7: SLOAD\l8: CALLVALUE\l9: GT\l10: PUSH1 0x11\l12: JUMPI\l" fillcolor=lemonchiffon shape=Msquare fillcolor=gold ];
119 [label="119: EXIT BLOCK\l" fillcolor=crimson ];
17 [label="17: JUMPDEST\l18: PUSH1 0x01\l20: SLOAD\l21: PUSH20 0xffffffffffffffffffffffffffffffffffffffff\l42: AND\l43: PUSH4 0xd7bb99ba\l48: PUSH3 0x47b760\l52: CALLVALUE\l53: PUSH1 0x40\l55: MLOAD\l56: DUP4\l57: PUSH4 0xffffffff\l62: AND\l63: PUSH29 0x0100000000000000000000000000000000000000000000000000000000\l93: MUL\l94: DUP2\l95: MSTORE\l96: PUSH1 0x04\l98: ADD\l99: PUSH1 0x00\l101: PUSH1 0x40\l103: MLOAD\l104: DUP1\l105: DUP4\l106: SUB\l107: DUP2\l108: DUP6\l109: DUP10\l110: DUP9\l111: CALL\l112: POP\l113: POP\l114: POP\l115: POP\l116: POP\l117: POP\l118: STOP\l" shape=Msquare color=crimson ];
13 [label="13: PUSH1 0x00\l15: DUP1\l16: REVERT\l" fillcolor=lemonchiffon shape=Msquare color=crimson ];
0 -> 17;
13 -> 119;
17 -> 119;
0 -> 13;
}

created using Ethersolve tool: https://github.com/SeUniVr/EtherSolve

Image

zerjioang avatar Sep 19 '25 16:09 zerjioang

You need to use BlockType imported from evmole, not the dataclasses defined in your file. Here is a patch for your script:

--- o.py        2025-09-19 20:53:35.574659440 +0300
+++ b.py        2025-09-19 20:51:57.730546913 +0300
@@ -1,31 +1,8 @@
 import sys
-from evmole import Contract
-from evmole import contract_info
+from evmole import contract_info, Contract
+from evmole import ControlFlowGraph, Block, BlockType, DynamicJump
 
-# to convert from list to dot
-from dataclasses import dataclass
-from typing import List, Union
-
-@dataclass
-class BlockType:
-    pass
-
-@dataclass
-class Jumpi(BlockType):
-    true_to: int
-    false_to: int
-
-@dataclass
-class Terminate(BlockType):
-    success: bool
-
-@dataclass
-class Block:
-    start: int
-    end: int
-    btype: Union[Jumpi, Terminate]
-
-def blocks_to_dot(blocks: List[Block]) -> str:
+def blocks_to_dot(blocks: list[Block]) -> str:
     dot_lines = ["digraph G {"]
     # Assign each block a label
     block_labels = {block.start: f"Block_{block.start}_{block.end}" for block in blocks}
@@ -35,13 +12,13 @@
         btype = block.btype
 
         # Add the node
-        if isinstance(btype, Jumpi):
+        if isinstance(btype, BlockType.Jumpi):
             dot_lines.append(f'    "{label}" [label="{label}\\nJumpi"];')
             true_label = block_labels.get(btype.true_to, f"Block_{btype.true_to}")
             false_label = block_labels.get(btype.false_to, f"Block_{btype.false_to}")
             dot_lines.append(f'    "{label}" -> "{true_label}" [label="true"];')
             dot_lines.append(f'    "{label}" -> "{false_label}" [label="false"];')
-        elif isinstance(btype, Terminate):
+        elif isinstance(btype, BlockType.Terminate):
             status = "Success" if btype.success else "Failure"
             dot_lines.append(f'    "{label}" [label="{label}\\nTerminate({status})", shape=box];')
         else:
@@ -60,7 +37,6 @@
     if hex_code.startswith("0x"):
         hex_code = hex_code[2:]
 
-    from evmole import ControlFlowGraph, Block, BlockType, DynamicJump
     info = contract_info(hex_code, basic_blocks=True, control_flow_graph=True, selectors=True)
     output = info.control_flow_graph.blocks
     dot = blocks_to_dot(output)

output:

digraph G {
    "Block_0_12" [label="Block_0_12\nJumpi"];
    "Block_0_12" -> "Block_17_118" [label="true"];
    "Block_0_12" -> "Block_13_16" [label="false"];
    "Block_13_16" [label="Block_13_16\nTerminate(Failure)", shape=box];
    "Block_17_118" [label="Block_17_118\nTerminate(Success)", shape=box];
}

To include assembly code you can add disassemble=True to contract_info and match offsets with blocks.

Also, you can try https://evmole.xyz/#0xC02aaA39b223FE8D0A0e5C4F27eAD9083C756Cc2/eth/control_flow just to compare results - it uses only evmole js api + some postprocessing, feel free to write if you have any more questions

cdump avatar Sep 19 '25 17:09 cdump

And don't forget to handle the Jump and DynamicJumpi block types, example

cdump avatar Sep 19 '25 18:09 cdump

Good. So here is the updated script:

#!/usr/bin/env python3

import sys
from evmole import contract_info, Block, BlockType


def normalize_targets(block_labels, targets):
    """Map jump target(s) to the node labels used in the DOT output.

    Args:
        block_labels: Mapping from a block's start offset to its node label.
        targets: ``None``, a single target, or a list/tuple of targets. A
            target is either an integer offset or an object whose ``to``
            attribute holds the offset (``None`` when unresolved).

    Returns:
        A list of labels in input order. Unresolved targets are skipped;
        offsets with no entry in ``block_labels`` get ``Block_<offset>``.
    """
    if targets is None:
        return []
    if not isinstance(targets, (list, tuple)):
        targets = [targets]

    labels = []
    for target in targets:
        # Plain ints have no ``to`` attribute and pass through unchanged.
        # assumes evmole's dynamic-jump entries carry the destination in
        # ``.to`` (None when unknown) -- TODO confirm against the evmole API.
        offset = getattr(target, "to", target)
        if offset is None:
            continue
        labels.append(block_labels.get(offset, f"Block_{offset}"))
    return labels


def blocks_to_dot(blocks: list[Block], instructions: dict[int, str]) -> str:
    dot_lines = ["digraph G {", '    node [shape=box, fontname="Courier"];']
    block_labels = {block.start: f"Block_{block.start}_{block.end}" for block in blocks}

    for block in blocks:
        label = block_labels[block.start]
        btype = block.btype

        # Collect instructions for this block
        body_instrs = []
        for offset in range(block.start, block.end):
            if offset in instructions:
                body_instrs.append(instructions[offset])
        body = "\\l".join(body_instrs) + "\\l" if body_instrs else ""

        # Node definition
        dot_lines.append(f'    "{label}" [label="{label}\\n{body}"];')

        # Edges
        match btype:
            case BlockType.Jumpi(true_to, false_to):
                for true_label in normalize_targets(block_labels, true_to):
                    dot_lines.append(f'    "{label}" -> "{true_label}" [label="true"];')
                for false_label in normalize_targets(block_labels, false_to):
                    dot_lines.append(f'    "{label}" -> "{false_label}" [label="false"];')
            case BlockType.Jump(to):
                for to_label in normalize_targets(block_labels, to):
                    dot_lines.append(f'    "{label}" -> "{to_label}" [label="jump"];')
            case BlockType.DynamicJumpi(true_to, false_to):
                for t in normalize_targets(block_labels, true_to):
                    dot_lines.append(f'    "{label}" -> "{t}" [label="true"];')
                for f in normalize_targets(block_labels, false_to):
                    dot_lines.append(f'    "{label}" -> "{f}" [label="false"];')
            case BlockType.DynamicJump(to):
                for t in normalize_targets(block_labels, to):
                    dot_lines.append(f'    "{label}" -> "{t}" [label="jump"];')
            case BlockType.Terminate:
                status = "Success" if btype.success else "Failure"
                dot_lines.append(f'    "{label}" [label="{label}\\nTerminate({status})", shape=ellipse];')

    dot_lines.append("}")
    return "\n".join(dot_lines)


def main():
    """Command-line entry point: print the annotated DOT graph for one bytecode.

    Expects exactly one argument, the contract bytecode as a hex string (a
    leading ``0x`` is accepted). With any other argument count a usage line
    is written to stderr and the process exits with status 1.
    """
    if len(sys.argv) != 2:
        print(f"Usage: {sys.argv[0]} <evm_bytecode_hex>", file=sys.stderr)
        sys.exit(1)

    hex_code = sys.argv[1].strip().lower().removeprefix("0x")

    analysis_options = {
        "basic_blocks": True,
        "control_flow_graph": True,
        "selectors": True,
        "disassemble": True,
    }
    info = contract_info(hex_code, **analysis_options)

    # Index the disassembly by offset so each block can pick out its own lines.
    instructions = {}
    for ins in info.disassembled:
        offset = ins[0]
        instructions[offset] = f"0x{offset:x}:{ins[1]}"

    print(blocks_to_dot(info.control_flow_graph.blocks, instructions))


if __name__ == "__main__":
    main()

which creates basic .dot graphs like:

digraph G {
    node [shape=box, fontname="Courier"];
    "Block_0_12" [label="Block_0_12\n0x0:PUSH1 60\l0x2:PUSH1 40\l0x4:MSTORE\l0x5:PUSH1 00\l0x7:SLOAD\l0x8:CALLVALUE\l0x9:GT\l0xa:PUSH1 11\l"];
    "Block_0_12" -> "Block_17_118" [label="true"];
    "Block_0_12" -> "Block_13_16" [label="false"];
    "Block_13_16" [label="Block_13_16\n0xd:PUSH1 00\l0xf:DUP1\l"];
    "Block_17_118" [label="Block_17_118\n0x11:JUMPDEST\l0x12:PUSH1 01\l0x14:SLOAD\l0x15:PUSH20 ffffffffffffffffffffffffffffffffffffffff\l0x2a:AND\l0x2b:PUSH4 d7bb99ba\l0x30:PUSH3 47b760\l0x34:CALLVALUE\l0x35:PUSH1 40\l0x37:MLOAD\l0x38:DUP4\l0x39:PUSH4 ffffffff\l0x3e:AND\l0x3f:PUSH29 0100000000000000000000000000000000000000000000000000000000\l0x5d:MUL\l0x5e:DUP2\l0x5f:MSTORE\l0x60:PUSH1 04\l0x62:ADD\l0x63:PUSH1 00\l0x65:PUSH1 40\l0x67:MLOAD\l0x68:DUP1\l0x69:DUP4\l0x6a:SUB\l0x6b:DUP2\l0x6c:DUP6\l0x6d:DUP10\l0x6e:DUP9\l0x6f:CALL\l0x70:POP\l0x71:POP\l0x72:POP\l0x73:POP\l0x74:POP\l0x75:POP\l"];
}
Image

Do you mind if I create a PR to add this utility script to the codebase?

zerjioang avatar Sep 20 '25 13:09 zerjioang

Do you mind if I create a PR to add this utility script to the codebase?

Of course, you can add it to python/ folder as dot_graph.py or something similar and write a few words about it in python/README (optional)

cdump avatar Sep 23 '25 08:09 cdump