capstone icon indicating copy to clipboard operation
capstone copied to clipboard

Inconsistent disassembler of AArch32 MSR instruction

Open gerph opened this issue 1 year ago • 3 comments

Work environment

| Questions | Answers |
| --- | --- |
| System Capstone runs on (OS/arch/bits) | macOS x86 |
| Capstone module affected | arm |
| Source of Capstone (git clone, brew, pip, release binaries etc.) | pip install capstone |
| Version/git commit | v5.0.1 - v5.0.6 at least, plus v6 |

Instruction bytes giving faulty results

0x21, 0xfc, 0x60, 0xd3

Expected results

Whilst the text of the disassembly comes out ok as:

msrle	spsr, #0x2100

it only reports a single operand - the SYSREG is not reported as operand 0.

Steps to get the wrong result

with Python

# Minimal reproducer: count the operands Capstone reports for the failing MSR encoding.
from capstone import Cs, CS_ARCH_ARM, CS_MODE_ARM

# Encoding that disassembles as "msrle spsr, #0x2100".
CODE = b'\x21\xfc\x60\xd3'

md = Cs(CS_ARCH_ARM, CS_MODE_ARM)
# Detail mode is enabled because insn.operands is read below.
md.detail = True
for insn in md.disasm(CODE, 0x1000):
  print(len(insn.operands))

which reports only 1 operand.

Compare this to the output of another MSR instruction, b'\x93\xf0\x21\x03', # MSREQ cpsr_c, #&93

# Comparison case: count the operands Capstone reports for a well-formed MSR encoding.
from capstone import Cs, CS_ARCH_ARM, CS_MODE_ARM

# Encoding that disassembles as "msreq cpsr_c, #0x93".
CODE = b'\x93\xf0\x21\x03'

md = Cs(CS_ARCH_ARM, CS_MODE_ARM)
# Detail mode is enabled because insn.operands is read below.
md.detail = True
for insn in md.disasm(CODE, 0x1000):
  print(len(insn.operands))

which reports 2.

Additional Logs, screenshots, source code, configuration dump, ...

The fuller tool I use to display the example instructions is:

#!/usr/bin/python
##
# Disassemble and display the properties of the Capstone structures
#

from capstone import *
import capstone.arm_const

# Capstone register ids for the sixteen core registers, indexed by register
# number: R0-R12 first, then SP, LR and PC in positions 13-15.
reg_map = [
    getattr(capstone.arm_const, 'ARM_REG_R%d' % number)
    for number in range(13)
] + [
    capstone.arm_const.ARM_REG_SP,
    capstone.arm_const.ARM_REG_LR,
    capstone.arm_const.ARM_REG_PC,
]
# Reverse lookup: Capstone register id -> register number (0-15).
inv_reg_map = {reg_id: number for number, reg_id in enumerate(reg_map)}

# Display mnemonic for each Capstone shift type. The plain and "_REG" variants
# of a shift share one mnemonic; the invalid type maps to None.
shift_names = {capstone.arm_const.ARM_SFT_INVALID: None}
shift_names.update(
    (getattr(capstone.arm_const, 'ARM_SFT_' + mnemonic + suffix), mnemonic)
    for mnemonic in ('ASR', 'LSL', 'LSR', 'ROR', 'RRX')
    for suffix in ('', '_REG')
)

# Operand type value -> constant name, for every ARM_OP_* constant exported by
# capstone.arm_const.
optype_names = {
    getattr(capstone.arm_const, const_name): const_name
    for const_name in dir(capstone.arm_const)
    if const_name.startswith('ARM_OP_')
}

# Disassembler instance shared by show_disasm(): ARM architecture, ARM mode,
# with detail enabled because operands are inspected.
md = Cs(CS_ARCH_ARM, CS_MODE_ARM)
md.detail = True
# Register "SWI" as the mnemonic for the SVC instruction id.
md.mnemonic_setup(capstone.arm_const.ARM_INS_SVC, "SWI")
# Turn off APCS register naming
md.syntax = capstone.CS_OPT_SYNTAX_NOREGNAME

# Most recently disassembled instruction; assigned by show_disasm().
last_i = None

def show_disasm(code):
    """
    Disassemble `code` at address 0x1000 and print every instruction with
    a breakdown of its operands.

    For each instruction the address, mnemonic and operand string are
    printed, followed by one entry per operand giving its type and, where
    applicable, its immediate, register, memory and shift details.  The
    last instruction seen is stored in the module-level `last_i`.

    Register ids that are not in `inv_reg_map` (anything outside R0-R15)
    are printed as 'unknown' instead of raising KeyError.
    """
    global last_i
    consts = capstone.arm_const
    # Shift types whose value is an immediate amount.
    imm_shifts = (consts.ARM_SFT_LSL,
                  consts.ARM_SFT_LSR,
                  consts.ARM_SFT_ASR,
                  consts.ARM_SFT_ROR)
    # Shift types whose value is looked up as a register id.
    reg_shifts = (consts.ARM_SFT_LSL_REG,
                  consts.ARM_SFT_LSR_REG,
                  consts.ARM_SFT_ASR_REG,
                  consts.ARM_SFT_ROR_REG)

    for i in md.disasm(code, 0x1000):
        last_i = i
        print("")
        print("0x%x:\t%s\t%s" % (i.address, i.mnemonic, i.op_str))
        for index, operand in enumerate(i.operands):
            print("  op#%i: type=%i (%s)" % (index, operand.type, optype_names.get(operand.type, 'unknown')))
            if operand.type == consts.ARM_OP_IMM:
                print("        imm = %i" % (operand.imm,))
            if operand.type == consts.ARM_OP_REG:
                # .get() keeps the dump going for registers outside R0-R15.
                print("        reg = %i (R%s)" % (operand.reg, inv_reg_map.get(operand.reg, 'unknown')))
            if operand.type == consts.ARM_OP_MEM:
                print("        base = %i (R%s)" % (operand.mem.base, inv_reg_map.get(operand.mem.base, 'unknown')))
                print("        index = %i (R%s)" % (operand.mem.index, inv_reg_map.get(operand.mem.index, 'unknown')))
                print("        disp = %i" % (operand.mem.disp,))
                # Printed as a plain number, matching armop__repr__, rather
                # than being looked up as a register id.
                print("        lshift = %i" % (operand.mem.lshift,))
            if operand.shift.type != consts.ARM_SFT_INVALID:
                if operand.shift.type in imm_shifts:
                    sname = shift_names[operand.shift.type]
                    print("        shift = %s #%i" % (sname, operand.shift.value))
                elif operand.shift.type in reg_shifts:
                    sname = shift_names[operand.shift.type]
                    reg = inv_reg_map.get(operand.shift.value, 'unknown')
                    print("        shift = %s R%s" % (sname, reg))
                else:
                    # Any other shift type (e.g. RRX) is dumped raw.
                    print("        shift = type=%i value=%i" % (operand.shift.type, operand.shift.value))

def insn__repr__(self):
    """
    Return a short description of an instruction: its class name, its
    bytes shown as a single hex word, and its operand count.

    The bytes are reversed before formatting so that the first byte in
    memory ends up as the least significant digits of the word.
    """
    # Format byte by byte: bytes.encode('hex') only exists on Python 2 and
    # raises AttributeError on Python 3.
    word = ''.join('%02x' % octet for octet in reversed(bytearray(self.bytes)))
    return "<{}(word=0x{}, {} operands)>".format(self.__class__.__name__, word, len(self.operands))
# Use insn__repr__ as the repr of Capstone instruction objects.
capstone.CsInsn.__repr__ = insn__repr__

def armop__repr__(self):
    """
    Return a short description of an ARM operand: its type name plus the
    immediate, register or memory fields relevant to that type, and any
    shift applied to it.

    Register ids that are not in `inv_reg_map` (anything outside R0-R15)
    are shown as 'unknown' so that the repr never raises KeyError.
    """
    params = ['type={}'.format(optype_names.get(self.type, 'unknown'))]
    if self.type == capstone.arm_const.ARM_OP_IMM:
        params.append('imm={}'.format(self.imm))
    elif self.type == capstone.arm_const.ARM_OP_REG:
        # .get() mirrors the memory branch below; a repr should not raise.
        params.append('reg={}'.format(inv_reg_map.get(self.reg, 'unknown')))
    elif self.type == capstone.arm_const.ARM_OP_MEM:
        params.append('basereg={}'.format(inv_reg_map.get(self.mem.base, 'unknown')))
        params.append('indexreg={}'.format(inv_reg_map.get(self.mem.index, 'unknown')))
        params.append('displacement={}'.format(self.mem.disp))
        params.append('lshift={}'.format(self.mem.lshift))
    if self.shift.type != capstone.arm_const.ARM_SFT_INVALID:
        if self.shift.type in (capstone.arm_const.ARM_SFT_LSL,
                               capstone.arm_const.ARM_SFT_LSR,
                               capstone.arm_const.ARM_SFT_ASR,
                               capstone.arm_const.ARM_SFT_ROR):
            sname = shift_names[self.shift.type]
            params.append("shift={} #{}".format(sname, self.shift.value))
        else:
            # Every other shift type is reported by its numeric type.
            params.append("shift=type{} #{}".format(self.shift.type, self.shift.value))
    return "<{}({})>".format(self.__class__.__name__, ', '.join(params))
# Use armop__repr__ as the repr of Capstone ARM operand objects.
capstone.arm.ArmOp.__repr__ = armop__repr__

# Report which Capstone version produced the output below.
print("cs_version() = %r" % (cs_version(),))

# Sample ARM instruction encodings (bytes in little-endian order), each with
# the assembly it is meant to represent.
examples = [
        b'\x05\x00\x00\xef', # SWI 5
        b'\x20\x00\x50\xe3', # CMP r0, #&20
        b'\x40\x00\x9f\x05', # LDREQ   r0,[pc,#64]
        b'\x05\x00\x00\x2f', # SWICS 5
        b'\x08\x00\x00\xeb', # BL pc+8*4
        b'\xba\x50\x8f\xb2', # ADDLT r5, pc, #186
        b'\x6C\x43\x9f\xE5', # LDR r4, [pc, #&36c]
        b'\x0b\xb0\x97\xe7', # LDR     r11, [r7, r11]
        b'\x04\x00\x5f\xe5', # LDRB r0, [pc, #-4]
        b'\x03\x00\x92\xe8', # LDMIA   r2, {r0, r1}
        b'\x03\x00\x92\xd8', # LDMLEIA r2, {r0, r1}
        b'\x00\x18\xa0\xe1', # LSL r1, r0, #&10 => MOV r1, r0, LSL #16
        b'\x21\x18\xa0\xe1', # LSR r1, r1, #&10 => MOV r1, r1, LSR #16
        b'\x26\xc4\xb0\xe1', # LSRS r12, r6, #8 => MOVS r12, r6, LSR #8
        b'\x12\x13\xa0\xe1', # LSL r1, r2, r3   => MOV r1, r2, LSL r3
        b'\x52\x13\xa0\xe1', # ASR r1, r2, r3   => MOV r1, r2, ASR r3
        b'\x62\x10\xa0\xe1', # RRX r1, r2       => MOV r1, r2, RRX
        b'\x53\x30\xeb\xe7', # UBFX r3, r3, #0, #0xc
        b'\x01\x0f\x81\xe2', # ADD r0, r1, #1, #30  => ADD r0, r1, #4
        b'\x1e\x10\x81\x11', # ORRNE r1, r1, r14, LSL r0
        b'\x11\x0f\x8f\x12', # ADRNE r0, &4c
        b'\x03\xf0\x21\x01', # MSREQ cpsr_c, r3
        b'\x93\xf0\x21\x03', # MSREQ cpsr_c, #&93
        b'\x21\xfc\x60\xd3', # MSRLE spsr, #&2100 ; does not report a SYSREG
    ]
# Disassemble and dump every example in turn.
for code in examples:
    show_disasm(code)

Which, for the final two instructions, gives me:

0x1000:	msreq	cpsr_c, #0x93
  op#0: type=67 (ARM_OP_SYSREG)
  op#1: type=2 (ARM_OP_IMM)
        imm = 147

0x1000:	msrle	spsr, #0x2100
  op#0: type=2 (ARM_OP_IMM)
        imm = 8448

The ARM_OP_SYSREG has not been recorded - it should say that this is a SPSR operation, I think, but this has been lost and the immediate constant has been recorded as the only operand.

gerph avatar Apr 13 '25 10:04 gerph

This seems to be undefined behavior (of the ARM ISA). A quick search suggests that at least one of the mask bits should be set:

Image

I guess I would add the register and leave the flag bits as 0 then.

Rot127 avatar Apr 13 '25 11:04 Rot127

Turns out this is not correct (checked an ISA post v7). It is just way more complicated (ARMv7 - ARM DDI 0406C.d):

Image

Image

Rot127 avatar Apr 13 '25 12:04 Rot127

@gerph Did you get this instruction from a binary? Because it is marked as unpredictable/undefined (writes spsr but mask is 0).

Rot127 avatar Apr 13 '25 12:04 Rot127