maxas
maxas copied to clipboard
print control code meanings
Partially addresses https://groups.google.com/forum/#!topic/maxas-discuss/4rovrjSRzKA
(It would be nice to modify the parser too, and remove the now-redundant '-:-:-:-:-' control-code syntax that is currently parsed from the following line, but ... baby steps.)
Not sure if this is really an improvement, actually... it is a bit spammy.
Maybe moderately nicer now? Example output:
# Kernel: microbench
# Arch: sm_50
# InsCnt:
# RegCnt: 10
# SharedSize: 4096
# BarCnt: 1
# Params(3):
# ord:addr:size:align
# 0:0x140:8:0
# 1:0x148:8:0
# 2:0x150:8:0
#
# Instructions:
<CONSTANT_MAPPING>
blockDimX : c[0x0][0x8]
blockDimY : c[0x0][0xc]
blockDimZ : c[0x0][0x10]
gridDimX : c[0x0][0x14]
gridDimY : c[0x0][0x18]
gridDimZ : c[0x0][0x1c]
param_0[0] : c[0x0][0x140]
param_0[1] : c[0x0][0x144]
param_1[0] : c[0x0][0x148]
param_1[1] : c[0x0][0x14c]
param_2[0] : c[0x0][0x150]
param_2[1] : c[0x0][0x154]
</CONSTANT_MAPPING>
# stall(6)
--:-:-:-:6 MOV R1, c[0x0][0x20];
--:-:-:-:0 CS2R R7, SR_CLOCKLO;
# setWriteBarrier(1);stall(1)
--:-:1:-:1 S2R R0, SR_CTAID.X;
# setWriteBarrier(1);stall(15)
--:-:1:-:f S2R R2, SR_CTAID.Y;
# waitBarriers([1])
01:-:-:-:0 XMAD R0, R2.reuse, 0xffff, R0;
# setWriteBarrier(1);stall(6)
--:-:1:-:6 S2R R4, SR_TID.X;
# stall(6)
--:-:-:-:6 XMAD.PSL R0, R2.H1, 0xffff, R0;
# stall(1)
--:-:-:-:1 SHL R2, R0.reuse, 0x2;
# stall(5)
--:-:-:-:5 SHR R0, R0, 0x1e;
# yield();stall(6)
--:-:-:Y:6 IADD R2.CC, R2, param_2[0];
# stall(2)
--:-:-:-:2 IADD.X R3, R0, param_2[1];
# setReadBarrier(2);setWriteBarrier(4);stall(1)
--:2:4:-:1 LDG.E R0, [R2];
# waitBarriers([1]);stall(1)
01:-:-:-:1 SHL R5, R4.reuse, 0x2;
# stall(3)
--:-:-:-:3 SHR R4, R4, 0x1e;
# setReadBarrier(1);waitBarriers([4]);stall(13)
08:1:-:-:d STS [R5], R0;
# waitBarriers([1,2]);stall(15)
03:-:-:-:f BAR.SYNC 0x0;
# stall(4)
--:-:-:-:4 LOP32I.XOR R6, R5.reuse, 0x4;
# setWriteBarrier(1);stall(1)
--:-:1:-:1 LDS.U.32 R6, [R6];
# stall(1)
--:-:-:-:1 IADD R9.CC, R5, param_1[0];
# stall(1)
--:-:-:-:1 SHR R0, R7, 0x10;
# stall(4)
--:-:-:-:4 CS2R R2, SR_CLOCKLO;
# yield();stall(1)
--:-:-:Y:1 IADD.X R3, R4, param_1[1];
# stall(1)
--:-:-:-:1 IADD R8.CC, R5, param_0[0];
# stall(1)
--:-:-:-:1 LOP3.LUT R0, R0, -0x10000, R2, 0xf8;
# stall(4)
--:-:-:-:4 MOV R2, R9;
--:-:-:-:0 IADD.X R5, R4, param_0[1];
# setReadBarrier(2);stall(1)
--:2:-:-:1 STG.E [R2], R0;
# stall(2)
--:-:-:-:2 MOV R4, R8;
# setReadBarrier(1);waitBarriers([1]);stall(1)
01:1:-:-:1 STG.E [R4], R6;
# stall(15)
--:-:-:-:f EXIT;