esp-llvm
esp-llvm copied to clipboard
Poor basic block scheduling/branch optimization
The following C source compiles to sub-optimal RISC-V code:
/* Reduced test case: a four-way switch in which every case does the same thing. */
/* f is only declared here; its definition is not part of this test case. */
extern void f(void);
/* Calls f() exactly once when test is 1, 2, 3 or 4; does nothing for any other value. */
void f2(int test) {
switch (test) {
case 1:
f();
break;
case 2:
f();
break;
case 3:
f();
break;
case 4:
f();
break;
/* No default case: values outside 1..4 fall out of the switch. */
}
}
Running it with
clang -mriscv=RV64IAMFD -target riscv64 -S -O3 -o invalid-branch.c.64Full.S invalid-branch.c
produces
# f2(test) as emitted by clang for RV64.
# Register roles under the standard RISC-V calling convention:
#   x10 = first argument (`test`), x1 = return address,
#   x2 = stack pointer, x8 = frame pointer, x5/x6 = scratch.
f2:
addi x2, x2, -16          # reserve a 16-byte stack frame
sd x1, 8(x2)              # save return address (overwritten by the jalr below)
sd x8, 0(x2)              # save caller's x8
add x8, x2, x0            # x8 = x2; used to restore x2 in the epilogue
li x5, 2
blt x5, x10, LBB0_3       # signed 2 < test: go check 3 and 4
li x6, 1
beq x10, x6, LBB0_5       # test == 1: call f
beq x10, x5, LBB0_5       # test == 2: call f
j LBB0_6                  # test < 1: skip the call
LBB0_3:
li x5, 3
beq x10, x5, LBB0_5       # test == 3: call f
li x5, 4
bne x10, x5, LBB0_6       # test != 4 (so test > 4): skip the call
                          # test == 4 falls through into the call
LBB0_5:
lui x5, %hi(f)            # x5 = upper bits of f's absolute address
addi x5, x5, %lo(f)       # x5 = full address of f
jalr x1, x5, 0            # call f; return address written to x1
LBB0_6:
                          # common exit for both the call and no-call paths
add x2, x8, x0            # x2 = x8, undoing any stack adjustment
ld x8, 0(x2)              # restore caller's x8
ld x1, 8(x2)              # restore return address
addi x2, x2, 16           # release the stack frame
ret
This is not as good as the riscv-gcc output shown below, most likely because of how the RISC-V target implements the branch-analysis hook (`AnalyzeBranch`) and the associated target-specific code in LLVM.
riscv64-unknown-elf-gcc -O3 -S -o invalid-branch.c.64Full.gcc.S invalid-branch.c
produces
# f2(test) as emitted by riscv-gcc.
# a0 = first argument (`test`), a5 = scratch for the comparison constants.
# No stack frame is set up and no registers are saved on any path.
f2:
li a5,2
beq a0,a5,.L7             # test == 2: call f
ble a0,a5,.L9             # signed test <= 2 (equality handled above, so test < 2)
li a5,3
beq a0,a5,.L7             # test == 3: call f
li a5,4
bne a0,a5,.L10            # test != 4 (so test > 4): return without calling
                          # test == 4 falls through into the tail call
.L7:
tail f                    # tail call: jump to f, which returns directly to f2's caller
.L9:
li a5,1
beq a0,a5,.L7             # test == 1: call f
ret                       # test < 1: nothing to do
.L10:
ret                       # test > 4: nothing to do