toolchain
toolchain copied to clipboard
ARCv2 variadic codegen investigation
Busybox free prints stray characters
Linux version 5.6.0-00223-gf03b92a6f9a7 (vineetg@vineetg-Latitude-7400) (gcc version 10.2.0 (Buildroot 2021.02-6-g5e29ba7bf732)) #1 PREEMPT Tue Apr 20 11:54:40 PDT 2021
Memory @ 80000000 [1024M]
Memory @ 100000000 [1024M] Not used
OF: fdt: Machine model: snps,zebu_hs
earlycon: uart8250 at MMIO32 0xf0000000 (options '115200n8')
printk: bootconsole [uart8250] enabled
IDENTITY : ARCVER [0x54] ARCNUM [0x0] CHIPID [0xffff]
processor [0] : HS38 R3.10a (ARCv2 ISA)
ISA Extn : atomic ll64 unalign mpy[opt 7] div_rem
BPU : partial match, cache:256, Predict Table:2048 Return stk: 4
MMU [v4] : 8k/2M, swalk 2 lvl, JTLB 256x4, uDTLB 8, uITLB 4, PAE40 (not used)
I-Cache : 64K, 4way/set, 64B Line, VIPT aliasing
D-Cache : 64K, 2way/set, 64B Line, PIPT
Peripherals : 0xc0000000
Timers : Timer0 Timer1
Vector Table : 0x80000000 [64-bit]
archs-intc : 16 priority levels (default 1) FIRQ (not used) hw autosave
...
# free -m
total used free shared buff/cache available
Mem: 1013 3 965 %1@llu %1@�lu %1@�pu
Swap: %1@llu %1@�lu %1@�pu
This is off of Buildroot 2021.02, upstream gcc 10.2 + [5aeabae7f0cdd8d](https://github.com/foss-for-synopsys-dwc-arc-processors/gcc/commit/5aeabae7f0cdd8dd3a01103b68b2e7a66a71c685) "arc: Refurbish adc/sbc patterns"
BR2_GCC_TARGET_CPU="hs4x"
BR2_OPTIMIZE_2=y
BR2_BINUTILS_VERSION_2_36_X=y
BR2_BINUTILS_VERSION="2.36.1"
BR2_GCC_VERSION_10_X=y
BR2_GCC_VERSION="10.2.0"
BR2_TARGET_OPTIMIZATION="-mfpu=fpud_all"
busybox free uses /proc/meminfo to print this.
# strace free -m
openat(AT_FDCWD, "/proc/meminfo", O_RDONLY|O_LARGEFILE) = 3
statx(3, "", AT_STATX_SYNC_AS_STAT|AT_EMPTY_PATH, STATX_BASIC_STATS, {stx_mask=STATX_BASIC_STATS, stx_attributes=0, stx_mode=S_IFREG|0444, stx_size=0, ...}) = 0
read(3, "MemTotal: 1037936 kB\nMemF"..., 1024) = 1008
close(3) = 0
write(1, "Mem: 1013 3 "..., 65Mem: 1013 3 965 %1@llu %1@�lu %1@�pu
) = 65
write(1, "Swap: %1@llu %1@\344lu %1@\344pu\n", 29Swap: %1@llu %1@�lu %1@�pu
) = 29
The data received from kernel seems ok.
# cat /proc/meminfo
MemTotal: 1037936 kB
MemFree: 988448 kB
MemAvailable: 982512 kB
Buffers: 0 kB
Cached: 45392 kB
SwapCached: 0 kB
Active: 12616 kB
Inactive: 33320 kB
Active(anon): 12616 kB
Inactive(anon): 33320 kB
Active(file): 0 kB
Inactive(file): 0 kB
Unevictable: 0 kB
Mlocked: 0 kB
SwapTotal: 0 kB
SwapFree: 0 kB
Dirty: 0 kB
Writeback: 0 kB
AnonPages: 568 kB
Mapped: 928 kB
Shmem: 45392 kB
KReclaimable: 272 kB
Slab: 1520 kB
SReclaimable: 272 kB
SUnreclaim: 1248 kB
KernelStack: 216 kB
PageTables: 120 kB
NFS_Unstable: 0 kB
Bounce: 0 kB
WritebackTmp: 0 kB
CommitLimit: 518968 kB
Committed_AS: 47096 kB
VmallocTotal: 253952 kB
VmallocUsed: 24 kB
VmallocChunk: 0 kB
Percpu: 64 kB
This looks like a codegen issue, due to an ABI corner case.
small test: arc-linux-gcc -Os small.c
extern unsigned long long get_ll(void);
void free_main(void)
{
printf("%12llu %11llu %11llu %11llu %11llu %11llu\n",
get_ll(), get_ll(), get_ll(),
get_ll(), get_ll(), get_ll());
}
So printf() is called with 7 args, 1st char * and 6 others (long long - needing register pair). ABI says function calls only use regs r0 to r7, rest are passed on stack.
1: r0 2: r1r2 3: r3r4 4: r5r6 5: r7 + stack
So the 5th argument is split between a register and the stack - I'm not sure if this is an issue (I haven't seen the callee-side codegen yet).
glibc's printf internals are insanely complex, so I'm still trying to unpack variadic codegen with a simple caller / callee, but I don't really understand how things work.
variadic caller: emulating the printf caller in busybox
extern unsigned long long get_1(void);
extern unsigned long long get_2(void);
extern unsigned long long get_3(void);
extern unsigned long long get_4(void);
extern unsigned long long get_5(void);
extern unsigned long long get_6(void);
extern void printf0(char *format, ...);
printf0("%12llu %11llu %11llu %11llu %11llu %11llu\n",
get_1(), get_2(), get_3(), get_4(), get_5(), get_6());
arc-linux-gcc -O2 -c -fno-delayed-branch caller.c --save-temps
It seems the caller passes the first 8 words of data through r0-r7 and the rest on the stack. The get_4() result straddles a register (r7) and the stack when passed to the callee.
bl @get_1
mov_s r14,r0
mov_s r15,r1
bl @get_2
mov_s r16,r0
mov_s r17,r1
bl @get_3
mov_s r18,r0
mov_s r19,r1
bl @get_4
mov_s r13,r0 ; 4:{l}
mov_s r22,r1 ; 4:{h}
bl @get_5
mov_s r20,r0
mov_s r21,r1
bl @get_6
st r22,[sp] ; 4:{h} @ stack (PARTIAL ARG)
std r0,[sp,12]
std r20,[sp,4] ; 5:{l,h} @ stack
mov_s r7,r13 ; 4:{l} @ reg r7 (PARTIAL ARG)
mov_s r5,r18
mov_s r6,r19 ; 3:{l,h} @ reg r5r6
mov_s r3,r16
mov_s r4,r17
mov_s r1,r14
mov_s r2,r15
mov_s r0,@.LC0 ; 0 format specifier
bl @printf0
variadic callee: a simple test which looks like a printf for dummies
extern void foo(long long n);
void callee(char *fixed, ...)
{
long long a, b, c, d, e, f;
va_list v;
va_start(v, fixed);
a = __builtin_va_arg(v, long long);
b = __builtin_va_arg(v, long long);
c = __builtin_va_arg(v, long long);
d = __builtin_va_arg(v, long long);
e = __builtin_va_arg(v, long long);
f = __builtin_va_arg(v, long long);
va_end(v);
foo(a);
foo(d);
}
However, here it seems everything is fetched off the stack (and none of the register-passed data is used).
callee:
st.a blink,[sp,-32]
std.a r14,[sp,-8]
st_s r2,[sp,16]
st r7,[sp,36]
st_s r1,[sp,12]
ldd r14,[sp,36] <-- arg for 2nd foo call, gets 4th vararg FROM stack NOT r7 + stack
ldd r0,[sp,12] <-- arg for 1st foo call, gets 1st vararg FROM stack NOT r1r2
st_s r3,[sp,20]
std r4,[sp,24]
st r6,[sp,32]
bl @foo
mov_s r0,r14
mov_s r1,r15
bl @foo
Creating a new issue to discuss the busybox print problem since variadic codegen doesn't seem like an issue here.
@abrodkin is this still an issue?
@pavelvkozlov could you please take a look at this one at some point, as it seems to be glibc related?
Actually, there is no issue here and variadic codegen works as expected. Just in case, I checked it again. The problem with busybox free printing was followed up in issue #372. I think we can close this issue.