xxHash
xxHash copied to clipboard
Bug: XXH_INLINE_ALL does not always force inline? How to force inline?
Here's a simple program:
$ cat mystery.c
#include <stdio.h>
#include <assert.h>
#include <sys/time.h>
#include "xxHash/xxhash.h"
// export USE_FOO2=0; gcc -DUSE_FOO2=$USE_FOO2 -DXXH_INLINE_ALL -O1 -S -o mystery$USE_FOO2.s mystery.c && gcc -c -o mystery$USE_FOO2.o mystery$USE_FOO2.s && gcc -o mystery$USE_FOO2.exe mystery$USE_FOO2.o && ./mystery$USE_FOO2.exe; cat mystery$USE_FOO2.s | egrep -i "(^[a-z].*:|call)"
// export USE_FOO2=1; gcc -DUSE_FOO2=$USE_FOO2 -DXXH_INLINE_ALL -O1 -S -o mystery$USE_FOO2.s mystery.c && gcc -c -o mystery$USE_FOO2.o mystery$USE_FOO2.s && gcc -o mystery$USE_FOO2.exe mystery$USE_FOO2.o && ./mystery$USE_FOO2.exe; cat mystery$USE_FOO2.s | egrep -i "(^[a-z].*:|call)"
/*
 * Return the current wall-clock time in seconds as a double, combining the
 * seconds and microseconds fields filled in by gettimeofday().
 *
 * The gettimeofday() call is made outside assert() on purpose: assert()
 * expands to nothing when NDEBUG is defined, which would otherwise drop the
 * call entirely and leave tv uninitialized.
 */
double get_time_in_seconds(void) {
    struct timeval tv;
    int rc = gettimeofday(&tv, NULL);
    assert(rc >= 0);
    (void)rc; /* rc is otherwise unused when assert() is compiled out */
    return (double)tv.tv_sec + 1.e-6 * (double)tv.tv_usec;
}
/* Upper bound of the hashing loops in foo1()/foo2(): number of XXH64 calls per run. */
uint64_t loop = 500000000;
/*
 * Benchmark: call XXH64() on the 8-byte loop counter `loop` times (seed
 * 123456), XOR all resulting hashes together, then print the iteration
 * count, the XOR-ed hash, the elapsed time and the hashes-per-second rate.
 *
 * noinline prevents the compiler from inlining this function into its caller.
 */
void __attribute__ ((noinline)) foo1(void) {
    XXH64_hash_t hash_xor = 0;
    double t1 = get_time_in_seconds();
    for (uint64_t i = 0; i < loop; i++) {
        XXH64_hash_t hash = XXH64(&i, sizeof(i), 123456 /* seed */);
        hash_xor = hash_xor ^ hash;
    }
    double t2 = get_time_in_seconds();
    /* Cast to unsigned long long so the arguments match %llu/%llx on every
     * platform; the 64-bit typedefs are not guaranteed to be unsigned long. */
    printf("- %llu 64bit hashes created xor hash 0x%llx in %f seconds or %.0f per second\n",
           (unsigned long long)loop, (unsigned long long)hash_xor,
           t2 - t1, (double)loop / (t2 - t1));
}
#if USE_FOO2
/*
 * Same benchmark as foo1(): call XXH64() on the 8-byte loop counter `loop`
 * times (seed 123456), XOR the hashes together and print the iteration
 * count, the XOR-ed hash, the elapsed time and the hashes-per-second rate.
 *
 * noinline prevents the compiler from inlining this function into a caller.
 */
void __attribute__ ((noinline)) foo2(void) {
    XXH64_hash_t hash_xor = 0;
    double t1 = get_time_in_seconds();
    for (uint64_t i = 0; i < loop; i++) {
        XXH64_hash_t hash = XXH64(&i, sizeof(i), 123456 /* seed */);
        hash_xor = hash_xor ^ hash;
    }
    double t2 = get_time_in_seconds();
    /* Cast to unsigned long long so the arguments match %llu/%llx on every
     * platform; the 64-bit typedefs are not guaranteed to be unsigned long. */
    printf("- %llu 64bit hashes created xor hash 0x%llx in %f seconds or %.0f per second\n",
           (unsigned long long)loop, (unsigned long long)hash_xor,
           t2 - t1, (double)loop / (t2 - t1));
}
#endif
/*
 * Entry point: runs only the foo1() benchmark.
 *
 * foo2() is deliberately never called. When built with USE_FOO2=1 it merely
 * exists in the translation unit, which is the situation being demonstrated.
 */
int main(void) {
    foo1();
    return 0;
}
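Compiling and running it twice — first with USE_FOO2=0, then with USE_FOO2=1 — gives different results: the 1st time XXH64() is fully inlined, but the 2nd time it is not (XXH64_finalize is emitted as a separate function and called from foo1 and foo2):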
$ gcc --version
gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0
$ git clone https://github.com/Cyan4973/xxHash.git
$ export USE_FOO2=0; gcc -DUSE_FOO2=$USE_FOO2 -DXXH_INLINE_ALL -O1 -S -o mystery$USE_FOO2.s mystery.c && gcc -c -o mystery$USE_FOO2.o mystery$USE_FOO2.s && gcc -o mystery$USE_FOO2.exe mystery$USE_FOO2.o && ./mystery$USE_FOO2.exe; cat mystery$USE_FOO2.s | egrep -i "(^[a-z].*:|call)"
- 500000000 64bit hashes created xor hash 0xde5d248747c84e34 in 1.452864 seconds or 344147851 per second
get_time_in_seconds:
call gettimeofday@PLT
call __assert_fail@PLT
call __stack_chk_fail@PLT
foo1:
call get_time_in_seconds
call get_time_in_seconds
call __printf_chk@PLT
main:
call foo1
loop:
$ export USE_FOO2=1; gcc -DUSE_FOO2=$USE_FOO2 -DXXH_INLINE_ALL -O1 -S -o mystery$USE_FOO2.s mystery.c && gcc -c -o mystery$USE_FOO2.o mystery$USE_FOO2.s && gcc -o mystery$USE_FOO2.exe mystery$USE_FOO2.o && ./mystery$USE_FOO2.exe; cat mystery$USE_FOO2.s | egrep -i "(^[a-z].*:|call)"
- 500000000 64bit hashes created xor hash 0xde5d248747c84e34 in 2.790860 seconds or 179156250 per second
XXH64_finalize:
get_time_in_seconds:
call gettimeofday@PLT
call __assert_fail@PLT
call __stack_chk_fail@PLT
foo1:
call get_time_in_seconds
call XXH64_finalize
call get_time_in_seconds
call __printf_chk@PLT
call __stack_chk_fail@PLT
foo2:
call get_time_in_seconds
call XXH64_finalize
call get_time_in_seconds
call __printf_chk@PLT
call __stack_chk_fail@PLT
main:
call foo1
loop:
$ wc --bytes mystery*
16280 mystery0.exe
3088 mystery0.o
3508 mystery0.s
16344 mystery1.exe
4064 mystery1.o
6976 mystery1.s
3314 mystery.c
The xxHash README says "By default, xxHash uses __attribute__((always_inline)) and __forceinline to improve performance at the cost of code size." but does it really? Or is this a compiler bug? Or should I be compiling the above example differently somehow?
P.S. This also goes against my understanding of how the compiler decides whether or not to inline when force-inlining is not specified. I always thought the compiler decides based on the code size of the function being inlined. But in this example, the code size of that function stays the same. We just add another function which is never actually called. So... ?!