trafficserver
trafficserver copied to clipboard
Overhead of dynamic_cast
Here are some numbers from the measurement of the overhead of dynamic_cast
on the inbound side of HTTP/1.1 on TCP.
Profile on Instruments
Instruments says 0.7% on `__dynamic_cast`, and it's the second heaviest call of `Http1ClientSession::new_transaction()`
( on the master branch (c6d3f6fac) )
req/sec
| items | master | PoC | diff |
|---|---|---|---|
| avg | 41379.7169 | 42578.5729 | 1.029 |
| 90%tile | 44459.874 | 45816.123 | 1.031 |
| min | 34569.55 | 35596.56 | 1.030 |
| max | 46382.62 | 47461.07 | 1.023 |
- master: c6d3f6fac
- PoC: #8002 (e9a837a29fccde93ad63b785771ef12945e15a63)
- CPU: Intel(R) Xeon(R) CPU E5-2660 v3 @ 2.60GHz, 40 core
- HTTP/1.1 on TCP (no TLS)
- no Keep-Alive
- 1KB response body
- 100% cache hit
- ATS has default thread settings
- Client:
for x in {1..100}; do h2load --h1 -n 100000 -m 1 -c 10 http://127.0.0.1:8080/static/1kb >> master.log; done
@ywkaras wanted to take a look at this
If you're doing a dynamic cast of a pointer, and checking that the result is not zero with an assert, you can use this instead: https://github.com/apache/trafficserver/blob/ea5be6f04c2a087f0581f4cb2c67f693c7ff71f6/include/tscore/ink_assert.h#L72
If you're doing a dynamic cast of a reference, and not catching any exception it might throw, you can use this instead: https://github.com/apache/trafficserver/blob/ea5be6f04c2a087f0581f4cb2c67f693c7ff71f6/include/tscore/ink_assert.h#L90
But TS will get into a corrupt state, rather than abort, in a release build if the cast is invalid.
If you're actually using dynamic_cast for runtime type detection, you can instead keep track of the type in a member variable, or use typeid:
// Tag naming the concrete class of an A-derived object.
enum class T {
  B, // the object is a B
  C, // the object is a C
};
struct A
{
A(T t_) : t(t_) {}
virtual ~A();
T t;
};
// Derived class that tags itself as T::B in the base.
struct B : A {
  B() : A{T::B} {}
};
// Derived class that tags itself as T::C in the base.
struct C : A {
  C() : A{T::C} {}
};
// Downcast using the tag stored in A.
// Returns `a` as a B* when a->t is T::B, and nullptr otherwise.
// `a` is dereferenced unconditionally, so it must not be null.
B *
a_to_b1(A *a)
{
  if (a->t != T::B) {
    return nullptr;
  }
  return static_cast<B *>(a);
}
// Downcast using dynamic_cast.
// Returns nullptr when `a` is null or does not point to a B (or to an object
// of a class derived from B).
B *
a_to_b2(A *a)
{
  B *const as_b = dynamic_cast<B *>(a);
  return as_b;
}
#include <typeinfo>
// Downcast by comparing the dynamic type of *a against B with typeid.
// This is an exact-type match: unlike dynamic_cast, an object of a class
// derived from B yields nullptr. Applying typeid to *a when `a` is null
// throws std::bad_typeid rather than returning nullptr.
B *
a_to_b3(A *a)
{
  const bool is_exactly_b = (typeid(*a) == typeid(B));
  if (!is_exactly_b) {
    return nullptr;
  }
  return static_cast<B *>(a);
}
Here is the resulting assembly (compiled with -O3)
.section __TEXT,__text,regular,pure_instructions
.build_version macos, 10, 15, 4 sdk_version 10, 15, 4
## a_to_b1(A *): downcast decided by the tag member.
## No call and no branch: one compare plus a conditional move.
.globl __Z7a_to_b1P1A ## -- Begin function _Z7a_to_b1P1A
.p2align 4, 0x90
__Z7a_to_b1P1A: ## @_Z7a_to_b1P1A
.cfi_startproc
## %bb.0:
## Standard frame setup.
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
## Result defaults to nullptr.
xorl %eax, %eax
## Compare a->t with T::B (value 0); t sits at offset 8, after the vtable pointer.
cmpl $0, 8(%rdi)
## If the tag matched, the result is `a` itself.
cmoveq %rdi, %rax
popq %rbp
retq
.cfi_endproc
## -- End function
## a_to_b2(A *): downcast via dynamic_cast.
## A null pointer is handled inline; everything else is a tail call into the
## C++ runtime's ___dynamic_cast.
.globl __Z7a_to_b2P1A ## -- Begin function _Z7a_to_b2P1A
.p2align 4, 0x90
__Z7a_to_b2P1A: ## @_Z7a_to_b2P1A
.cfi_startproc
## %bb.0:
## Null input: skip the runtime call and return nullptr (LBB1_1).
testq %rdi, %rdi
je LBB1_1
## %bb.2:
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
## Arguments for ___dynamic_cast: %rdi still holds `a`,
## %rsi = type_info of the source type A, %rdx = type_info of the target type B.
movq __ZTI1A@GOTPCREL(%rip), %rsi
movq __ZTI1B@GOTPCREL(%rip), %rdx
## Fourth argument = 0 (the source-to-destination offset hint of the Itanium C++ ABI).
xorl %ecx, %ecx
popq %rbp
## The runtime's return value becomes this function's return value.
jmp ___dynamic_cast ## TAILCALL
LBB1_1:
xorl %eax, %eax
retq
.cfi_endproc
## -- End function
## a_to_b3(A *): downcast via typeid(*a) == typeid(B).
## The type_info comparison is inlined as a pointer compare of the two type-name
## strings, with a strcmp of the names as the fallback when the pointers differ.
.globl __Z7a_to_b3P1A ## -- Begin function _Z7a_to_b3P1A
.p2align 4, 0x90
__Z7a_to_b3P1A: ## @_Z7a_to_b3P1A
.cfi_startproc
## %bb.0:
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
## %rbx is callee-saved and will hold `a` across the strcmp call.
pushq %rbx
## Extra 8-byte push; presumably only to keep the stack 16-byte aligned for the calls below.
pushq %rax
.cfi_offset %rbx, -24
## typeid on a dereferenced null pointer must throw: go to LBB2_5.
testq %rdi, %rdi
je LBB2_5
## %bb.1:
movq %rdi, %rbx
## Load the vtable pointer of *a ...
movq (%rdi), %rax
## ... then the type_info pointer stored just before the vtable's address point ...
movq -8(%rax), %rax
## ... then the type-name pointer held at offset 8 of that type_info.
movq 8(%rax), %rdi
## Fast path: the name pointer is the address of B's name string, so the types match.
cmpq __ZTS1B@GOTPCREL(%rip), %rdi
je LBB2_4
## %bb.2:
## Slow path: pointers differ, compare the name strings themselves.
movq __ZTS1B@GOTPCREL(%rip), %rsi
callq _strcmp
testl %eax, %eax
je LBB2_4
## %bb.3:
## Names differ: the result is nullptr.
xorl %ebx, %ebx
LBB2_4:
## Return %rbx: `a` on a match, nullptr otherwise.
movq %rbx, %rax
addq $8, %rsp
popq %rbx
popq %rbp
retq
LBB2_5:
## Null operand: the runtime throws std::bad_typeid; no code follows the call.
callq ___cxa_bad_typeid
.cfi_endproc
## -- End function
## RTTI data for B, emitted as weak definitions.
.section __TEXT,__const
.globl __ZTS1B ## @_ZTS1B
.weak_definition __ZTS1B
## Type-name string of B: its mangled name "1B".
__ZTS1B:
.asciz "1B"
.section __DATA,__const
.globl __ZTI1B ## @_ZTI1B
.weak_definition __ZTI1B
.p2align 3
## type_info object of B, laid out as a single-inheritance class type_info:
__ZTI1B:
## vtable pointer of __cxxabiv1::__si_class_type_info
.quad __ZTVN10__cxxabiv120__si_class_type_infoE+16
## pointer to the type-name string above
.quad __ZTS1B
## pointer to the type_info of the base class A
.quad __ZTI1A
Explicitly keeping track of the type looks like it would probably be faster. It's not clear whether using typeid would be any faster than dynamic_cast.
This issue has been automatically marked as stale because it has not had recent activity. Marking it stale to flag it for further consideration by the community.
Closing this as #9482 removed many `dynamic_cast`s and `attach_client_session` disappeared.