trafficserver icon indicating copy to clipboard operation
trafficserver copied to clipboard

Overhead of dynamic_cast

Open masaori335 opened this issue 3 years ago • 3 comments

Here are some numbers from the measurement of the overhead of dynamic_cast on the inbound side of HTTP/1.1 on TCP.

Profile on Instruments

Instruments reports 0.7% in __dynamic_cast, which makes it the second heaviest call in Http1ClientSession::new_transaction() (on the master branch, c6d3f6fac). Screen Shot 2021-06-28 at 9 52 58

req/sec

items master PoC diff
avg 41379.7169 42578.5729 1.029
90%tile 44459.874 45816.123 1.031
min 34569.55 35596.56 1.030
max 46382.62 47461.07 1.023
  • master: c6d3f6fac
  • PoC: #8002 (e9a837a29fccde93ad63b785771ef12945e15a63)
  • CPU: Intel(R) Xeon(R) CPU E5-2660 v3 @ 2.60GHz, 40 core
  • HTTP/1.1 on TCP (no TLS)
  • no Keep-Alive
  • 1KB response body
  • 100% cache hit
  • ATS has default thread settings
  • Client:
for x in {1..100}; do h2load --h1 -n 100000 -m 1 -c 10 http://127.0.0.1:8080/static/1kb >> master.log; done

masaori335 avatar Jun 28 '21 13:06 masaori335

@ywkaras wanted to take a look at this

bryancall avatar Jul 19 '21 23:07 bryancall

If you're doing a dynamic cast of a pointer, and checking that the result is not zero with an assert, you can use this instead: https://github.com/apache/trafficserver/blob/ea5be6f04c2a087f0581f4cb2c67f693c7ff71f6/include/tscore/ink_assert.h#L72

If you're doing a dynamic cast of a reference, and not catching any exception it might throw, you can use this instead: https://github.com/apache/trafficserver/blob/ea5be6f04c2a087f0581f4cb2c67f693c7ff71f6/include/tscore/ink_assert.h#L90

But TS will get into a corrupt state, rather than abort, in a release build if the cast is invalid.

If you're actually using dynamic_cast for runtime type detection, you can instead keep track of the type in a member variable, or use typeid:

// Type tag stored in every A so that code can tell which concrete class it is
// looking at by reading a member instead of querying RTTI.
enum class T {
  B,
  C,
};

struct A
{
  A(T t_) : t(t_) {}
  virtual ~A();

  T t;
};

// Derived type whose constructor tags the A base sub-object with T::B.
// (struct inheritance is public by default.)
struct B : A
{
  B() : A{T::B} {}
};

// Derived type whose constructor tags the A base sub-object with T::C.
// (struct inheritance is public by default.)
struct C : A
{
  C() : A{T::C} {}
};

// Downcast driven by the stored tag: returns `a` as a B* when a->t is T::B,
// and nullptr for any other tag.
// `a` is dereferenced unconditionally, so a null `a` is not handled here.
B * a_to_b1(A *a)
{
  if (a->t != T::B) {
    return nullptr;
  }
  return static_cast<B *>(a);
}

// Downcast via RTTI: returns whatever dynamic_cast<B *> yields for `a`
// (nullptr when the cast does not succeed).
B * a_to_b2(A *a)
{
  B *const as_b = dynamic_cast<B *>(a);
  return as_b;
}

#include <typeinfo>

// Downcast via typeid: returns `a` as a B* only when the dynamic type of *a
// compares equal to typeid(B); otherwise returns nullptr.
// typeid(*a) is evaluated without a prior null check on `a`.
B * a_to_b3(A *a)
{
  if (typeid(*a) != typeid(B)) {
    return nullptr;
  }
  return static_cast<B *>(a);
}

Here is the resulting assembly (compiled with -O3):

	.section	__TEXT,__text,regular,pure_instructions
	.build_version macos, 10, 15, 4	sdk_version 10, 15, 4
	.globl	__Z7a_to_b1P1A          ## -- Begin function _Z7a_to_b1P1A
## a_to_b1(A *): the tag-based downcast. The body is a compare plus a
## conditional move; it makes no calls and has no branches.
	.p2align	4, 0x90
__Z7a_to_b1P1A:                         ## @_Z7a_to_b1P1A
	.cfi_startproc
## %bb.0:
	pushq	%rbp
	.cfi_def_cfa_offset 16
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
	.cfi_def_cfa_register %rbp
## Return value starts out as nullptr.
	xorl	%eax, %eax
## Compare the 32-bit field at offset 8 of *a with 0. This corresponds to
## `a->t == T::B` (T::B is the first enumerator; offset 0 presumably holds the vptr).
	cmpl	$0, 8(%rdi)
## If they were equal, the return value becomes `a` itself.
	cmoveq	%rdi, %rax
	popq	%rbp
	retq
	.cfi_endproc
                                        ## -- End function
	.globl	__Z7a_to_b2P1A          ## -- Begin function _Z7a_to_b2P1A
## a_to_b2(A *): the dynamic_cast downcast. A null input is answered inline;
## everything else is forwarded to the runtime's ___dynamic_cast.
	.p2align	4, 0x90
__Z7a_to_b2P1A:                         ## @_Z7a_to_b2P1A
	.cfi_startproc
## %bb.0:
## Null check on `a`: a null pointer skips the runtime call entirely.
	testq	%rdi, %rdi
	je	LBB1_1
## %bb.2:
	pushq	%rbp
	.cfi_def_cfa_offset 16
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
	.cfi_def_cfa_register %rbp
## Set up the remaining arguments; %rdi still holds `a`.
## %rsi = address of the type_info for A, %rdx = address of the type_info for B.
	movq	__ZTI1A@GOTPCREL(%rip), %rsi
	movq	__ZTI1B@GOTPCREL(%rip), %rdx
## %ecx = 0 (presumably the fourth argument of ___dynamic_cast, the ABI's
## source-to-destination offset hint).
	xorl	%ecx, %ecx
	popq	%rbp
## Tail call: the result of ___dynamic_cast is returned to our caller directly.
	jmp	___dynamic_cast         ## TAILCALL
LBB1_1:
## Null input: return nullptr.
	xorl	%eax, %eax
	retq
	.cfi_endproc
                                        ## -- End function
	.globl	__Z7a_to_b3P1A          ## -- Begin function _Z7a_to_b3P1A
## a_to_b3(A *): the typeid-based downcast. It fetches the type name pointer
## reachable from the object's vtable and compares it with B's name, first by
## address and then, if the addresses differ, with strcmp.
	.p2align	4, 0x90
__Z7a_to_b3P1A:                         ## @_Z7a_to_b3P1A
	.cfi_startproc
## %bb.0:
	pushq	%rbp
	.cfi_def_cfa_offset 16
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
	.cfi_def_cfa_register %rbp
## %rbx is saved because it will hold `a` across the strcmp call; the push of
## %rax is presumably only there to keep the stack aligned.
	pushq	%rbx
	pushq	%rax
	.cfi_offset %rbx, -24
## A null `a` goes to LBB2_5, which calls ___cxa_bad_typeid.
	testq	%rdi, %rdi
	je	LBB2_5
## %bb.1:
## Keep `a` in %rbx; it is the value returned on a match.
	movq	%rdi, %rbx
## Load the first word of *a (the vptr), then the pointer stored 8 bytes before
## the address it points at, then the second word of that object. Given the
## __ZTI1B layout below (second quad = __ZTS1B), this is the type name pointer.
	movq	(%rdi), %rax
	movq	-8(%rax), %rax
	movq	8(%rax), %rdi
## Fast path: same address as B's name string means the types match.
	cmpq	__ZTS1B@GOTPCREL(%rip), %rdi
	je	LBB2_4
## %bb.2:
## Slow path: compare the two name strings by content.
	movq	__ZTS1B@GOTPCREL(%rip), %rsi
	callq	_strcmp
	testl	%eax, %eax
	je	LBB2_4
## %bb.3:
## Names differ: the result becomes nullptr.
	xorl	%ebx, %ebx
LBB2_4:
## Return %rbx (either `a` or nullptr) and unwind the two pushes.
	movq	%rbx, %rax
	addq	$8, %rsp
	popq	%rbx
	popq	%rbp
	retq
LBB2_5:
## Null `a`: no instructions follow this call, so it is not expected to return.
	callq	___cxa_bad_typeid
	.cfi_endproc
                                        ## -- End function
	.section	__TEXT,__const
## __ZTS1B: the NUL-terminated string "1B", emitted as a weak definition.
## The __ZTI1B record in this listing points at it, and the a_to_b3 code
## compares against it.
	.globl	__ZTS1B                 ## @_ZTS1B
	.weak_definition	__ZTS1B
__ZTS1B:
	.asciz	"1B"

	.section	__DATA,__const
## __ZTI1B: a three-pointer record, emitted as a weak definition and aligned to
## 8 bytes. Going by the symbol names it is the type_info object for B.
	.globl	__ZTI1B                 ## @_ZTI1B
	.weak_definition	__ZTI1B
	.p2align	3
__ZTI1B:
## Word 0: pointer 16 bytes into the vtable symbol for
## __cxxabiv1::__si_class_type_info.
	.quad	__ZTVN10__cxxabiv120__si_class_type_infoE+16
## Word 1: pointer to the name string __ZTS1B ("1B").
	.quad	__ZTS1B
## Word 2: pointer to __ZTI1A, which this listing references but does not define.
	.quad	__ZTI1A

Explicitly keeping track of the type looks like it would probably be faster. Not clear if using typeid would be any faster.

ywkaras avatar Jul 20 '21 01:07 ywkaras

This issue has been automatically marked as stale because it has not had recent activity. Marking it stale to flag it for further consideration by the community.

github-actions[bot] avatar Jul 20 '22 02:07 github-actions[bot]

Closing this as #9482 removed many dynamic_casts and attach_client_session disappeared.

maskit avatar Jul 11 '23 18:07 maskit