garnet
garnet copied to clipboard
Coalesce some writes in RespWriteUtils
Help the JIT coalesce some word- and dword-sized stores in RespWriteUtils.
Sample diffs
Garnet.common.RespWriteUtils:WriteNull
; Assembly listing for method Garnet.common.RespWriteUtils:WriteNull(byref,ulong):ubyte (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX - Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
+; 1 inlinees with PGO data; 3 single block inlinees; 0 inlinees without PGO data
; Final local variable assignments
;
-; V00 arg0 [V00,T00] ( 9, 6 ) byref -> rcx single-def
+; V00 arg0 [V00,T00] ( 7, 5 ) byref -> rcx single-def
; V01 arg1 [V01,T01] ( 3, 3 ) long -> rdx single-def
-; V02 loc0 [V02,T02] ( 9, 4.50) long -> r8
-; V03 OutArgs [V03 ] ( 1, 1 ) struct (32) [rsp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-; V04 cse0 [V04,T03] ( 3, 2.50) long -> r8 "CSE #01: aggressive"
+; V02 loc0 [V02,T04] ( 3, 1.50) long -> r8 single-def
+;# V03 OutArgs [V03 ] ( 1, 1 ) struct ( 0) [rsp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
+;* V04 tmp1 [V04 ] ( 0, 0 ) struct (16) zero-ref ld-addr-op "NewObj constructor temp" <System.ReadOnlySpan`1[ubyte]>
+;* V05 tmp2 [V05 ] ( 0, 0 ) struct (16) zero-ref "Inlining Arg" <System.ReadOnlySpan`1[ubyte]>
+; V06 tmp3 [V06,T03] ( 2, 2 ) long -> rax "impAppendStmt"
+;* V07 tmp4 [V07 ] ( 0, 0 ) struct (16) zero-ref ld-addr-op "Inlining Arg" <System.ReadOnlySpan`1[ubyte]>
+;* V08 tmp5 [V08 ] ( 0, 0 ) byref -> zero-ref single-def "field V04._reference (fldOffset=0x0)" P-INDEP
+;* V09 tmp6 [V09 ] ( 0, 0 ) int -> zero-ref single-def "field V04._length (fldOffset=0x8)" P-INDEP
+;* V10 tmp7 [V10 ] ( 0, 0 ) byref -> zero-ref single-def "field V05._reference (fldOffset=0x0)" P-INDEP
+;* V11 tmp8 [V11 ] ( 0, 0 ) int -> zero-ref single-def "field V05._length (fldOffset=0x8)" P-INDEP
+;* V12 tmp9 [V12 ] ( 0, 0 ) byref -> zero-ref single-def "field V07._reference (fldOffset=0x0)" P-INDEP
+;* V13 tmp10 [V13 ] ( 0, 0 ) int -> zero-ref "field V07._length (fldOffset=0x8)" P-INDEP
+; V14 cse0 [V14,T02] ( 3, 2.50) long -> r8 "CSE #01: aggressive"
;
-; Lcl frame size = 40
+; Lcl frame size = 0
G_M4163_IG01: ;; offset=0x0000
- sub rsp, 40
- ;; size=4 bbWeight=1 PerfScore 0.25
-G_M4163_IG02: ;; offset=0x0004
+ ;; size=0 bbWeight=1 PerfScore 0.00
+G_M4163_IG02: ;; offset=0x0000
mov r8, qword ptr [rcx]
sub rdx, r8
cmp edx, 5
jge SHORT G_M4163_IG05
;; size=11 bbWeight=1 PerfScore 3.50
-G_M4163_IG03: ;; offset=0x000F
+G_M4163_IG03: ;; offset=0x000B
xor eax, eax
;; size=2 bbWeight=0.50 PerfScore 0.12
-G_M4163_IG04: ;; offset=0x0011
- add rsp, 40
+G_M4163_IG04: ;; offset=0x000D
ret
- ;; size=5 bbWeight=0.50 PerfScore 0.62
-G_M4163_IG05: ;; offset=0x0016
+ ;; size=1 bbWeight=0.50 PerfScore 0.50
+G_M4163_IG05: ;; offset=0x000E
lea rax, [r8+0x01]
mov qword ptr [rcx], rax
mov byte ptr [r8], 36
- mov r8, qword ptr [rcx]
- lea rax, [r8+0x01]
- mov qword ptr [rcx], rax
- mov byte ptr [r8], 45
- mov r8, qword ptr [rcx]
- lea rax, [r8+0x01]
- mov qword ptr [rcx], rax
- mov byte ptr [r8], 49
- call [Garnet.common.RespWriteUtils:WriteNewline(byref)]
+ mov rax, qword ptr [rcx]
+ mov dword ptr [rax], 0xA0D312D
+ add qword ptr [rcx], 4
mov eax, 1
- ;; size=50 bbWeight=0.50 PerfScore 7.38
-G_M4163_IG06: ;; offset=0x0048
- add rsp, 40
+ ;; size=29 bbWeight=0.50 PerfScore 4.38
+G_M4163_IG06: ;; offset=0x002B
ret
- ;; size=5 bbWeight=0.50 PerfScore 0.62
+ ;; size=1 bbWeight=0.50 PerfScore 0.50
-; Total bytes of code 77, prolog size 4, PerfScore 12.50, instruction count 23, allocated bytes for code 77 (MethodHash=bc12efbc) for method Garnet.common.RespWriteUtils:WriteNull(byref,ulong):ubyte (FullOpts)
+; Total bytes of code 44, prolog size 0, PerfScore 9.00, instruction count 14, allocated bytes for code 44 (MethodHash=bc12efbc) for method Garnet.common.RespWriteUtils:WriteNull(byref,ulong):ubyte (FullOpts)
; ============================================================
Garnet.common.RespWriteUtils:WriteIntegerAsBulkString
; Assembly listing for method Garnet.common.RespWriteUtils:WriteIntegerAsBulkString(int,byref,ulong):ubyte (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX - Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
+; 2 inlinees with PGO data; 8 single block inlinees; 0 inlinees without PGO data
; Final local variable assignments
;
-; V00 arg0 [V00,T02] ( 5, 4.50) int -> rsi single-def
-; V01 arg1 [V01,T00] ( 14, 8.50) byref -> rbx single-def
-; V02 arg2 [V02,T03] ( 3, 3 ) long -> rdi single-def
-; V03 loc0 [V03,T04] ( 5, 4 ) int -> rbp single-def
-; V04 loc1 [V04,T05] ( 4, 3.50) ubyte -> r14 single-def
-; V05 loc2 [V05,T06] ( 3, 2.50) int -> rdx single-def
-; V06 loc3 [V06,T01] ( 15, 7.50) long -> r10
+; V00 arg0 [V00,T01] ( 5, 4.50) int -> rsi single-def
+; V01 arg1 [V01,T00] ( 12, 7.50) byref -> rbx single-def
+; V02 arg2 [V02,T02] ( 3, 3 ) long -> rdi single-def
+; V03 loc0 [V03,T03] ( 4, 3.50) int -> rbp single-def
+; V04 loc1 [V04,T04] ( 3, 3 ) ubyte -> r14 single-def
+; V05 loc2 [V05,T05] ( 3, 2.50) int -> rdx single-def
+; V06 loc3 [V06,T10] ( 3, 1.50) long -> rcx single-def
; V07 OutArgs [V07 ] ( 1, 1 ) struct (32) [rsp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-; V08 cse0 [V08,T07] ( 3, 2.50) long -> r10 "CSE #01: aggressive"
+;* V08 tmp1 [V08 ] ( 0, 0 ) struct (16) zero-ref ld-addr-op "NewObj constructor temp" <System.ReadOnlySpan`1[ubyte]>
+;* V09 tmp2 [V09 ] ( 0, 0 ) struct (16) zero-ref "Inlining Arg" <System.ReadOnlySpan`1[ubyte]>
+; V10 tmp3 [V10,T08] ( 2, 2 ) long -> rcx "impAppendStmt"
+;* V11 tmp4 [V11 ] ( 0, 0 ) struct (16) zero-ref ld-addr-op "Inlining Arg" <System.ReadOnlySpan`1[ubyte]>
+;* V12 tmp5 [V12 ] ( 0, 0 ) struct (16) zero-ref ld-addr-op "NewObj constructor temp" <System.ReadOnlySpan`1[ubyte]>
+;* V13 tmp6 [V13 ] ( 0, 0 ) struct (16) zero-ref "Inlining Arg" <System.ReadOnlySpan`1[ubyte]>
+; V14 tmp7 [V14,T09] ( 2, 2 ) long -> rax "impAppendStmt"
+;* V15 tmp8 [V15 ] ( 0, 0 ) struct (16) zero-ref ld-addr-op "Inlining Arg" <System.ReadOnlySpan`1[ubyte]>
+;* V16 tmp9 [V16 ] ( 0, 0 ) byref -> zero-ref single-def "field V08._reference (fldOffset=0x0)" P-INDEP
+;* V17 tmp10 [V17 ] ( 0, 0 ) int -> zero-ref single-def "field V08._length (fldOffset=0x8)" P-INDEP
+;* V18 tmp11 [V18 ] ( 0, 0 ) byref -> zero-ref single-def "field V09._reference (fldOffset=0x0)" P-INDEP
+;* V19 tmp12 [V19 ] ( 0, 0 ) int -> zero-ref single-def "field V09._length (fldOffset=0x8)" P-INDEP
+;* V20 tmp13 [V20 ] ( 0, 0 ) byref -> zero-ref single-def "field V11._reference (fldOffset=0x0)" P-INDEP
+;* V21 tmp14 [V21 ] ( 0, 0 ) int -> zero-ref "field V11._length (fldOffset=0x8)" P-INDEP
+;* V22 tmp15 [V22 ] ( 0, 0 ) byref -> zero-ref single-def "field V12._reference (fldOffset=0x0)" P-INDEP
+;* V23 tmp16 [V23 ] ( 0, 0 ) int -> zero-ref single-def "field V12._length (fldOffset=0x8)" P-INDEP
+;* V24 tmp17 [V24 ] ( 0, 0 ) byref -> zero-ref single-def "field V13._reference (fldOffset=0x0)" P-INDEP
+;* V25 tmp18 [V25 ] ( 0, 0 ) int -> zero-ref single-def "field V13._length (fldOffset=0x8)" P-INDEP
+;* V26 tmp19 [V26 ] ( 0, 0 ) byref -> zero-ref single-def "field V15._reference (fldOffset=0x0)" P-INDEP
+;* V27 tmp20 [V27 ] ( 0, 0 ) int -> zero-ref "field V15._length (fldOffset=0x8)" P-INDEP
+; V28 cse0 [V28,T06] ( 3, 2.50) long -> rcx "CSE #01: aggressive"
+; V29 cse1 [V29,T07] ( 3, 2.50) int -> r15 "CSE #02: aggressive"
;
-; Lcl frame size = 32
+; Lcl frame size = 40
G_M4598_IG01: ;; offset=0x0000
+ push r15
push r14
push rdi
push rsi
push rbp
push rbx
- sub rsp, 32
+ sub rsp, 40
mov esi, ecx
mov rbx, rdx
mov rdi, r8
- ;; size=18 bbWeight=1 PerfScore 6.00
-G_M4598_IG02: ;; offset=0x0012
+ ;; size=20 bbWeight=1 PerfScore 7.00
+G_M4598_IG02: ;; offset=0x0014
movsxd rcx, esi
call [Garnet.common.NumUtils:NumDigitsInLong(long):int]
mov ebp, eax
mov r14d, esi
shr r14d, 31
- lea ecx, [r14+rbp]
+ lea r15d, [r14+rbp]
+ mov ecx, r15d
call [Garnet.common.NumUtils:NumDigits(int):int]
mov edx, eax
- lea eax, [rdx+r14]
- lea eax, [rax+rbp+0x05]
- mov r10, qword ptr [rbx]
- sub rdi, r10
+ add r14d, edx
+ lea eax, [r14+rbp+0x05]
+ mov rcx, qword ptr [rbx]
+ sub rdi, rcx
cmp eax, edi
jle SHORT G_M4598_IG05
- ;; size=48 bbWeight=1 PerfScore 13.00
-G_M4598_IG03: ;; offset=0x0042
+ ;; size=51 bbWeight=1 PerfScore 13.00
+G_M4598_IG03: ;; offset=0x0047
xor eax, eax
;; size=2 bbWeight=0.50 PerfScore 0.12
-G_M4598_IG04: ;; offset=0x0044
- add rsp, 32
+G_M4598_IG04: ;; offset=0x0049
+ add rsp, 40
pop rbx
pop rbp
pop rsi
pop rdi
pop r14
+ pop r15
ret
- ;; size=11 bbWeight=0.50 PerfScore 1.88
-G_M4598_IG05: ;; offset=0x004F
- lea rcx, [r10+0x01]
- mov qword ptr [rbx], rcx
- mov byte ptr [r10], 36
- lea ecx, [r14+rbp]
+ ;; size=13 bbWeight=0.50 PerfScore 2.12
+G_M4598_IG05: ;; offset=0x0056
+ lea r8, [rcx+0x01]
+ mov qword ptr [rbx], r8
+ mov byte ptr [rcx], 36
+ mov ecx, r15d
mov r8, rbx
call [Garnet.common.NumUtils:IntToBytes(int,int,byref)]
- mov r10, qword ptr [rbx]
- lea rcx, [r10+0x01]
- mov qword ptr [rbx], rcx
- mov byte ptr [r10], 13
- mov r10, qword ptr [rbx]
- lea rcx, [r10+0x01]
- mov qword ptr [rbx], rcx
- mov byte ptr [r10], 10
+ mov rcx, qword ptr [rbx]
+ mov word ptr [rcx], 0xA0D
+ add qword ptr [rbx], 2
mov ecx, esi
mov edx, ebp
mov r8, rbx
call [Garnet.common.NumUtils:IntToBytes(int,int,byref)]
- mov r10, qword ptr [rbx]
- lea rax, [r10+0x01]
- mov qword ptr [rbx], rax
- mov byte ptr [r10], 13
- mov r10, qword ptr [rbx]
- lea rax, [r10+0x01]
- mov qword ptr [rbx], rax
- mov byte ptr [r10], 10
+ mov rax, qword ptr [rbx]
+ mov word ptr [rax], 0xA0D
+ add qword ptr [rbx], 2
mov eax, 1
- ;; size=98 bbWeight=0.50 PerfScore 14.12
-G_M4598_IG06: ;; offset=0x00B1
- add rsp, 32
+ ;; size=64 bbWeight=0.50 PerfScore 11.00
+G_M4598_IG06: ;; offset=0x0096
+ add rsp, 40
pop rbx
pop rbp
pop rsi
pop rdi
pop r14
+ pop r15
ret
- ;; size=11 bbWeight=0.50 PerfScore 1.88
+ ;; size=13 bbWeight=0.50 PerfScore 2.12
-; Total bytes of code 188, prolog size 10, PerfScore 37.00, instruction count 65, allocated bytes for code 188 (MethodHash=cb87ee09) for method Garnet.common.RespWriteUtils:WriteIntegerAsBulkString(int,byref,ulong):ubyte (FullOpts)
+; Total bytes of code 163, prolog size 12, PerfScore 35.38, instruction count 59, allocated bytes for code 163 (MethodHash=cb87ee09) for method Garnet.common.RespWriteUtils:WriteIntegerAsBulkString(int,byref,ulong):ubyte (FullOpts)
+; ============================================================
Very cool!!