garnet icon indicating copy to clipboard operation
garnet copied to clipboard

Optimize NumUtils.CRC16

Open PaulusParssinen opened this issue 1 year ago • 11 comments

This PR changes the NumUtils.CRC16 method to use RVA instead of readonly static for the precomputed CRC16 table.

-; Total bytes of code 121, prolog size 11, PerfScore 53.06, instruction count 41, allocated bytes for code 121 (MethodHash=13cde191) for method Garnet.common.NumUtils:CRC16(ulong,int):ushort (FullOpts)
+; Total bytes of code 65, prolog size 0, PerfScore 36.06, instruction count 21, allocated bytes for code 65 (MethodHash=13cde191) for method Garnet.common.NumUtils:CRC16(ulong,int):ushort (FullOpts)

Full diff

Garnet.common.NumUtils:CRC16
 method Garnet.common.NumUtils:CRC16(ulong,int):ushort (FullOpts)
 ; Emitting BLENDED_CODE for X64 with AVX - Windows
 ; FullOpts code
 ; optimized code
 ; rsp based frame
 ; fully interruptible
 ; No PGO data
+; 0 inlinees with PGO data; 2 single block inlinees; 0 inlinees without PGO data
 ; Final local variable assignments
 ;
 ;  V00 arg0         [V00,T01] (  7, 16   )    long  ->  registers  
-;  V01 arg1         [V01,T04] (  3,  3   )     int  ->  rdx         single-def
-;  V02 loc0         [V02,T02] (  5, 14   )  ushort  ->  rsi        
-;  V03 loc1         [V03,T03] (  3,  6   )    long  ->  rdi         single-def
-;  V04 OutArgs      [V04    ] (  1,  1   )  struct (32) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-;  V05 tmp1         [V05,T00] (  3, 24   )    long  ->  rbx         "impSpillLclRefs"
-;* V06 cse0         [V06,T06] (  0,  0   )    long  ->  zero-ref    hoist "CSE #01: aggressive"
-;  V07 cse1         [V07,T05] (  2,  4.25)    long  ->  rax         hoist "CSE #02: aggressive"
+;  V01 arg1         [V01,T05] (  3,  3   )     int  ->  rdx         single-def
+;  V02 loc0         [V02,T02] (  5, 14   )  ushort  ->  rax        
+;  V03 loc1         [V03,T04] (  3,  6   )    long  ->  rdx         single-def
+;  V04 loc2         [V04,T03] (  2,  8   )    long  ->  rcx        
+;# V05 OutArgs      [V05    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
+;  V06 tmp1         [V06,T00] (  3, 24   )    long  ->  rcx         "impSpillLclRefs"
+;* V07 tmp2         [V07    ] (  0,  0   )  struct (16) zero-ref    "spilled call-like call argument" <System.ReadOnlySpan`1[ushort]>
+;* V08 tmp3         [V08    ] (  0,  0   )  struct (16) zero-ref    "ReadOnlySpan<T> for CreateSpan<T>" <System.ReadOnlySpan`1[ushort]>
+;* V09 tmp4         [V09    ] (  0,  0   )  struct (16) zero-ref    ld-addr-op "Inlining Arg" <System.ReadOnlySpan`1[ushort]>
+;* V10 tmp5         [V10    ] (  0,  0   )   byref  ->  zero-ref    "field V07._reference (fldOffset=0x0)" P-INDEP
+;* V11 tmp6         [V11    ] (  0,  0   )     int  ->  zero-ref    "field V07._length (fldOffset=0x8)" P-INDEP
+;* V12 tmp7         [V12    ] (  0,  0   )   byref  ->  zero-ref    "field V08._reference (fldOffset=0x0)" P-INDEP
+;* V13 tmp8         [V13    ] (  0,  0   )     int  ->  zero-ref    "field V08._length (fldOffset=0x8)" P-INDEP
+;* V14 tmp9         [V14    ] (  0,  0   )   byref  ->  zero-ref    "field V09._reference (fldOffset=0x0)" P-INDEP
+;* V15 tmp10        [V15    ] (  0,  0   )     int  ->  zero-ref    "field V09._length (fldOffset=0x8)" P-INDEP
+;  V16 cse0         [V16,T06] (  2,  4.25)    long  ->   r8         hoist "CSE #01: aggressive"
 ;
-; Lcl frame size = 40
+; Lcl frame size = 0
 
 G_M7790_IG01:  ;; offset=0x0000
-       push     rdi
-       push     rsi
-       push     rbp
-       push     rbx
-       sub      rsp, 40
-       mov      rbx, rcx
-						;; size=11 bbWeight=1 PerfScore 4.50
-G_M7790_IG02:  ;; offset=0x000B
-       xor      esi, esi
-       movsxd   rdi, edx
-       add      rdi, rbx
-       cmp      rbx, rdi
-       jae      SHORT G_M7790_IG06
+						;; size=0 bbWeight=1 PerfScore 0.00
+G_M7790_IG02:  ;; offset=0x0000
+       xor      eax, eax
+       movsxd   rdx, edx
+       add      rdx, rcx
+       cmp      rcx, rdx
+       jae      SHORT G_M7790_IG05
 						;; size=13 bbWeight=1 PerfScore 2.00
-G_M7790_IG03:  ;; offset=0x0018
-       test     byte  ptr [(reloc 0x7ffc59808bf6)], 1      ; global ptr
-       je       SHORT G_M7790_IG08
-						;; size=9 bbWeight=0.25 PerfScore 1.00
-G_M7790_IG04:  ;; offset=0x0021
-       mov      rax, 0x7FFC59808C18      ; static handle
-       align    [0 bytes for IG05]
+G_M7790_IG03:  ;; offset=0x000D
+       mov      r8, 0x2195CA42EB0      ; static handle
+       align    [0 bytes for IG04]
 						;; size=10 bbWeight=0.25 PerfScore 0.06
-G_M7790_IG05:  ;; offset=0x002B
-       lea      rcx, [rbx+0x01]
-       mov      rbp, rcx
-       mov      rcx, qword ptr [rax]
-       mov      edx, esi
-       sar      edx, 8
-       movzx    r8, byte  ptr [rbx]
-       xor      edx, r8d
-       movzx    rdx, dl
-       movzx    rcx, word  ptr [rcx+2*rdx]
-       shl      esi, 8
-       xor      ecx, esi
-       movzx    rsi, cx
-       cmp      rbp, rdi
-       mov      rbx, rbp
-       jb       SHORT G_M7790_IG05
-						;; size=45 bbWeight=4 PerfScore 42.00
-G_M7790_IG06:  ;; offset=0x0058
-       mov      eax, esi
-						;; size=2 bbWeight=1 PerfScore 0.25
-G_M7790_IG07:  ;; offset=0x005A
-       add      rsp, 40
-       pop      rbx
-       pop      rbp
-       pop      rsi
-       pop      rdi
+G_M7790_IG04:  ;; offset=0x0017
+       lea      r10, [rcx+0x01]
+       mov      r9d, eax
+       sar      r9d, 8
+       movzx    rcx, byte  ptr [rcx]
+       xor      ecx, r9d
+       movzx    rcx, cl
+       movzx    rcx, word  ptr [r8+2*rcx]
+       shl      eax, 8
+       xor      eax, ecx
+       movzx    rax, ax
+       cmp      r10, rdx
+       mov      rcx, r10
+       jb       SHORT G_M7790_IG04
+						;; size=41 bbWeight=4 PerfScore 33.00
+G_M7790_IG05:  ;; offset=0x0040
        ret      
-						;; size=9 bbWeight=1 PerfScore 3.25
-G_M7790_IG08:  ;; offset=0x0063
-       mov      rcx, 0x7FFC59808B98
-       mov      edx, 46
-       call     CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE
-       jmp      SHORT G_M7790_IG04
-						;; size=22 bbWeight=0 PerfScore 0.00
+						;; size=1 bbWeight=1 PerfScore 1.00
 
-; Total bytes of code 121, prolog size 11, PerfScore 53.06, instruction count 41, allocated bytes for code 121 (MethodHash=13cde191) for method Garnet.common.NumUtils:CRC16(ulong,int):ushort (FullOpts)
+; Total bytes of code 65, prolog size 0, PerfScore 36.06, instruction count 21, allocated bytes for code 65 (MethodHash=13cde191) for method Garnet.common.NumUtils:CRC16(ulong,int):ushort (FullOpts)
 ; ============================================================

PaulusParssinen avatar Mar 29 '24 21:03 PaulusParssinen

Huh, dotnet format gives me nothing locally 🤔

PaulusParssinen avatar Mar 29 '24 21:03 PaulusParssinen

Huh, dotnet format gives me nothing locally 🤔

@TalZaccai?

badrishc avatar Mar 29 '24 21:03 badrishc

Huh, dotnet format gives me nothing locally 🤔

@TalZaccai?

@PaulusParssinen I am seeing dotnet format making changes when run locally...

image

TalZaccai avatar Mar 29 '24 21:03 TalZaccai

Huh, dotnet format gives me nothing locally 🤔

@TalZaccai?

@PaulusParssinen I am seeing dotnet format making changes when run locally...

[pic omitted]

That formatting seems unfortunate 😬I'm running .NET 9 Preview which may be the reason. Testing if I can reproduce it by downgrading.

$ dotnet format --version
9.0.508102+36b6b8610f4d7530bc3bc290aaf85f48012e9d7a

PaulusParssinen avatar Mar 29 '24 21:03 PaulusParssinen

...

That formatting seems unfortunate 😬I'm running .NET 9 Preview which may be the reason. Testing if I can reproduce it by downgrading.

$ dotnet format --version
9.0.508102+36b6b8610f4d7530bc3bc290aaf85f48012e9d7a

Yup..

$ dotnet-format --version
8.0.453106+2651752953c0d41c8c7b8d661cf2237151af33d0
$ dotnet-format --verify-no-changes
C:\src\garnet\libs\common\NumUtils.cs(408,20): error WHITESPACE: Fix whitespace formatting. Replace 1 characters with '\r\n\s\s\s\s\s\s\s\s\s\s\s\s'. [C:\src\garnet\libs\common\Garnet.common.csproj]
C:\src\garnet\libs\common\NumUtils.cs(408,28): error WHITESPACE: Fix whitespace formatting. Replace 1 characters with '\r\n\s\s\s\s\s\s\s\s\s\s\s\s'. [C:\src\garnet\libs\common\Garnet.common.csproj]
[omitted]
C:\src\garnet\libs\common\NumUtils.cs(439,68): error WHITESPACE: Fix whitespace formatting. Replace 1 characters with '\r\n\s\s\s\s\s\s\s\s\s\s\s\s'. [C:\src\garnet\libs\common\Garnet.common.csproj]
NativeCommandExitException: Program "dotnet-format.exe" ended with non-zero exit code: 2.
$ dotnet tool install -g dotnet-format --version "9.*" --add-source https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet9/nuget/v3/index.json
Skipping NuGet package signature verification.
$ dotnet-format --version
9.0.517501+06fb492cb53e558598e9bf5ee4dd09acc5888a01
$ dotnet-format --verify-no-changes
[nothing]

Well, that's annoying..

PaulusParssinen avatar Mar 29 '24 21:03 PaulusParssinen

... That formatting seems unfortunate 😬I'm running .NET 9 Preview which may be the reason. Testing if I can reproduce it by downgrading.

$ dotnet format --version
9.0.508102+36b6b8610f4d7530bc3bc290aaf85f48012e9d7a

Yup..

$ dotnet-format --version
8.0.453106+2651752953c0d41c8c7b8d661cf2237151af33d0
$ dotnet-format --verify-no-changes
C:\src\garnet\libs\common\NumUtils.cs(408,20): error WHITESPACE: Fix whitespace formatting. Replace 1 characters with '\r\n\s\s\s\s\s\s\s\s\s\s\s\s'. [C:\src\garnet\libs\common\Garnet.common.csproj]
C:\src\garnet\libs\common\NumUtils.cs(408,28): error WHITESPACE: Fix whitespace formatting. Replace 1 characters with '\r\n\s\s\s\s\s\s\s\s\s\s\s\s'. [C:\src\garnet\libs\common\Garnet.common.csproj]
[omitted]
C:\src\garnet\libs\common\NumUtils.cs(439,68): error WHITESPACE: Fix whitespace formatting. Replace 1 characters with '\r\n\s\s\s\s\s\s\s\s\s\s\s\s'. [C:\src\garnet\libs\common\Garnet.common.csproj]
NativeCommandExitException: Program "dotnet-format.exe" ended with non-zero exit code: 2.
$ dotnet tool install -g dotnet-format --version "9.*" --add-source https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet9/nuget/v3/index.json
Skipping NuGet package signature verification.
$ dotnet-format --version
9.0.517501+06fb492cb53e558598e9bf5ee4dd09acc5888a01
$ dotnet-format --verify-no-changes
[nothing]

Well, that's annoying..

Agreed, that is unfortunate...

TalZaccai avatar Mar 29 '24 22:03 TalZaccai

I think this can be considered as dotnet format bug, the question now is, do I apply the fix and maybe wrap it in #region (as member of anti-#region legion, this is very hard 😆)? I'm not sure if using the dotnet-format v9.* in CI would require the .NET 9 SDK too.

PaulusParssinen avatar Mar 29 '24 22:03 PaulusParssinen

Filed https://github.com/dotnet/sdk/issues/39898 (dotnet format recently moved there)

I will format it using .NET 8 rules and move it to end of the NumUtils file for now, if thats okay?

PaulusParssinen avatar Mar 29 '24 22:03 PaulusParssinen

Filed dotnet/sdk#39898 (dotnet format recently moved there)

I will format it using .NET 8 rules and move it to end of the NumUtils file for now, if thats okay?

Sure, no worries! Hopefully they'll address that...

TalZaccai avatar Mar 29 '24 22:03 TalZaccai

I just realized I can just use normal array initializer to avoid the horrible formatting 😄

PaulusParssinen avatar Mar 29 '24 22:03 PaulusParssinen

I just realized I can just use normal array initializer to avoid the horrible formatting 😄

Ah excellent!

TalZaccai avatar Mar 30 '24 01:03 TalZaccai