perfview icon indicating copy to clipboard operation
perfview copied to clipboard

GetILOffsetForNativeAddress decrementing returned IL offset leads to wrongly-attributed samples

Open jakobbotsch opened this issue 3 years ago • 0 comments

We are working on adding sample-based PGO support to .NET Core. As part of this, I am using TraceEvent to parse traces and retrieve the samples as a (method, IL offset) pair. We are seeing some strange samples because of the following: https://github.com/microsoft/perfview/blob/3d2809c029715ad5104c0b25f0083283f1626e7f/src/TraceEvent/TraceLog.cs#L9184-L9190 I am not quite sure what the comment is describing, but it definitely leads to wrongly attributed samples. For example, in https://github.com/dotnet/performance/blob/b195acece4d20efc00a9e36bed854d6cf41ddaa6/src/benchmarks/micro/runtime/BenchmarksGame/k-nucleotide-9.cs#L184-L206 the loop calling checkEnding is very hot. The raw IP samples we see are the following: https://gist.github.com/jakobbotsch/bda616056b6859e551d606df41e1fb6d#file-count_ending-asm-L105-L130 which all look very reasonable. However, for the 101 samples on

 00007ff8`21166da9        4C8B4D18             mov      r9, qword ptr [rbp+18H]            ; 101

TraceCodeAddresses.ILOffset is returning 0x37, which is the middle of the previous instruction that is not part of the loop:

  .method private hidebysig static class [System.Collections]System.Collections.Generic.Dictionary`2<int64, class BenchmarksGame.Wrapper>
    countEnding(
      int32 l,
      int64 mask,
      unsigned int8 b
    ) cil managed
  {
    .maxstack 5
    .locals init (
      [0] int64 rollingKey,
      [1] unsigned int8[] firstBlock,
      [2] int32 start,
      [3] class [System.Collections]System.Collections.Generic.Dictionary`2<int64, class BenchmarksGame.Wrapper> dict,
      [4] int32 lastBlockId,
      [5] unsigned int8[] lastBlock,
      [6] int32 i,
      [7] int32 bl,
      [8] unsigned int8[] bytes,
      [9] int32 i_V_9,
      [10] int32 i_V_10
    )

    // [186 13 - 186 33]
    IL_0000: ldc.i4.0
    IL_0001: conv.i8
    IL_0002: stloc.0      // rollingKey

    // [187 13 - 187 45]
    IL_0003: ldsfld       class [System.Collections]System.Collections.Generic.List`1<unsigned int8[]> BenchmarksGame.KNucleotide_9::threeBlocks
    IL_0008: ldc.i4.0
    IL_0009: callvirt     instance !0/*unsigned int8[]*/ class [System.Collections]System.Collections.Generic.List`1<unsigned int8[]>::get_Item(int32)
    IL_000e: stloc.1      // firstBlock

    // [188 13 - 188 36]
    IL_000f: ldsfld       int32 BenchmarksGame.KNucleotide_9::threeStart
    IL_0014: stloc.2      // start

    IL_0015: br.s         IL_0024
    // start of loop, entry point: IL_0024

      // [189 29 - 189 82]
      IL_0017: ldloc.0      // rollingKey
      IL_0018: ldc.i4.2
      IL_0019: shl
      IL_001a: ldloc.1      // firstBlock
      IL_001b: ldloc.2      // start
      IL_001c: dup
      IL_001d: ldc.i4.1
      IL_001e: add
      IL_001f: stloc.2      // start
      IL_0020: ldelem.u1
      IL_0021: conv.u8
      IL_0022: or
      IL_0023: stloc.0      // rollingKey

      // [189 13 - 189 28]
      IL_0024: ldarg.0      // l
      IL_0025: ldc.i4.1
      IL_0026: sub
      IL_0027: dup
      IL_0028: starg.s      l
      IL_002a: ldc.i4.0
      IL_002b: bgt.s        IL_0017
    // end of loop

    // [190 13 - 190 56]
    IL_002d: newobj       instance void class [System.Collections]System.Collections.Generic.Dictionary`2<int64, class BenchmarksGame.Wrapper>::.ctor()
    IL_0032: stloc.3      // dict

    // [191 18 - 191 31]
    IL_0033: ldloc.2      // start
    IL_0034: stloc.s      i

    IL_0036: br.s         IL_004c
    // start of loop, entry point: IL_004c

      // [192 17 - 192 75]
      IL_0038: ldloc.3      // dict
      IL_0039: ldloca.s     rollingKey
      IL_003b: ldarg.2      // b
      IL_003c: ldloc.1      // firstBlock
      IL_003d: ldloc.s      i
      IL_003f: ldelem.u1
      IL_0040: ldarg.1      // mask
      IL_0041: call         void BenchmarksGame.KNucleotide_9::checkEnding(class [System.Collections]System.Collections.Generic.Dictionary`2<int64, class BenchmarksGame.Wrapper>, int64&, unsigned int8, unsigned int8, int64)

      // [191 56 - 191 59]
      IL_0046: ldloc.s      i
      IL_0048: ldc.i4.1
      IL_0049: add
      IL_004a: stloc.s      i

      // [191 33 - 191 54]
      IL_004c: ldloc.s      i
      IL_004e: ldloc.1      // firstBlock
      IL_004f: ldlen
      IL_0050: conv.i4
      IL_0051: blt.s        IL_0038
    // end of loop

    // [194 13 - 194 53]
    IL_0053: ldsfld       class [System.Collections]System.Collections.Generic.List`1<unsigned int8[]> BenchmarksGame.KNucleotide_9::threeBlocks
    IL_0058: callvirt     instance int32 class [System.Collections]System.Collections.Generic.List`1<unsigned int8[]>::get_Count()
    IL_005d: ldc.i4.1
    IL_005e: sub
    IL_005f: stloc.s      lastBlockId

    // [195 18 - 195 28]
    IL_0061: ldc.i4.1
    IL_0062: stloc.s      bl

    IL_0064: br.s         IL_009c
    // start of loop, entry point: IL_009c

      // [197 17 - 197 45]
      IL_0066: ldsfld       class [System.Collections]System.Collections.Generic.List`1<unsigned int8[]> BenchmarksGame.KNucleotide_9::threeBlocks
      IL_006b: ldloc.s      bl
      IL_006d: callvirt     instance !0/*unsigned int8[]*/ class [System.Collections]System.Collections.Generic.List`1<unsigned int8[]>::get_Item(int32)
      IL_0072: stloc.s      bytes

      // [198 22 - 198 31]
      IL_0074: ldc.i4.0
      IL_0075: stloc.s      i_V_9

      IL_0077: br.s         IL_008e
      // start of loop, entry point: IL_008e

        // [199 21 - 199 74]
        IL_0079: ldloc.3      // dict
        IL_007a: ldloca.s     rollingKey
        IL_007c: ldarg.2      // b
        IL_007d: ldloc.s      bytes
        IL_007f: ldloc.s      i_V_9
        IL_0081: ldelem.u1
        IL_0082: ldarg.1      // mask
        IL_0083: call         void BenchmarksGame.KNucleotide_9::checkEnding(class [System.Collections]System.Collections.Generic.Dictionary`2<int64, class BenchmarksGame.Wrapper>, int64&, unsigned int8, unsigned int8, int64)

        // [198 51 - 198 54]
        IL_0088: ldloc.s      i_V_9
        IL_008a: ldc.i4.1
        IL_008b: add
        IL_008c: stloc.s      i_V_9

        // [198 33 - 198 49]
        IL_008e: ldloc.s      i_V_9
        IL_0090: ldloc.s      bytes
        IL_0092: ldlen
        IL_0093: conv.i4
        IL_0094: blt.s        IL_0079
      // end of loop

      // [195 48 - 195 52]
      IL_0096: ldloc.s      bl
      IL_0098: ldc.i4.1
      IL_0099: add
      IL_009a: stloc.s      bl

      // [195 30 - 195 46]
      IL_009c: ldloc.s      bl
      IL_009e: ldloc.s      lastBlockId
      IL_00a0: blt.s        IL_0066
    // end of loop

    // [202 13 - 202 54]
    IL_00a2: ldsfld       class [System.Collections]System.Collections.Generic.List`1<unsigned int8[]> BenchmarksGame.KNucleotide_9::threeBlocks
    IL_00a7: ldloc.s      lastBlockId
    IL_00a9: callvirt     instance !0/*unsigned int8[]*/ class [System.Collections]System.Collections.Generic.List`1<unsigned int8[]>::get_Item(int32)
    IL_00ae: stloc.s      lastBlock

    // [203 18 - 203 27]
    IL_00b0: ldc.i4.0
    IL_00b1: stloc.s      i_V_10

    IL_00b3: br.s         IL_00ca
    // start of loop, entry point: IL_00ca

      // [204 17 - 204 74]
      IL_00b5: ldloc.3      // dict
      IL_00b6: ldloca.s     rollingKey
      IL_00b8: ldarg.2      // b
      IL_00b9: ldloc.s      lastBlock
      IL_00bb: ldloc.s      i_V_10
      IL_00bd: ldelem.u1
      IL_00be: ldarg.1      // mask
      IL_00bf: call         void BenchmarksGame.KNucleotide_9::checkEnding(class [System.Collections]System.Collections.Generic.Dictionary`2<int64, class BenchmarksGame.Wrapper>, int64&, unsigned int8, unsigned int8, int64)

      // [203 43 - 203 46]
      IL_00c4: ldloc.s      i_V_10
      IL_00c6: ldc.i4.1
      IL_00c7: add
      IL_00c8: stloc.s      i_V_10

      // [203 29 - 203 41]
      IL_00ca: ldloc.s      i_V_10
      IL_00cc: ldsfld       int32 BenchmarksGame.KNucleotide_9::threeEnd
      IL_00d1: blt.s        IL_00b5
    // end of loop

    // [205 13 - 205 25]
    IL_00d3: ldloc.3      // dict
    IL_00d4: ret

  } // end of method KNucleotide_9::countEnding

Logically, the IL offset 0x37 is part of the basic block that allocates the dictionary. For now I am working around this by adding 1 to the IL offsets returned by TraceEvent, but a more central fix would be nice.

jakobbotsch avatar Apr 19 '21 13:04 jakobbotsch