perfview
perfview copied to clipboard
GetILOffsetForNativeAddress decrementing returned IL offset leads to wrongly-attributed samples
We are working on adding sample-based PGO support to .NET Core. As part of this, I am using TraceEvent to parse traces and retrieve the samples as a (method, IL offset) pair. We are seeing some strange samples because of the following:
https://github.com/microsoft/perfview/blob/3d2809c029715ad5104c0b25f0083283f1626e7f/src/TraceEvent/TraceLog.cs#L9184-L9190
I am not quite sure what the comment is describing, but it definitely leads to wrongly attributed samples. For example, in
https://github.com/dotnet/performance/blob/b195acece4d20efc00a9e36bed854d6cf41ddaa6/src/benchmarks/micro/runtime/BenchmarksGame/k-nucleotide-9.cs#L184-L206
the loop calling checkEnding
is very hot. The raw IP samples we see are the following:
https://gist.github.com/jakobbotsch/bda616056b6859e551d606df41e1fb6d#file-count_ending-asm-L105-L130
which all look very reasonable. However, for the 101 samples on
00007ff8`21166da9 4C8B4D18 mov r9, qword ptr [rbp+18H] ; 101
TraceCodeAddresses.ILOffset
is returning 0x37, which is the middle of the previous instruction that is not part of the loop:
.method private hidebysig static class [System.Collections]System.Collections.Generic.Dictionary`2<int64, class BenchmarksGame.Wrapper>
countEnding(
int32 l,
int64 mask,
unsigned int8 b
) cil managed
{
.maxstack 5
.locals init (
[0] int64 rollingKey,
[1] unsigned int8[] firstBlock,
[2] int32 start,
[3] class [System.Collections]System.Collections.Generic.Dictionary`2<int64, class BenchmarksGame.Wrapper> dict,
[4] int32 lastBlockId,
[5] unsigned int8[] lastBlock,
[6] int32 i,
[7] int32 bl,
[8] unsigned int8[] bytes,
[9] int32 i_V_9,
[10] int32 i_V_10
)
// [186 13 - 186 33]
IL_0000: ldc.i4.0
IL_0001: conv.i8
IL_0002: stloc.0 // rollingKey
// [187 13 - 187 45]
IL_0003: ldsfld class [System.Collections]System.Collections.Generic.List`1<unsigned int8[]> BenchmarksGame.KNucleotide_9::threeBlocks
IL_0008: ldc.i4.0
IL_0009: callvirt instance !0/*unsigned int8[]*/ class [System.Collections]System.Collections.Generic.List`1<unsigned int8[]>::get_Item(int32)
IL_000e: stloc.1 // firstBlock
// [188 13 - 188 36]
IL_000f: ldsfld int32 BenchmarksGame.KNucleotide_9::threeStart
IL_0014: stloc.2 // start
IL_0015: br.s IL_0024
// start of loop, entry point: IL_0024
// [189 29 - 189 82]
IL_0017: ldloc.0 // rollingKey
IL_0018: ldc.i4.2
IL_0019: shl
IL_001a: ldloc.1 // firstBlock
IL_001b: ldloc.2 // start
IL_001c: dup
IL_001d: ldc.i4.1
IL_001e: add
IL_001f: stloc.2 // start
IL_0020: ldelem.u1
IL_0021: conv.u8
IL_0022: or
IL_0023: stloc.0 // rollingKey
// [189 13 - 189 28]
IL_0024: ldarg.0 // l
IL_0025: ldc.i4.1
IL_0026: sub
IL_0027: dup
IL_0028: starg.s l
IL_002a: ldc.i4.0
IL_002b: bgt.s IL_0017
// end of loop
// [190 13 - 190 56]
IL_002d: newobj instance void class [System.Collections]System.Collections.Generic.Dictionary`2<int64, class BenchmarksGame.Wrapper>::.ctor()
IL_0032: stloc.3 // dict
// [191 18 - 191 31]
IL_0033: ldloc.2 // start
IL_0034: stloc.s i
IL_0036: br.s IL_004c
// start of loop, entry point: IL_004c
// [192 17 - 192 75]
IL_0038: ldloc.3 // dict
IL_0039: ldloca.s rollingKey
IL_003b: ldarg.2 // b
IL_003c: ldloc.1 // firstBlock
IL_003d: ldloc.s i
IL_003f: ldelem.u1
IL_0040: ldarg.1 // mask
IL_0041: call void BenchmarksGame.KNucleotide_9::checkEnding(class [System.Collections]System.Collections.Generic.Dictionary`2<int64, class BenchmarksGame.Wrapper>, int64&, unsigned int8, unsigned int8, int64)
// [191 56 - 191 59]
IL_0046: ldloc.s i
IL_0048: ldc.i4.1
IL_0049: add
IL_004a: stloc.s i
// [191 33 - 191 54]
IL_004c: ldloc.s i
IL_004e: ldloc.1 // firstBlock
IL_004f: ldlen
IL_0050: conv.i4
IL_0051: blt.s IL_0038
// end of loop
// [194 13 - 194 53]
IL_0053: ldsfld class [System.Collections]System.Collections.Generic.List`1<unsigned int8[]> BenchmarksGame.KNucleotide_9::threeBlocks
IL_0058: callvirt instance int32 class [System.Collections]System.Collections.Generic.List`1<unsigned int8[]>::get_Count()
IL_005d: ldc.i4.1
IL_005e: sub
IL_005f: stloc.s lastBlockId
// [195 18 - 195 28]
IL_0061: ldc.i4.1
IL_0062: stloc.s bl
IL_0064: br.s IL_009c
// start of loop, entry point: IL_009c
// [197 17 - 197 45]
IL_0066: ldsfld class [System.Collections]System.Collections.Generic.List`1<unsigned int8[]> BenchmarksGame.KNucleotide_9::threeBlocks
IL_006b: ldloc.s bl
IL_006d: callvirt instance !0/*unsigned int8[]*/ class [System.Collections]System.Collections.Generic.List`1<unsigned int8[]>::get_Item(int32)
IL_0072: stloc.s bytes
// [198 22 - 198 31]
IL_0074: ldc.i4.0
IL_0075: stloc.s i_V_9
IL_0077: br.s IL_008e
// start of loop, entry point: IL_008e
// [199 21 - 199 74]
IL_0079: ldloc.3 // dict
IL_007a: ldloca.s rollingKey
IL_007c: ldarg.2 // b
IL_007d: ldloc.s bytes
IL_007f: ldloc.s i_V_9
IL_0081: ldelem.u1
IL_0082: ldarg.1 // mask
IL_0083: call void BenchmarksGame.KNucleotide_9::checkEnding(class [System.Collections]System.Collections.Generic.Dictionary`2<int64, class BenchmarksGame.Wrapper>, int64&, unsigned int8, unsigned int8, int64)
// [198 51 - 198 54]
IL_0088: ldloc.s i_V_9
IL_008a: ldc.i4.1
IL_008b: add
IL_008c: stloc.s i_V_9
// [198 33 - 198 49]
IL_008e: ldloc.s i_V_9
IL_0090: ldloc.s bytes
IL_0092: ldlen
IL_0093: conv.i4
IL_0094: blt.s IL_0079
// end of loop
// [195 48 - 195 52]
IL_0096: ldloc.s bl
IL_0098: ldc.i4.1
IL_0099: add
IL_009a: stloc.s bl
// [195 30 - 195 46]
IL_009c: ldloc.s bl
IL_009e: ldloc.s lastBlockId
IL_00a0: blt.s IL_0066
// end of loop
// [202 13 - 202 54]
IL_00a2: ldsfld class [System.Collections]System.Collections.Generic.List`1<unsigned int8[]> BenchmarksGame.KNucleotide_9::threeBlocks
IL_00a7: ldloc.s lastBlockId
IL_00a9: callvirt instance !0/*unsigned int8[]*/ class [System.Collections]System.Collections.Generic.List`1<unsigned int8[]>::get_Item(int32)
IL_00ae: stloc.s lastBlock
// [203 18 - 203 27]
IL_00b0: ldc.i4.0
IL_00b1: stloc.s i_V_10
IL_00b3: br.s IL_00ca
// start of loop, entry point: IL_00ca
// [204 17 - 204 74]
IL_00b5: ldloc.3 // dict
IL_00b6: ldloca.s rollingKey
IL_00b8: ldarg.2 // b
IL_00b9: ldloc.s lastBlock
IL_00bb: ldloc.s i_V_10
IL_00bd: ldelem.u1
IL_00be: ldarg.1 // mask
IL_00bf: call void BenchmarksGame.KNucleotide_9::checkEnding(class [System.Collections]System.Collections.Generic.Dictionary`2<int64, class BenchmarksGame.Wrapper>, int64&, unsigned int8, unsigned int8, int64)
// [203 43 - 203 46]
IL_00c4: ldloc.s i_V_10
IL_00c6: ldc.i4.1
IL_00c7: add
IL_00c8: stloc.s i_V_10
// [203 29 - 203 41]
IL_00ca: ldloc.s i_V_10
IL_00cc: ldsfld int32 BenchmarksGame.KNucleotide_9::threeEnd
IL_00d1: blt.s IL_00b5
// end of loop
// [205 13 - 205 25]
IL_00d3: ldloc.3 // dict
IL_00d4: ret
} // end of method KNucleotide_9::countEnding
Logically, the IL offset 0x37 is part of the basic block that allocates the dictionary. For now I am working around this by adding 1 to the IL offsets returned by TraceEvent, but a more central fix would be nice.