Replace Vector2 with Numerics.Vector2
Requires sussing out performance differences with benchmarks first.
This would also apply to any other maths data structures we have that are already in System.Numerics.
Pros to swapping:
- Probable performance increase from intrinsics or otherwise
Cons:
- Less control of API and QoL
See https://discord.com/channels/310555209753690112/770682801607278632/983964998260895794
I made an attempt at creating benchmarks and adding some basic SIMD to compare Numerics vs. Robust vs. Robust w/ SIMD in #2939. But I don't feel like I know enough about how to use the benchmarks, because after changing each one to do 1000 operations per call instead of 3, the existing Robust code was apparently the fastest. So somebody who knows more about benchmarking and/or SIMD needs to have a look.
Disclaimer, I have NFI what I was doing while writing #2939, I was mostly just having fun learning SIMD.
We should just rip the bandaid off on this and figure out what QoL things we'll be missing. I guess the biggest one is missing implicit conversion from (float, float). Losing so much perf on math operations just isn't very nice.
I'm still unsure whether I'm doing the benchmarking wrong somehow, but even when taking the benchmark functions directly from dotnet's GitHub, the Numerics matrix multiplication seems to be twice as slow as doing it manually with SIMD.
E.g., see this benchmark:
using BenchmarkDotNet.Attributes;
using Robust.Shared.Analyzers;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics.X86;
using System.Runtime.Intrinsics;
using System.Numerics;
using Matrix3x2 = System.Numerics.Matrix3x2;
namespace Robust.Benchmarks.Numerics;
/// <summary>
/// Compares multiplication of <see cref="System.Numerics.Matrix3x2"/> against a hand-written
/// 3x2 matrix struct, using both a scalar and an SSE/FMA implementation.
/// </summary>
[Virtual]
public class MatrixBenchmark
{
    // benchmarks defined in a way that matches https://github.com/dotnet/performance/blob/main/src/benchmarks/micro/libraries/System.Numerics.Vectors/Perf_Matrix3x2.cs

    /// <summary>Baseline: <see cref="Matrix3x2.Multiply(Matrix3x2, Matrix3x2)"/> on two identity matrices.</summary>
    [Benchmark(Baseline = true)]
    public Matrix3x2 NumericsMultiply() => Matrix3x2.Multiply(Matrix3x2.Identity, Matrix3x2.Identity);

    /// <summary>The <c>*</c> operator of <see cref="Matrix3x2"/> on two identity matrices.</summary>
    [Benchmark]
    public Matrix3x2 NumericsOperator() => Matrix3x2.Identity * Matrix3x2.Identity;

    /// <summary>Scalar <see cref="MyMatrix3x2.Multiply"/> on two identity matrices.</summary>
    [Benchmark]
    public MyMatrix3x2 MyMatrixMultiply() => MyMatrix3x2.Multiply(MyMatrix3x2.Identity, MyMatrix3x2.Identity);

    /// <summary>Vectorised <see cref="MyMatrix3x2.MultiplyFMA(in MyMatrix3x2, in MyMatrix3x2)"/> on two identity matrices.</summary>
    [Benchmark]
    public MyMatrix3x2 MyMatrixFMA() => MyMatrix3x2.MultiplyFMA(MyMatrix3x2.Identity, MyMatrix3x2.Identity);

    /// <summary>
    /// A 2-row, 3-column float matrix with an explicit memory layout.
    /// The four floats of the 2x2 sub-matrix (R0C0, R0C1, R1C0, R1C1) come first and are overlaid by
    /// <c>_subMat</c>; the third column (R0C2, R1C2) follows and is overlaid by <c>_offset</c>.
    /// </summary>
    [StructLayout(LayoutKind.Explicit)]
    public struct MyMatrix3x2
    {
        [FieldOffset(sizeof(float) * 0)] public float R0C0;
        [FieldOffset(sizeof(float) * 1)] public float R0C1;
        [FieldOffset(sizeof(float) * 2)] public float R1C0;
        [FieldOffset(sizeof(float) * 3)] public float R1C1;
        [FieldOffset(sizeof(float) * 4)] public float R0C2;
        [FieldOffset(sizeof(float) * 5)] public float R1C2;

        // Vector views over the same bytes as the scalar fields above.
        [FieldOffset(sizeof(float) * 4)] Vector2 _offset; // (R0C2, R1C2)
        [FieldOffset(sizeof(float) * 0)] Vector4 _subMat; // (R0C0, R0C1, R1C0, R1C1)

        // Here we define the identity matrix in the same way as in https://github.com/dotnet/runtime/blob/main/src/libraries/System.Private.CoreLib/src/System/Numerics/Matrix3x2.cs
        public static MyMatrix3x2 Identity => _identity;
        private static readonly MyMatrix3x2 _identity = new(1, 0, 0, 0, 1, 0);

        /// <summary>Creates a matrix from its six components, given row by row.</summary>
        public MyMatrix3x2(float r0c0, float r0c1, float r0c2, float r1c0, float r1c1, float r1c2)
        {
            // The overlapping vector fields are never assigned directly, so skip the
            // definite-assignment requirement; the six scalar writes below cover every byte.
            Unsafe.SkipInit(out this);
            R0C0 = r0c0;
            R0C1 = r0c1;
            R0C2 = r0c2;
            R1C0 = r1c0;
            R1C1 = r1c1;
            R1C2 = r1c2;
        }

        /// <summary>
        /// Scalar multiplication. The 2x2 part of the result is <c>right2x2 * left2x2</c>, and the third
        /// column is <c>right2x2 * leftColumn2 + rightColumn2</c>.
        /// </summary>
        /// <param name="left">The left-hand operand.</param>
        /// <param name="right">The right-hand operand.</param>
        /// <returns>The product matrix.</returns>
        public static MyMatrix3x2 Multiply(in MyMatrix3x2 left, in MyMatrix3x2 right)
        {
            MyMatrix3x2 result = default;
            result.R0C0 = right.R0C0 * left.R0C0 + right.R0C1 * left.R1C0;
            result.R0C1 = right.R0C0 * left.R0C1 + right.R0C1 * left.R1C1;
            result.R0C2 = right.R0C0 * left.R0C2 + right.R0C1 * left.R1C2 + right.R0C2;
            result.R1C0 = right.R1C0 * left.R0C0 + right.R1C1 * left.R1C0;
            result.R1C1 = right.R1C0 * left.R0C1 + right.R1C1 * left.R1C1;
            result.R1C2 = right.R1C0 * left.R0C2 + right.R1C1 * left.R1C2 + right.R1C2;
            return result;
        }

        /// <summary>
        /// Vectorised multiplication; computes the same product as <see cref="Multiply"/>.
        /// </summary>
        /// <param name="left">The left-hand operand.</param>
        /// <param name="right">The right-hand operand.</param>
        /// <returns>The product matrix.</returns>
        public static MyMatrix3x2 MultiplyFMA(in MyMatrix3x2 left, in MyMatrix3x2 right)
        {
            MultiplyFMA(in left, in right, out MyMatrix3x2 result);
            return result;
        }

        /// <summary>
        /// Vectorised multiplication writing into <paramref name="result"/>; computes the same product as
        /// <see cref="Multiply"/>. Uses SSE shuffles plus one fused multiply-add when the hardware supports
        /// them. Without FMA it uses a separate SSE multiply and add; without SSE it falls back to the
        /// scalar <see cref="Multiply"/>, so the method never throws
        /// <see cref="System.PlatformNotSupportedException"/>.
        /// </summary>
        /// <param name="left">The left-hand operand.</param>
        /// <param name="right">The right-hand operand.</param>
        /// <param name="result">Receives the product matrix.</param>
        public static void MultiplyFMA(in MyMatrix3x2 left, in MyMatrix3x2 right, out MyMatrix3x2 result)
        {
            // IsSupported is a JIT-time constant, so on supporting hardware this branch is removed entirely.
            if (!Sse.IsSupported)
            {
                result = Multiply(in left, in right);
                return;
            }

            Unsafe.SkipInit(out result);
            var subMatrixLeft = left._subMat.AsVector128();   // (L00, L01, L10, L11)
            var subMatrixRight = right._subMat.AsVector128(); // (R00, R01, R10, R11)

            // 2x2 part, first products: (R00*L00, R00*L01, R10*L00, R10*L01)
            var vecA = Sse.Shuffle(subMatrixRight, subMatrixRight, 0b10_10_00_00); // (R00, R00, R10, R10)
            var vecB = Sse.MoveLowToHigh(subMatrixLeft, subMatrixLeft);            // (L00, L01, L00, L01)
            var vecC = Sse.Multiply(vecA, vecB);

            // 2x2 part, second products accumulated onto the first.
            vecA = Sse.Shuffle(subMatrixRight, subMatrixRight, 0b11_11_01_01);     // (R01, R01, R11, R11)
            vecB = Sse.MoveHighToLow(subMatrixLeft, subMatrixLeft);                // (L10, L11, L10, L11)
            vecC = Fma.IsSupported
                ? Fma.MultiplyAdd(vecA, vecB, vecC)
                : Sse.Add(Sse.Multiply(vecA, vecB), vecC);
            result._subMat = vecC.AsVector4();

            // Third column: only the two low lanes of the vectors below carry meaningful data at the end.
            vecA = left._offset.AsVector128();                                     // (L02, L12, -, -)
            vecB = Sse.UnpackLow(vecA, vecA);                                      // (L02, L02, L12, L12)
            vecC = Sse.Shuffle(subMatrixRight, subMatrixRight, 0b11_01_10_00);     // (R00, R10, R01, R11)
            vecA = Sse.Multiply(vecB, vecC);                                       // (L02*R00, L02*R10, L12*R01, L12*R11)
            vecB = Sse.MoveHighToLow(vecC, vecA);                                  // low lanes: (L12*R01, L12*R11)
            vecC = Sse.Add(vecA, vecB);                                            // low lanes: right2x2 * leftColumn2
            vecA = right._offset.AsVector128();                                    // (R02, R12, -, -)
            vecB = Sse.Add(vecA, vecC);
            result._offset = vecB.AsVector2();
        }
    }
}