easy_profiler
easy_profiler copied to clipboard
Using easy profiler to profile SSE functions
Hi
I have written a vector class that I am using for ray tracing. The code is as follows:
// Three-component float vector backed by four 16-byte-aligned floats so that
// every operation can use one SSE register. Lane 3 (e[3]) is padding: the
// constructors and copy assignment write 0 to it, and the x/y/z accessors
// never expose it.
class Vector3
{
public:
    // Constructs the zero vector (all four lanes are 0).
    Vector3()
    {
        _mm_store_ps(e, _mm_setzero_ps());
    }

    // Constructs (x_, y_, z_); the padding lane is set to 0.
    Vector3(float x_, float y_, float z_)
    {
        //EASY_FUNCTION(profiler::colors::Black);
        _mm_store_ps(e, _mm_set_ps(0.0f, z_, y_, x_));
    }

    float x() const { return e[0]; }
    float y() const { return e[1]; }
    float z() const { return e[2]; }

    // Unary plus: returns this vector unchanged.
    const Vector3& operator+() const { return *this; }

    // Unary minus: returns a negated copy. The operand itself is left
    // untouched, so the operator is usable on const vectors.
    Vector3 operator-() const
    {
        //EASY_FUNCTION(profiler::colors::Green);
        Vector3 negated;
        // XOR with -0.0f flips only the sign bit of each lane.
        _mm_store_ps(negated.e, _mm_xor_ps(_mm_load_ps(e), _mm_set1_ps(-0.0f)));
        return negated;
    }

    // Component access; the index is assert-checked to be 0, 1 or 2.
    float operator[](int i) const { assert(i >= 0 && i <= 2); return e[i]; }
    float& operator[](int i) { assert(i >= 0 && i <= 2); return e[i]; }

    // Euclidean length: sqrt(x*x + y*y + z*z).
    float length() const
    {
        //EASY_FUNCTION(profiler::colors::Green100);
        return _mm_cvtss_f32(_mm_sqrt_ss(sumOfSquares()));
    }

    // Squared Euclidean length: x*x + y*y + z*z.
    float squaredLength() const
    {
        //EASY_FUNCTION(profiler::colors::Green200);
        return _mm_cvtss_f32(sumOfSquares());
    }

    // Original (misspelled) name of squaredLength(), kept for existing callers.
    float sqauredLength() const
    {
        return squaredLength();
    }

    // Divides every lane by length() in place. There is no zero-length guard:
    // the division is performed as-is.
    void makeUnitVector()
    {
        //EASY_FUNCTION(profiler::colors::Green300);
        _mm_store_ps(e, _mm_div_ps(_mm_load_ps(e), _mm_set1_ps(length())));
    }

    void setX(float _x) { e[0] = _x; }
    void setY(float _y) { e[1] = _y; }
    void setZ(float _z) { e[2] = _z; }

    // Smallest of x, y, z.
    float minComponent() const
    {
        //EASY_FUNCTION(profiler::colors::Green400);
        return reduceMin3(_mm_load_ps(e));
    }

    // Largest of x, y, z.
    float maxComponent() const
    {
        //EASY_FUNCTION(profiler::colors::Green500);
        return reduceMax3(_mm_load_ps(e));
    }

    // Clears the sign bit of every lane of x (per-lane absolute value).
    inline __m128 abs_ps(__m128 x) const
    {
        const __m128 sign_mask = _mm_set1_ps(-0.0f); // -0.0f has only bit 31 set
        return _mm_andnot_ps(sign_mask, x);
    }

    // Largest of |x|, |y|, |z|.
    float maxAbsComponent() const
    {
        //EASY_FUNCTION(profiler::colors::Green600);
        return reduceMax3(abs_ps(_mm_load_ps(e)));
    }

    // Smallest of |x|, |y|, |z|.
    float minAbsComponent() const
    {
        //EASY_FUNCTION(profiler::colors::Green700);
        return reduceMin3(abs_ps(_mm_load_ps(e)));
    }

    // Copies x, y, z from v2 and resets the padding lane to 0.
    Vector3& operator=(const Vector3& v2)
    {
        //EASY_FUNCTION(profiler::colors::Green800);
        _mm_store_ps(e, _mm_set_ps(0.0f, v2.z(), v2.y(), v2.x()));
        return *this;
    }

    // Lane-wise in-place addition.
    Vector3& operator+=(const Vector3& v2)
    {
        //EASY_FUNCTION(profiler::colors::Green900);
        _mm_store_ps(e, _mm_add_ps(_mm_load_ps(e), _mm_load_ps(v2.e)));
        return *this;
    }

    // Lane-wise in-place subtraction.
    Vector3& operator-=(const Vector3& v2)
    {
        //EASY_FUNCTION(profiler::colors::GreenA100);
        _mm_store_ps(e, _mm_sub_ps(_mm_load_ps(e), _mm_load_ps(v2.e)));
        return *this;
    }

    // Multiplies every lane by t in place.
    Vector3& operator*=(const float t)
    {
        //EASY_FUNCTION(profiler::colors::GreenA200);
        _mm_store_ps(e, _mm_mul_ps(_mm_set1_ps(t), _mm_load_ps(e)));
        return *this;
    }

    // Divides every lane by t in place (no check for t == 0).
    Vector3& operator/=(const float t)
    {
        //EASY_FUNCTION(profiler::colors::GreenA400);
        _mm_store_ps(e, _mm_div_ps(_mm_load_ps(e), _mm_set1_ps(t)));
        return *this;
    }

    friend bool operator==(const Vector3& v1, const Vector3& v2);
    friend bool operator!=(const Vector3& v1, const Vector3& v2);
    friend std::istream& operator>>(std::istream& is, Vector3& t);
    friend std::ostream& operator<<(std::ostream& os, const Vector3& t);
    friend Vector3 operator+(const Vector3& v1, const Vector3& v2);
    friend Vector3 operator-(const Vector3& v1, const Vector3& v2);
    friend Vector3 operator/(const Vector3& v1, float scalar);
    friend Vector3 operator*(const Vector3& v1, float scalar);
    friend Vector3 operator*(float scalar, const Vector3& v1);
    friend Vector3 unitVector(const Vector3& v);
    friend Vector3 minVec(const Vector3& v1, const Vector3& v2);
    friend Vector3 maxVec(const Vector3& v1, const Vector3& v2);
    friend Vector3 cross(const Vector3& v1, const Vector3& v2);
    friend float dot(const Vector3& v1, const Vector3& v2);
    friend float tripleProduct(const Vector3& v1, const Vector3& v2, const Vector3& v3);

private:
    // Returns x*x + y*y + z*z in lane 0 (the remaining lanes are not
    // meaningful). Uses only SSE1 instructions and skips the padding lane.
    __m128 sumOfSquares() const
    {
        const __m128 v = _mm_load_ps(e);
        const __m128 squares = _mm_mul_ps(v, v);
        const __m128 yy = _mm_shuffle_ps(squares, squares, _MM_SHUFFLE(1, 1, 1, 1));
        const __m128 zz = _mm_shuffle_ps(squares, squares, _MM_SHUFFLE(2, 2, 2, 2));
        return _mm_add_ss(_mm_add_ss(squares, yy), zz);
    }

    // Minimum of lanes 0..2 of t. The first shuffle yields (x, x, y, z), so
    // lane 2 becomes min(y, z) while lane 0 stays x; the second shuffle moves
    // lane 2 into lane 0, giving min(x, min(y, z)). Lane 3 never reaches lane 0.
    static float reduceMin3(__m128 t)
    {
        t = _mm_min_ps(t, _mm_shuffle_ps(t, t, _MM_SHUFFLE(2, 1, 0, 0)));
        t = _mm_min_ps(t, _mm_shuffle_ps(t, t, _MM_SHUFFLE(1, 0, 2, 2)));
        return _mm_cvtss_f32(t);
    }

    // Maximum of lanes 0..2 of t; same shuffle scheme as reduceMin3.
    static float reduceMax3(__m128 t)
    {
        t = _mm_max_ps(t, _mm_shuffle_ps(t, t, _MM_SHUFFLE(2, 1, 0, 0)));
        t = _mm_max_ps(t, _mm_shuffle_ps(t, t, _MM_SHUFFLE(1, 0, 2, 2)));
        return _mm_cvtss_f32(t);
    }

    // _mm_load_ps/_mm_store_ps require 16-byte-aligned addresses.
    alignas(16) float e[4];
};
// Exact (bitwise-value) comparison of the x, y and z components; the padding
// lane is not compared.
bool operator==(const Vector3& v1, const Vector3& v2) {
    //EASY_FUNCTION(profiler::colors::LightGreen100);
    return v1.x() == v2.x()
        && v1.y() == v2.y()
        && v1.z() == v2.z();
}
// True when at least one of the x, y, z components differs.
bool operator!=(const Vector3& v1, const Vector3& v2) {
    //EASY_FUNCTION(profiler::colors::LightGreen200);
    const bool same = (v1 == v2);
    return !same;
}
// Reads three whitespace-separated floats from `is` into the x, y and z
// components of `t`.
// `t` is modified only when all three extractions succeed, so a failed or
// truncated read never leaves it half-updated or filled from an
// uninitialised temporary. The stream's state flags are left as set by the
// extraction for the caller to inspect.
std::istream& operator>>(std::istream& is, Vector3& t) {
    float x = 0.0f;
    float y = 0.0f;
    float z = 0.0f;
    if (is >> x >> y >> z) {
        t.setX(x);
        t.setY(y);
        t.setZ(z);
    }
    return is;
}
// Writes the vector as "(x y z)" to `os` and returns the stream.
std::ostream& operator<<(std::ostream& os, const Vector3& t) {
    os << "(";
    os << t.x() << " " << t.y() << " " << t.z();
    os << ")";
    return os;
}
// Lane-wise sum of v1 and v2 (all four lanes are added).
Vector3 operator+(const Vector3& v1, const Vector3& v2) {
    //EASY_FUNCTION(profiler::colors::LightGreen300);
    const __m128 lhs = _mm_load_ps(v1.e);
    const __m128 rhs = _mm_load_ps(v2.e);
    Vector3 sum;
    _mm_store_ps(sum.e, _mm_add_ps(lhs, rhs));
    return sum;
}
// Lane-wise difference v1 - v2 (all four lanes are subtracted).
Vector3 operator-(const Vector3& v1, const Vector3& v2) {
    //EASY_FUNCTION(profiler::colors::LightGreen400);
    const __m128 lhs = _mm_load_ps(v1.e);
    const __m128 rhs = _mm_load_ps(v2.e);
    Vector3 difference;
    _mm_store_ps(difference.e, _mm_sub_ps(lhs, rhs));
    return difference;
}
// Divides every lane of v1 by `scalar`; no check is made for scalar == 0.
Vector3 operator/(const Vector3& v1, float scalar) {
    //EASY_FUNCTION(profiler::colors::LightGreen500);
    const __m128 numerator = _mm_load_ps(v1.e);
    const __m128 divisor = _mm_set1_ps(scalar);
    Vector3 quotient;
    _mm_store_ps(quotient.e, _mm_div_ps(numerator, divisor));
    return quotient;
}
// Multiplies every lane of v1 by `scalar` (vector-on-the-left overload).
Vector3 operator*(const Vector3& v1, float scalar) {
    //EASY_FUNCTION(profiler::colors::LightGreen600);
    const __m128 lanes = _mm_load_ps(v1.e);
    const __m128 factor = _mm_set1_ps(scalar);
    Vector3 scaled;
    _mm_store_ps(scaled.e, _mm_mul_ps(lanes, factor));
    return scaled;
}
// Multiplies every lane of v1 by `scalar` (scalar-on-the-left overload).
Vector3 operator*(float scalar, const Vector3& v1) {
    //EASY_FUNCTION(profiler::colors::LightGreen700);
    const __m128 lanes = _mm_load_ps(v1.e);
    const __m128 factor = _mm_set1_ps(scalar);
    Vector3 scaled;
    _mm_store_ps(scaled.e, _mm_mul_ps(lanes, factor));
    return scaled;
}
// Returns v divided by its own length. There is no zero-length guard: the
// division is performed as-is.
Vector3 unitVector(const Vector3& v) {
    //EASY_FUNCTION(profiler::colors::LightGreen800);
    return v / v.length();
}
// Lane-wise minimum of v1 and v2.
Vector3 minVec(const Vector3& v1, const Vector3& v2) {
    //EASY_FUNCTION(profiler::colors::LightGreen900);
    const __m128 lhs = _mm_load_ps(v1.e);
    const __m128 rhs = _mm_load_ps(v2.e);
    Vector3 lowest;
    _mm_store_ps(lowest.e, _mm_min_ps(lhs, rhs));
    return lowest;
}
// Lane-wise maximum of v1 and v2.
Vector3 maxVec(const Vector3& v1, const Vector3& v2) {
    //EASY_FUNCTION(profiler::colors::LightBlue);
    const __m128 lhs = _mm_load_ps(v1.e);
    const __m128 rhs = _mm_load_ps(v2.e);
    Vector3 highest;
    _mm_store_ps(highest.e, _mm_max_ps(lhs, rhs));
    return highest;
}
// Cross product v1 x v2, computed as
//   (y, z, x) * (z, x, y)  -  (z, x, y) * (y, z, x)
// on lane-permuted copies of the operands. Lane 3 is left in place by both
// permutations, so its two products cancel in the subtraction.
Vector3 cross(const Vector3& v1, const Vector3& v2) {
    //EASY_FUNCTION(profiler::colors::LightBlue100);
    const __m128 lhs = _mm_load_ps(v1.e);
    const __m128 rhs = _mm_load_ps(v2.e);

    // _MM_SHUFFLE(3, 0, 2, 1) -> (y, z, x, w); _MM_SHUFFLE(3, 1, 0, 2) -> (z, x, y, w)
    const __m128 lhsYzx = _mm_shuffle_ps(lhs, lhs, _MM_SHUFFLE(3, 0, 2, 1));
    const __m128 lhsZxy = _mm_shuffle_ps(lhs, lhs, _MM_SHUFFLE(3, 1, 0, 2));
    const __m128 rhsYzx = _mm_shuffle_ps(rhs, rhs, _MM_SHUFFLE(3, 0, 2, 1));
    const __m128 rhsZxy = _mm_shuffle_ps(rhs, rhs, _MM_SHUFFLE(3, 1, 0, 2));

    const __m128 positive = _mm_mul_ps(lhsYzx, rhsZxy);
    const __m128 negative = _mm_mul_ps(lhsZxy, rhsYzx);

    Vector3 product;
    _mm_store_ps(product.e, _mm_sub_ps(positive, negative));
    return product;
}
// Dot product of v1 and v2 via the SSE4.1 dpps instruction.
// Mask 0xff multiplies all four lanes (padding included) and broadcasts the
// sum to every lane; lane 0 is returned.
float dot(const Vector3& v1, const Vector3& v2) {
    //EASY_FUNCTION(profiler::colors::LightBlue200);
    const __m128 lhs = _mm_load_ps(v1.e);
    const __m128 rhs = _mm_load_ps(v2.e);
    const __m128 sum = _mm_dp_ps(lhs, rhs, 0xff);
    return _mm_cvtss_f32(sum);
}
float tripleProduct(const Vector3& v1, const Vector3& v2, const Vector3& v3) {
//EASY_FUNCTION(profiler::colors::LightBlue100);
return dot(cross(v1, v2), v3);
}
I have commented out the EASY_FUNCTION macro. If I enable it, I get random crashes in the SIMD functions.
Is this a known issue? Is there any solution for it?
Hi @PixelClear
Thanks for the report.
No, we haven't faced such an error yet.
Can you please provide your system information? (OS (Windows, as I see), architecture, compiler version...)
Does the crash occur only if there are uncommented `EASY_FUNCTION` macros in the `Vector3` methods?
I mean, what happens if you add `EASY_FUNCTION` or `EASY_BLOCK` to other regular functions in your code?
If you replace `EASY_FUNCTION` with `EASY_BLOCK`, does the crash still occur?
Hi @cas4ey
My system information is as follows: Windows 10, Intel i5, Microsoft (R) C/C++ Optimizing Compiler Version 19.13.26129 for x86.
The crash occurs only if EASY_FUNCTION is uncommented in Vector3f functions.
I have tried replacing EASY_FUNCTION with EASY_BLOCK but same happens.
I mostly get the crash in the following function:
// Dot product of v1 and v2 via the SSE4.1 dpps instruction.
// Mask 0xff multiplies all four lanes (padding included) and broadcasts the
// sum to every lane; lane 0 is returned.
float dot(const Vector3& v1, const Vector3& v2) {
    //EASY_FUNCTION(profiler::colors::LightBlue200);
    const __m128 lhs = _mm_load_ps(v1.e);
    const __m128 rhs = _mm_load_ps(v2.e);
    const __m128 sum = _mm_dp_ps(lhs, rhs, 0xff);
    return _mm_cvtss_f32(sum);
}
My initial guess is that EASY_FUNCTION expands into a function that executes in its own thread, and that my function tries to return before it has finished, though this is only a guess.
Hope this will help.
@PixelClear sorry for the delay
> My initial guess is that EASY_FUNCTION expands in function that executes in its own thread and before that my function tries to return though this is my guess.
No, it does not start or execute anything in another thread, it executes in current thread only.
Microsoft (R) C/C++ Optimizing Compiler Version 19.13.26129 for x86
Just to be sure, Visual Studio 2017 x86 ?
Can you try to create small example console app with these vector operators? Does it also crash?
Does x64 version of this example app also crash?
As an alternative, you can add these vector operators to the `profiler_sample` application (maybe it would be easier this way).
P.S.: I'm sorry for not performing these tests by myself, too busy at the moment...