easy_profiler icon indicating copy to clipboard operation
easy_profiler copied to clipboard

Using easy profiler to profile SSE functions

Open PixelClear opened this issue 6 years ago • 3 comments

Hi

I have written a vector class that I am using for ray tracing. The code follows:

// Three-component float vector backed by four 16-byte-aligned floats so that
// it can be moved through SSE registers in a single load/store.
// e[0..2] hold x, y and z; e[3] is a padding lane that the accessors never
// expose and that the reductions below deliberately ignore.
//
// NOTE(review): the aligned load/store intrinsics used throughout require `e`
// to sit on a 16-byte boundary; confirm that every allocation path (e.g. heap
// or container storage on 32-bit builds) honors this over-alignment.
class Vector3
{

public:

	// Builds the zero vector; the padding lane is zeroed as well.
	Vector3()
	{
		_mm_store_ps(e, _mm_setzero_ps());
	}

	// Builds (x_, y_, z_) with a zero padding lane.
	Vector3(float x_, float y_, float z_)
	{
		//EASY_FUNCTION(profiler::colors::Black);

		// _mm_set_ps lists lanes from highest to lowest, hence the reversed order.
		_mm_store_ps(e, _mm_set_ps(0.0f, z_, y_, x_));
	}


	float x() const { return e[0]; }
	float y() const { return e[1]; }
	float z() const { return e[2]; }

	// Unary plus: returns this vector unchanged.
	const Vector3& operator+() const { return *this; }

	// Unary minus: returns a negated copy and leaves this vector untouched,
	// so it is usable on const vectors and `b = -a` no longer modifies `a`.
	Vector3 operator-() const
	{
		//EASY_FUNCTION(profiler::colors::Green);
		Vector3 negated;
		// XOR with -0.0f flips only the sign bit of every lane.
		_mm_store_ps(negated.e, _mm_xor_ps(_mm_load_ps(e), _mm_set1_ps(-0.0f)));
		return negated;
	}

	// Component access by index; only 0 (x), 1 (y) and 2 (z) are valid.
	float operator[](int i) const { assert(i >= 0 && i <= 2); return e[i]; }
	float& operator[](int i) { assert(i >= 0 && i <= 2); return e[i]; }


	// Euclidean length, sqrt(x*x + y*y + z*z).
	float length() const
	{
		//EASY_FUNCTION(profiler::colors::Green100);
		return _mm_cvtss_f32(_mm_sqrt_ss(sumOfSquaresXYZ(_mm_load_ps(e))));
	}

	// Squared Euclidean length, x*x + y*y + z*z.
	// (The misspelled name is kept because callers depend on it.)
	float sqauredLength() const
	{
		//EASY_FUNCTION(profiler::colors::Green200);
		return _mm_cvtss_f32(sumOfSquaresXYZ(_mm_load_ps(e)));
	}


	// Divides every component by length() in place. There is no guard for a
	// zero-length vector: the division is performed regardless.
	void makeUnitVector()
	{
		//EASY_FUNCTION(profiler::colors::Green300);
		_mm_store_ps(e, _mm_div_ps(_mm_load_ps(e), _mm_set1_ps(length())));
	}

	void setX(float _x) { e[0] = _x; }
	void setY(float _y) { e[1] = _y; }
	void setZ(float _z) { e[2] = _z; }

	// Smallest of x, y, z.
	float minComponent() const
	{
		//EASY_FUNCTION(profiler::colors::Green400);
		return minOfXYZ(_mm_load_ps(e));
	}

	// Largest of x, y, z.
	float maxComponent() const
	{
		//EASY_FUNCTION(profiler::colors::Green500);
		return maxOfXYZ(_mm_load_ps(e));
	}

	// Lane-wise absolute value: clears the sign bit of each float in x.
	inline __m128 abs_ps(__m128 x) const
	{
		const __m128 sign_mask = _mm_set1_ps(-0.f); // -0.f = 1 << 31
		return _mm_andnot_ps(sign_mask, x);
	}

	// Largest of |x|, |y|, |z|.
	float maxAbsComponent() const
	{
		//EASY_FUNCTION(profiler::colors::Green600);
		return maxOfXYZ(abs_ps(_mm_load_ps(e)));
	}

	// Smallest of |x|, |y|, |z|.
	float minAbsComponent() const
	{
		//EASY_FUNCTION(profiler::colors::Green700);
		return minOfXYZ(abs_ps(_mm_load_ps(e)));
	}

	// Copies x, y, z from v2 and resets the padding lane to zero.
	Vector3& operator=(const Vector3& v2)
	{
		//EASY_FUNCTION(profiler::colors::Green800);
		_mm_store_ps(e, _mm_set_ps(0.0f, v2.z(), v2.y(), v2.x()));
		return *this;
	}

	// Component-wise in-place addition.
	Vector3& operator+=(const Vector3& v2)
	{
		//EASY_FUNCTION(profiler::colors::Green900);
		_mm_store_ps(e, _mm_add_ps(_mm_load_ps(e), _mm_load_ps(v2.e)));
		return *this;
	}

	// Component-wise in-place subtraction.
	Vector3& operator-=(const Vector3& v2)
	{
		//EASY_FUNCTION(profiler::colors::GreenA100);
		_mm_store_ps(e, _mm_sub_ps(_mm_load_ps(e), _mm_load_ps(v2.e)));
		return *this;
	}

	// Scales every component by t in place.
	Vector3& operator*=(const float t)
	{
		//EASY_FUNCTION(profiler::colors::GreenA200);
		_mm_store_ps(e, _mm_mul_ps(_mm_set1_ps(t), _mm_load_ps(e)));
		return *this;
	}

	// Divides every component by t in place (no check for t == 0).
	Vector3& operator/=(const float t)
	{
		//EASY_FUNCTION(profiler::colors::GreenA400);
		_mm_store_ps(e, _mm_div_ps(_mm_load_ps(e), _mm_set1_ps(t)));
		return *this;
	}

	friend bool operator==(const Vector3& v1, const Vector3& v2);
	friend bool operator!=(const Vector3& v1, const Vector3& v2);
	friend std::istream& operator>>(std::istream& is, Vector3& t);
	friend std::ostream& operator<<(std::ostream& os, const Vector3& t);
	friend Vector3 operator+(const Vector3& v1, const Vector3& v2);
	friend Vector3 operator-(const Vector3& v1, const Vector3& v2);
	friend Vector3 operator/(const Vector3& v1, float scalar);
	friend Vector3 operator*(const Vector3& v1, float scalar);
	friend Vector3 operator*(float scalar, const Vector3& v1);
	friend Vector3 unitVector(const Vector3& v);
	friend Vector3 minVec(const Vector3& v1, const Vector3& v2);
	friend Vector3 maxVec(const Vector3& v1, const Vector3& v2);
	friend Vector3 cross(const Vector3& v1, const Vector3& v2);
	friend float dot(const Vector3& v1, const Vector3& v2);
	friend float tripleProduct(const Vector3& v1, const Vector3& v2, const Vector3& v3);

private:

	// Returns (x*x + y*y) + z*z in lane 0. The padding lane is left out so a
	// stray value there cannot leak into length(); only SSE1 operations are
	// used, so no SSE4.1 support is required.
	static __m128 sumOfSquaresXYZ(__m128 v)
	{
		const __m128 sq = _mm_mul_ps(v, v);
		const __m128 yy = _mm_shuffle_ps(sq, sq, _MM_SHUFFLE(1, 1, 1, 1));
		const __m128 zz = _mm_shuffle_ps(sq, sq, _MM_SHUFFLE(2, 2, 2, 2));
		return _mm_add_ss(_mm_add_ss(sq, yy), zz);
	}

	// Horizontal minimum over lanes 0..2.
	// Step 1 leaves x in lane 0 and min(y, z) in lane 2; step 2 folds lane 2
	// into lane 0. The padding lane never reaches lane 0.
	static float minOfXYZ(__m128 t)
	{
		t = _mm_min_ps(t, _mm_shuffle_ps(t, t, _MM_SHUFFLE(2, 1, 0, 0)));
		t = _mm_min_ps(t, _mm_shuffle_ps(t, t, _MM_SHUFFLE(1, 0, 2, 2)));
		return _mm_cvtss_f32(t);
	}

	// Horizontal maximum over lanes 0..2; same shuffle scheme as minOfXYZ.
	static float maxOfXYZ(__m128 t)
	{
		t = _mm_max_ps(t, _mm_shuffle_ps(t, t, _MM_SHUFFLE(2, 1, 0, 0)));
		t = _mm_max_ps(t, _mm_shuffle_ps(t, t, _MM_SHUFFLE(1, 0, 2, 2)));
		return _mm_cvtss_f32(t);
	}

	// x, y, z and one padding lane; 16-byte aligned for _mm_load_ps/_mm_store_ps.
	alignas(16) float e[4];
};


// Exact component-wise comparison of x, y and z; the padding lane is not
// inspected. Evaluation stops at the first differing component.
bool operator==(const Vector3& v1, const Vector3& v2) {
	//EASY_FUNCTION(profiler::colors::LightGreen100);

	return v1.x() == v2.x()
		&& v1.y() == v2.y()
		&& v1.z() == v2.z();
}

// Logical negation of operator== for two vectors.
bool operator!=(const Vector3& v1, const Vector3& v2) {
	//EASY_FUNCTION(profiler::colors::LightGreen200);

	const bool same = (v1 == v2);
	return !same;
}

// Reads three whitespace-separated floats from `is` into t as x, y, z.
// The vector is only updated when all three values were extracted; on a
// failed or truncated read `t` keeps its previous contents and the stream's
// failure state is left for the caller to inspect.
std::istream& operator>>(std::istream& is, Vector3& t) {
	float x = 0.0f;
	float y = 0.0f;
	float z = 0.0f;
	if (is >> x >> y >> z) {
		t.setX(x);
		t.setY(y);
		t.setZ(z);
	}
	return is;
}

// Writes the vector as "(x y z)" using the stream's current float formatting.
std::ostream& operator<<(std::ostream& os, const Vector3& t) {
	os << "(" << t.x();
	os << " " << t.y();
	os << " " << t.z() << ")";
	return os;
}

// Component-wise sum of two vectors, computed on all four lanes at once.
Vector3 operator+(const Vector3& v1, const Vector3& v2) {
	//EASY_FUNCTION(profiler::colors::LightGreen300);

	const __m128 lhs = _mm_load_ps(v1.e);
	const __m128 rhs = _mm_load_ps(v2.e);

	Vector3 sum;
	_mm_store_ps(sum.e, _mm_add_ps(lhs, rhs));
	return sum;
}

// Component-wise difference v1 - v2, computed on all four lanes at once.
Vector3 operator-(const Vector3& v1, const Vector3& v2) {
	//EASY_FUNCTION(profiler::colors::LightGreen400);

	const __m128 lhs = _mm_load_ps(v1.e);
	const __m128 rhs = _mm_load_ps(v2.e);

	Vector3 difference;
	_mm_store_ps(difference.e, _mm_sub_ps(lhs, rhs));
	return difference;
}

// Divides every lane of v1 by `scalar`. No check is made for scalar == 0.
Vector3 operator/(const Vector3& v1, float scalar) {
	//EASY_FUNCTION(profiler::colors::LightGreen500);

	const __m128 numerator = _mm_load_ps(v1.e);
	const __m128 divisor = _mm_set1_ps(scalar);

	Vector3 quotient;
	_mm_store_ps(quotient.e, _mm_div_ps(numerator, divisor));
	return quotient;
}

// Scales every lane of v1 by `scalar` (vector * scalar form).
Vector3 operator*(const Vector3& v1, float scalar) {
	//EASY_FUNCTION(profiler::colors::LightGreen600);

	const __m128 lanes = _mm_load_ps(v1.e);
	const __m128 factor = _mm_set1_ps(scalar);

	Vector3 scaled;
	_mm_store_ps(scaled.e, _mm_mul_ps(lanes, factor));
	return scaled;
}

// Scales every lane of v1 by `scalar` (scalar * vector form).
Vector3 operator*(float scalar, const Vector3& v1) {
	//EASY_FUNCTION(profiler::colors::LightGreen700);

	const __m128 lanes = _mm_load_ps(v1.e);
	const __m128 factor = _mm_set1_ps(scalar);

	Vector3 scaled;
	_mm_store_ps(scaled.e, _mm_mul_ps(lanes, factor));
	return scaled;
}

// Returns v divided by its own length. A zero-length input is not guarded
// against; the division is performed as-is.
Vector3 unitVector(const Vector3& v) {
	//EASY_FUNCTION(profiler::colors::LightGreen800);

	return v / v.length();
}

// Lane-wise minimum of v1 and v2.
Vector3 minVec(const Vector3& v1, const Vector3& v2) {
	//EASY_FUNCTION(profiler::colors::LightGreen900);

	const __m128 first = _mm_load_ps(v1.e);
	const __m128 second = _mm_load_ps(v2.e);

	Vector3 lowest;
	_mm_store_ps(lowest.e, _mm_min_ps(first, second));
	return lowest;
}

// Lane-wise maximum of v1 and v2.
Vector3 maxVec(const Vector3& v1, const Vector3& v2) {
	//EASY_FUNCTION(profiler::colors::LightBlue);

	const __m128 first = _mm_load_ps(v1.e);
	const __m128 second = _mm_load_ps(v2.e);

	Vector3 highest;
	_mm_store_ps(highest.e, _mm_max_ps(first, second));
	return highest;
}

// Cross product v1 x v2, built from two rotated copies of each operand:
//   (a.y, a.z, a.x) * (b.z, b.x, b.y)  -  (a.z, a.x, a.y) * (b.y, b.z, b.x)
// Lane 3 is carried through unshuffled on both sides.
Vector3 cross(const Vector3& v1, const Vector3& v2) {
	//EASY_FUNCTION(profiler::colors::LightBlue100);
	const __m128 a = _mm_load_ps(v1.e);
	const __m128 b = _mm_load_ps(v2.e);

	const __m128 a_yzx = _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 0, 2, 1));
	const __m128 b_zxy = _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 1, 0, 2));
	const __m128 a_zxy = _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 1, 0, 2));
	const __m128 b_yzx = _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 0, 2, 1));

	const __m128 positive = _mm_mul_ps(a_yzx, b_zxy);
	const __m128 negative = _mm_mul_ps(a_zxy, b_yzx);

	Vector3 result;
	_mm_store_ps(result.e, _mm_sub_ps(positive, negative));
	return result;
}

// Dot product over x, y and z: (x1*x2 + y1*y2) + z1*z2.
// The padding lane is excluded so a stray value there cannot affect the
// result, the scalar is extracted with _mm_cvtss_f32 rather than the
// MSVC-only .m128_f32 member, and only SSE1 operations are used.
float dot(const Vector3& v1, const Vector3& v2) {
	//EASY_FUNCTION(profiler::colors::LightBlue200);
	const __m128 products = _mm_mul_ps(_mm_load_ps(v1.e), _mm_load_ps(v2.e));
	const __m128 yTerm = _mm_shuffle_ps(products, products, _MM_SHUFFLE(1, 1, 1, 1));
	const __m128 zTerm = _mm_shuffle_ps(products, products, _MM_SHUFFLE(2, 2, 2, 2));
	return _mm_cvtss_f32(_mm_add_ss(_mm_add_ss(products, yTerm), zTerm));
}

float tripleProduct(const Vector3& v1, const Vector3& v2, const Vector3& v3) {
	//EASY_FUNCTION(profiler::colors::LightBlue100);
	return dot(cross(v1, v2), v3);
}

I have commented out the EASY_FUNCTION macro. If I enable it, I get random crashes in the SIMD functions.

Is this a known issue? Is there any solution for it?

PixelClear avatar May 16 '18 18:05 PixelClear

Hi @PixelClear
Thanks for report
No, we haven't faced such error yet
Can you please provide your system information? (OS (Windows, as I see), architecture, compiler version...)

Does the crash occur only if EASY_FUNCTION is uncommented in the Vector3 methods? I mean, what happens if you add EASY_FUNCTION or EASY_BLOCK to other regular functions in your code?
If you replace EASY_FUNCTION with EASY_BLOCK, does the crash still occur?

cas4ey avatar May 17 '18 13:05 cas4ey

Hi @cas4ey

My system information is as follows: Windows 10, Intel i5, Microsoft (R) C/C++ Optimizing Compiler Version 19.13.26129 for x86.

The crash occurs only if EASY_FUNCTION is uncommented in Vector3f functions.

I have tried replacing EASY_FUNCTION with EASY_BLOCK, but the same thing happens.

I mostly get the crash in the following function:

// Dot product over x, y and z: (x1*x2 + y1*y2) + z1*z2.
// The padding lane is excluded so a stray value there cannot affect the
// result, the scalar is extracted with _mm_cvtss_f32 rather than the
// MSVC-only .m128_f32 member, and only SSE1 operations are used.
float dot(const Vector3& v1, const Vector3& v2) {
	//EASY_FUNCTION(profiler::colors::LightBlue200);
	const __m128 products = _mm_mul_ps(_mm_load_ps(v1.e), _mm_load_ps(v2.e));
	const __m128 yTerm = _mm_shuffle_ps(products, products, _MM_SHUFFLE(1, 1, 1, 1));
	const __m128 zTerm = _mm_shuffle_ps(products, products, _MM_SHUFFLE(2, 2, 2, 2));
	return _mm_cvtss_f32(_mm_add_ss(_mm_add_ss(products, yTerm), zTerm));
}

My initial guess is that EASY_FUNCTION expands into a function that executes in its own thread, and that my function tries to return before it has finished — though this is only a guess.

Hope this will help.

PixelClear avatar May 17 '18 18:05 PixelClear

@PixelClear sorry for the delay

My initial guess is that EASY_FUNCTION expands into a function that executes in its own thread, and that my function tries to return before it has finished — though this is only a guess.

No, it does not start or execute anything in another thread, it executes in current thread only.

Microsoft (R) C/C++ Optimizing Compiler Version 19.13.26129 for x86

Just to be sure, Visual Studio 2017 x86 ?


Can you try to create a small example console app with these vector operators? Does it also crash?
Does the x64 version of this example app also crash?
As an alternative, you can add these vector operators to the profiler_sample application (maybe it would be easier this way).

P.S.: I'm sorry for not performing these tests by myself, too busy at the moment...

cas4ey avatar May 30 '18 14:05 cas4ey