cglm
cglm copied to clipboard
Proposal: Implementing Double/Half/... Precision
Function names are not HIDDEN in macro, in this way we can use 'Go To Definition' in IDEs. With making raw implementation Macro/Template allows us to implement double precision with no cost (copy-paste only). Individual types can implement SIMD separately like below; float impl used SIMD since double does not (for now).
Also exposing SCALAR/RAW implementation will allow us to compare SIMD versions with SCALAR versions in TESTS which we must do.
Here is the proposal:
#define glm_mat4_inv_scalar_impl(NS, T, T_mat4, FN_SUFFIX) \
{ \
T t[6]; \
T det; \
T a = mat[0][0], b = mat[0][1], c = mat[0][2], d = mat[0][3], \
e = mat[1][0], f = mat[1][1], g = mat[1][2], h = mat[1][3], \
i = mat[2][0], j = mat[2][1], k = mat[2][2], l = mat[2][3], \
m = mat[3][0], n = mat[3][1], o = mat[3][2], p = mat[3][3]; \
\
t[0] = k * p - o * l; t[1] = j * p - n * l; t[2] = j * o - n * k; \
t[3] = i * p - m * l; t[4] = i * o - m * k; t[5] = i * n - m * j; \
\
dest[0][0] = f * t[0] - g * t[1] + h * t[2]; \
dest[1][0] =-(e * t[0] - g * t[3] + h * t[4]); \
dest[2][0] = e * t[1] - f * t[3] + h * t[5]; \
dest[3][0] =-(e * t[2] - f * t[4] + g * t[5]); \
\
dest[0][1] =-(b * t[0] - c * t[1] + d * t[2]); \
dest[1][1] = a * t[0] - c * t[3] + d * t[4]; \
dest[2][1] =-(a * t[1] - b * t[3] + d * t[5]); \
dest[3][1] = a * t[2] - b * t[4] + c * t[5]; \
\
t[0] = g * p - o * h; t[1] = f * p - n * h; t[2] = f * o - n * g; \
t[3] = e * p - m * h; t[4] = e * o - m * g; t[5] = e * n - m * f; \
\
dest[0][2] = b * t[0] - c * t[1] + d * t[2]; \
dest[1][2] =-(a * t[0] - c * t[3] + d * t[4]); \
dest[2][2] = a * t[1] - b * t[3] + d * t[5]; \
dest[3][2] =-(a * t[2] - b * t[4] + c * t[5]); \
\
t[0] = g * l - k * h; t[1] = f * l - j * h; t[2] = f * k - j * g; \
t[3] = e * l - i * h; t[4] = e * k - i * g; t[5] = e * j - i * f; \
\
dest[0][3] =-(b * t[0] - c * t[1] + d * t[2]); \
dest[1][3] = a * t[0] - c * t[3] + d * t[4]; \
dest[2][3] =-(a * t[1] - b * t[3] + d * t[5]); \
dest[3][3] = a * t[2] - b * t[4] + c * t[5]; \
\
det = 1.0f / (a * dest[0][0] + b * dest[1][0] \
+ c * dest[2][0] + d * dest[3][0]); \
\
NS ## _mat4_scale_p(dest, det); \
}
#define glm_mat4_inv_scalar_impl_raw(NS, T, T_mat4, FN_SUFFIX) \
CGLM_INLINE \
void \
NS ## _mat4_inv ## FN_SUFFIX(T_mat4 mat, T_mat4 dest) \
glm_mat4_inv_scalar_impl(NS, T, T_mat4, FN_SUFFIX)
#if CGLM_EXPOSE_SCALAR_RAW || CGLM_INTERNAL_TESTS
glm_mat4_inv_scalar_impl_raw(glm, float, mat4, _raw); /* glm_mat4_inv_raw() */
glm_mat4_inv_scalar_impl_raw(glm64, double, dmat4, _raw); /* glm64_mat4_inv_raw() */
#endif
/*!
* @brief inverse mat4 and store in dest
*
* @param[in] mat matrix
* @param[out] dest inverse matrix
*/
CGLM_INLINE
void
glm_mat4_inv(mat4 mat, mat4 dest) {
#if defined( __SSE__ ) || defined( __SSE2__ )
glm_mat4_inv_sse2(mat, dest);
#elif defined(CGLM_NEON_FP)
glm_mat4_inv_neon(mat, dest);
#else
glm_mat4_inv_scalar_impl(glm, float, mat4, );
#endif
}
/*!
* @brief inverse dmat4 and store in dest
*
* @param[in] mat matrix
* @param[out] dest inverse matrix
*/
CGLM_INLINE
void
glm64_mat4_inv(dmat4 mat, dmat4 dest) {
glm_mat4_inv_scalar_impl(glm64, double, dmat4, );
}
/* or */
CGLM_INLINE
void
glm_dmat4_inv(dmat4 mat, dmat4 dest) {
glm_mat4_inv_scalar_impl(glm64, double, dmat4, );
}
/* cleanup: end of each file */
#ifndef CGLM_NO_UNDEF_RAW
# undef glm_mat4_inv_scalar_impl_raw
#endif
Hiding function names like glm_mat4_inv() in impl MACRO may cause hard to read I think.
Feedbacks are welcome
What about declaring the type upfront? That way the implementation would be the same, and the user chooses between float/double precision before including cglm.h, or with a compilation flag.
This library does something similar:
https://github.com/felselva/mathc/blob/d672725203fc80f6f79fba64533b87d51c32d714/mathc.h#L133
What if we need both? I think it would be nice to keep both, or add enable/disable compilation flag to reduce binary size maybe. I still think we should keep them in separate namespace e.g. glm64_ or dmat4_ ... to keep things consistent. Replacing float to double is not enough, there are SIMD functions, types, sub func calls ...
Yes, I see, I missread the patch. Forget about it then.