cglm Proposal: Implementing Double/Half/... Precision

trafficstars

Function names are not HIDDEN in macro, in this way we can use 'Go To Definition' in IDEs. With making raw implementation Macro/Template allows us to implement double precision with no cost (copy-paste only). Individual types can implement SIMD separately like below; float impl used SIMD since double does not (for now).

Also exposing SCALAR/RAW implementation will allow us to compare SIMD versions with SCALAR versions in TESTS which we must do.

Here is the proposal:

#define glm_mat4_inv_scalar_impl(NS, T, T_mat4, FN_SUFFIX)                    \
{                                                                             \
  T t[6];                                                                     \
  T det;                                                                      \
  T a = mat[0][0], b = mat[0][1], c = mat[0][2], d = mat[0][3],               \
    e = mat[1][0], f = mat[1][1], g = mat[1][2], h = mat[1][3],               \
    i = mat[2][0], j = mat[2][1], k = mat[2][2], l = mat[2][3],               \
    m = mat[3][0], n = mat[3][1], o = mat[3][2], p = mat[3][3];               \
                                                                              \
  t[0] = k * p - o * l; t[1] = j * p - n * l; t[2] = j * o - n * k;           \
  t[3] = i * p - m * l; t[4] = i * o - m * k; t[5] = i * n - m * j;           \
                                                                              \
  dest[0][0] =  f * t[0] - g * t[1] + h * t[2];                               \
  dest[1][0] =-(e * t[0] - g * t[3] + h * t[4]);                              \
  dest[2][0] =  e * t[1] - f * t[3] + h * t[5];                               \
  dest[3][0] =-(e * t[2] - f * t[4] + g * t[5]);                              \
                                                                              \
  dest[0][1] =-(b * t[0] - c * t[1] + d * t[2]);                              \
  dest[1][1] =  a * t[0] - c * t[3] + d * t[4];                               \
  dest[2][1] =-(a * t[1] - b * t[3] + d * t[5]);                              \
  dest[3][1] =  a * t[2] - b * t[4] + c * t[5];                               \
                                                                              \
  t[0] = g * p - o * h; t[1] = f * p - n * h; t[2] = f * o - n * g;           \
  t[3] = e * p - m * h; t[4] = e * o - m * g; t[5] = e * n - m * f;           \
                                                                              \
  dest[0][2] =  b * t[0] - c * t[1] + d * t[2];                               \
  dest[1][2] =-(a * t[0] - c * t[3] + d * t[4]);                              \
  dest[2][2] =  a * t[1] - b * t[3] + d * t[5];                               \
  dest[3][2] =-(a * t[2] - b * t[4] + c * t[5]);                              \
                                                                              \
  t[0] = g * l - k * h; t[1] = f * l - j * h; t[2] = f * k - j * g;           \
  t[3] = e * l - i * h; t[4] = e * k - i * g; t[5] = e * j - i * f;           \
                                                                              \
  dest[0][3] =-(b * t[0] - c * t[1] + d * t[2]);                              \
  dest[1][3] =  a * t[0] - c * t[3] + d * t[4];                               \
  dest[2][3] =-(a * t[1] - b * t[3] + d * t[5]);                              \
  dest[3][3] =  a * t[2] - b * t[4] + c * t[5];                               \
                                                                              \
  det = 1.0f / (a * dest[0][0] + b * dest[1][0]                               \
              + c * dest[2][0] + d * dest[3][0]);                             \
                                                                              \
  NS ## _mat4_scale_p(dest, det);                                             \
}

#define glm_mat4_inv_scalar_impl_raw(NS, T, T_mat4, FN_SUFFIX)                \
CGLM_INLINE                                                                   \
void                                                                          \
NS ## _mat4_inv ## FN_SUFFIX(T_mat4 mat, T_mat4 dest)                         \
  glm_mat4_inv_scalar_impl(NS, T, T_mat4, FN_SUFFIX)


#if CGLM_EXPOSE_SCALAR_RAW || CGLM_INTERNAL_TESTS
glm_mat4_inv_scalar_impl_raw(glm,   float,  mat4,  _raw);  /* glm_mat4_inv_raw()   */
glm_mat4_inv_scalar_impl_raw(glm64, double, dmat4, _raw);  /* glm64_mat4_inv_raw() */
#endif

/*!
 * @brief inverse mat4 and store in dest
 *
 * @param[in]  mat  matrix
 * @param[out] dest inverse matrix
 */
CGLM_INLINE
void
glm_mat4_inv(mat4 mat, mat4 dest) {
#if defined( __SSE__ ) || defined( __SSE2__ )
  glm_mat4_inv_sse2(mat, dest);
#elif defined(CGLM_NEON_FP)
  glm_mat4_inv_neon(mat, dest);
#else
  glm_mat4_inv_scalar_impl(glm, float, mat4, );
#endif
}

/*!
 * @brief inverse dmat4 and store in dest
 *
 * @param[in]  mat  matrix
 * @param[out] dest inverse matrix
 */
CGLM_INLINE
void
glm64_mat4_inv(dmat4 mat, dmat4 dest) {
  glm_mat4_inv_scalar_impl(glm64, double, dmat4, );
}

/* or */
CGLM_INLINE
void
glm_dmat4_inv(dmat4 mat, dmat4 dest) {
  glm_mat4_inv_scalar_impl(glm64, double, dmat4, );
}


/* cleanup: end of each file */
#ifndef CGLM_NO_UNDEF_RAW
#  undef glm_mat4_inv_scalar_impl_raw
#endif

Hiding function names like glm_mat4_inv() in impl MACRO may cause hard to read I think.

Feedbacks are welcome

May 07 '21 01:05 recp

What about declaring the type upfront? That way the implementation would be the same, and the user chooses between float/double precision before including cglm.h, or with a compilation flag.

This library does something similar:

https://github.com/felselva/mathc/blob/d672725203fc80f6f79fba64533b87d51c32d714/mathc.h#L133

Apr 16 '23 20:04 sacereda

What if we need both? I think it would be nice to keep both, or add enable/disable compilation flag to reduce binary size maybe. I still think we should keep them in separate namespace e.g. glm64_ or dmat4_ ... to keep things consistent. Replacing float to double is not enough, there are SIMD functions, types, sub func calls ...

Apr 16 '23 22:04 recp

Yes, I see, I missread the patch. Forget about it then.

Apr 17 '23 07:04 sacereda

cglm cglm copied to clipboard

Proposal: Implementing Double/Half/... Precision

cglm
cglm copied to clipboard