NVEnc icon indicating copy to clipboard operation
NVEnc copied to clipboard

colorspace_conv: failed to build program source.

Open feureau opened this issue 4 months ago • 4 comments

I upgraded from a previous version to 7.69 because of the number. ( ͡° ͜ʖ ͡°) However, on Windows 11 with the NVIDIA Video Effects SDK v0.7.2 (Ada), converting from SDR to HDR has stopped working. I'm using this command line:

NVEncC64 --avhw --codec av1 --profile high --qvbr 0 --preset p4 --output-depth 10 --multipass 2pass-full --lookahead 32 --nonrefp --aq --aq-temporal --aq-strength 0 --transfer auto --audio-copy --chapter-copy --key-on-chapter --metadata copy --vpp-ngx-truehdr --colormatrix bt2020nc --colorprim bt2020 --transfer smpte2084 -i INPUT -o OUTPUT.mkv

and it gave me this error:

colorspace_conv: failed to build program source. colorspace_conv: Runtime compilation failed colorspace_conv: --------------------------------------- colorspace_conv: --- Source of colorspace_conv --- colorspace_conv: --------------------------------------- colorspace_conv: 1 #ifndef _JITIFY_INCLUDE_GUARD_3F6B24FAF8F3CEB7 colorspace_conv: 2 #define _JITIFY_INCLUDE_GUARD_3F6B24FAF8F3CEB7 colorspace_conv: 3 #ifdef __CUDACC_RTC__ colorspace_conv: 4 #define COLORSPACE_FUNC __device__ __inline__ colorspace_conv: 5 #else colorspace_conv: 6 #define COLORSPACE_FUNC static colorspace_conv: 7 #include <cmath> colorspace_conv: 8 #include <cfloat> colorspace_conv: 9 _Pragma("warning") (push) colorspace_conv: 10 _Pragma("warning") (disable: 4819) colorspace_conv: 11 #include <cuda_runtime.h> colorspace_conv: 12 _Pragma("warning") (pop) colorspace_conv: 13 #endif colorspace_conv: 14 colorspace_conv: 15 typedef float4 LUTVEC; colorspace_conv: 16 colorspace_conv: 17 #ifndef clamp colorspace_conv: 18 #define clamp(x, low, high) (((x) <= (high)) ? (((x) >= (low)) ? (x) : (low)) : (high)) colorspace_conv: 19 #endif colorspace_conv: 20 colorspace_conv: 21 colorspace_conv: 22 const float REC709_ALPHA = 1.09929682680944f; colorspace_conv: 23 const float REC709_BETA = 0.018053968510807f; colorspace_conv: 24 colorspace_conv: 25 const float SMPTE_240M_ALPHA = 1.111572195921731f; colorspace_conv: 26 const float SMPTE_240M_BETA = 0.022821585529445f; colorspace_conv: 27 colorspace_conv: 28 // Adjusted for continuity of first derivative. 
colorspace_conv: 29 const float SRGB_ALPHA = 1.055010718947587f; colorspace_conv: 30 const float SRGB_BETA = 0.003041282560128f; colorspace_conv: 31 colorspace_conv: 32 const float ST2084_M1 = 0.1593017578125f; colorspace_conv: 33 const float ST2084_M2 = 78.84375f; colorspace_conv: 34 const float ST2084_C1 = 0.8359375f; colorspace_conv: 35 const float ST2084_C2 = 18.8515625f; colorspace_conv: 36 const float ST2084_C3 = 18.6875f; colorspace_conv: 37 colorspace_conv: 38 const float ARIB_B67_A = 0.17883277f; colorspace_conv: 39 const float ARIB_B67_B = 0.28466892f; colorspace_conv: 40 const float ARIB_B67_C = 0.55991073f; colorspace_conv: 41 colorspace_conv: 42 const float FLOAT_EPS = 1.175494351e-38f; colorspace_conv: 43 colorspace_conv: 44 const float MP_REF_WHITE = 203.0f; colorspace_conv: 45 const float MP_REF_WHITE_HLG = 3.17955f; colorspace_conv: 46 colorspace_conv: 47 // Common constants for SMPTE ST.2084 (HDR) colorspace_conv: 48 const float PQ_M1 = 2610.0f / 4096.0f * 1.0f / 4.0f; colorspace_conv: 49 const float PQ_M2 = 2523.0f / 4096.0f * 128.0f; colorspace_conv: 50 const float PQ_C1 = 3424.0f / 4096.0f; colorspace_conv: 51 const float PQ_C2 = 2413.0f / 4096.0f * 32.0f; colorspace_conv: 52 const float PQ_C3 = 2392.0f / 4096.0f * 32.0f; colorspace_conv: 53 colorspace_conv: 54 // Chosen for compatibility with higher precision REC709_ALPHA/REC709_BETA. 
colorspace_conv: 55 // See: ITU-R BT.2390-2 5.3.1 colorspace_conv: 56 const float ST2084_OOTF_SCALE = 59.49080238715383f; colorspace_conv: 57 colorspace_conv: 58 COLORSPACE_FUNC float rec_709_oetf(float x) { colorspace_conv: 59 if (x < REC709_BETA) colorspace_conv: 60 x = x * 4.5f; colorspace_conv: 61 else colorspace_conv: 62 x = REC709_ALPHA * powf(x, 0.45f) - (REC709_ALPHA - 1.0f); colorspace_conv: 63 colorspace_conv: 64 return x; colorspace_conv: 65 } colorspace_conv: 66 colorspace_conv: 67 COLORSPACE_FUNC float rec_709_inverse_oetf(float x) { colorspace_conv: 68 if (x < 4.5f * REC709_BETA) colorspace_conv: 69 x = x / 4.5f; colorspace_conv: 70 else colorspace_conv: 71 x = powf((x + (REC709_ALPHA - 1.0f)) / REC709_ALPHA, 1.0f / 0.45f); colorspace_conv: 72 colorspace_conv: 73 return x; colorspace_conv: 74 } colorspace_conv: 75 colorspace_conv: 76 // Ignore the BT.1886 provisions for limited contrast and assume an ideal CRT. colorspace_conv: 77 COLORSPACE_FUNC float rec_1886_eotf(float x) { colorspace_conv: 78 return x < 0.0f ? 0.0f : powf(x, 2.4f); colorspace_conv: 79 } colorspace_conv: 80 colorspace_conv: 81 COLORSPACE_FUNC float rec_1886_inverse_eotf(float x) { colorspace_conv: 82 return x < 0.0f ? 0.0f : powf(x, 1.0f / 2.4f); colorspace_conv: 83 } colorspace_conv: 84 colorspace_conv: 85 COLORSPACE_FUNC float ootf_1_2(float x) { colorspace_conv: 86 return x < 0.0f ? x : powf(x, 1.2f); colorspace_conv: 87 } colorspace_conv: 88 colorspace_conv: 89 COLORSPACE_FUNC float inverse_ootf_1_2(float x) { colorspace_conv: 90 return x < 0.0f ? 
x : powf(x, 1.0f / 1.2f); colorspace_conv: 91 } colorspace_conv: 92 colorspace_conv: 93 COLORSPACE_FUNC float ootf_st2084(float x) { colorspace_conv: 94 return rec_1886_eotf(rec_709_oetf(x * ST2084_OOTF_SCALE)) / 100.0f; colorspace_conv: 95 } colorspace_conv: 96 colorspace_conv: 97 COLORSPACE_FUNC float inverse_ootf_st2084(float x) { colorspace_conv: 98 return rec_709_inverse_oetf(rec_1886_inverse_eotf(x * 100.0f)) / ST2084_OOTF_SCALE; colorspace_conv: 99 } colorspace_conv: 100 colorspace_conv: 101 COLORSPACE_FUNC float log100_oetf(float x) { colorspace_conv: 102 return x <= 0.01f ? 0.0f : 1.0f + log10f(x) * (1.0f / 2.0f); colorspace_conv: 103 } colorspace_conv: 104 colorspace_conv: 105 COLORSPACE_FUNC float log100_inverse_oetf(float x) { colorspace_conv: 106 return x <= 0.0f ? 0.01f : powf(10.0f, 2 * (x - 1.0f)); colorspace_conv: 107 } colorspace_conv: 108 colorspace_conv: 109 COLORSPACE_FUNC float log316_oetf(float x) { colorspace_conv: 110 return x <= 0.00316227766f ? 0.0f : 1.0f + log10f(x) * (1.0f / 2.5f); colorspace_conv: 111 } colorspace_conv: 112 colorspace_conv: 113 COLORSPACE_FUNC float log316_inverse_oetf(float x) { colorspace_conv: 114 return x <= 0.0f ? 0.00316227766f : powf(10.0f, 2.5f * (x - 1.0f)); colorspace_conv: 115 } colorspace_conv: 116 colorspace_conv: 117 COLORSPACE_FUNC float rec_470m_oetf(float x) { colorspace_conv: 118 return x < 0.0f ? 0.0f : powf(x, 2.2f); colorspace_conv: 119 } colorspace_conv: 120 colorspace_conv: 121 COLORSPACE_FUNC float rec_470m_inverse_oetf(float x) { colorspace_conv: 122 return x < 0.0f ? 0.0f : powf(x, 1.0f / 2.2f); colorspace_conv: 123 } colorspace_conv: 124 colorspace_conv: 125 COLORSPACE_FUNC float rec_470bg_oetf(float x) { colorspace_conv: 126 return x < 0.0f ? 0.0f : powf(x, 2.8f); colorspace_conv: 127 } colorspace_conv: 128 colorspace_conv: 129 COLORSPACE_FUNC float rec_470bg_inverse_oetf(float x) { colorspace_conv: 130 return x < 0.0f ? 
0.0f : powf(x, 1.0f / 2.8f); colorspace_conv: 131 } colorspace_conv: 132 colorspace_conv: 133 COLORSPACE_FUNC float smpte_240m_oetf(float x) { colorspace_conv: 134 if (x < 4.0f * SMPTE_240M_BETA) colorspace_conv: 135 x = x * (1.0f / 4.0f); colorspace_conv: 136 else colorspace_conv: 137 x = powf((x + (SMPTE_240M_ALPHA - 1.0f)) / SMPTE_240M_ALPHA, 1.0f / 0.45f); colorspace_conv: 138 colorspace_conv: 139 return x; colorspace_conv: 140 } colorspace_conv: 141 colorspace_conv: 142 COLORSPACE_FUNC float smpte_240m_inverse_oetf(float x) { colorspace_conv: 143 if (x < SMPTE_240M_BETA) colorspace_conv: 144 x = x * 4.0f; colorspace_conv: 145 else colorspace_conv: 146 x = SMPTE_240M_ALPHA * powf(x, 0.45f) - (SMPTE_240M_ALPHA - 1.0f); colorspace_conv: 147 colorspace_conv: 148 return x; colorspace_conv: 149 } colorspace_conv: 150 colorspace_conv: 151 COLORSPACE_FUNC float xvycc_oetf(float x) { colorspace_conv: 152 return copysignf(rec_709_oetf(fabsf(x)), x); colorspace_conv: 153 } colorspace_conv: 154 colorspace_conv: 155 float xvycc_inverse_oetf(float x) { colorspace_conv: 156 return copysignf(rec_709_inverse_oetf(fabsf(x)), x); colorspace_conv: 157 } colorspace_conv: 158 colorspace_conv: 159 COLORSPACE_FUNC float arib_b67_oetf(float x) { colorspace_conv: 160 // Prevent negative pixels from yielding NAN. colorspace_conv: 161 x = fmaxf(x, 0.0f); colorspace_conv: 162 colorspace_conv: 163 if (x <= (1.0f / 12.0f)) colorspace_conv: 164 x = sqrtf(3.0f * x); colorspace_conv: 165 else colorspace_conv: 166 x = ARIB_B67_A * logf(12.0f * x - ARIB_B67_B) + ARIB_B67_C; colorspace_conv: 167 colorspace_conv: 168 return x; colorspace_conv: 169 } colorspace_conv: 170 colorspace_conv: 171 COLORSPACE_FUNC float arib_b67_inverse_oetf(float x) { colorspace_conv: 172 // Prevent negative pixels expanding into positive values. 
colorspace_conv: 173 x = fmaxf(x, 0.0f); colorspace_conv: 174 colorspace_conv: 175 if (x <= 0.5f) colorspace_conv: 176 x = (x * x) * (1.0f / 3.0f); colorspace_conv: 177 else colorspace_conv: 178 x = (expf((x - ARIB_B67_C) / ARIB_B67_A) + ARIB_B67_B) * (1.0f / 12.0f); colorspace_conv: 179 colorspace_conv: 180 return x; colorspace_conv: 181 } colorspace_conv: 182 colorspace_conv: 183 COLORSPACE_FUNC float srgb_eotf(float x) { colorspace_conv: 184 if (x < 12.92f * SRGB_BETA) colorspace_conv: 185 x *= (1.0f / 12.92f); colorspace_conv: 186 else colorspace_conv: 187 x = powf((x + (SRGB_ALPHA - 1.0f)) * (1.0f / SRGB_ALPHA), 2.4f); colorspace_conv: 188 colorspace_conv: 189 return x; colorspace_conv: 190 } colorspace_conv: 191 colorspace_conv: 192 COLORSPACE_FUNC float srgb_inverse_eotf(float x) { colorspace_conv: 193 if (x < SRGB_BETA) colorspace_conv: 194 x = x * 12.92f; colorspace_conv: 195 else colorspace_conv: 196 x = SRGB_ALPHA * powf(x, 1.0f / 2.4f) - (SRGB_ALPHA - 1.0f); colorspace_conv: 197 colorspace_conv: 198 return x; colorspace_conv: 199 } colorspace_conv: 200 colorspace_conv: 201 // Handle values in the range [0.0-1.0] such that they match a legacy CRT. 
colorspace_conv: 202 COLORSPACE_FUNC float xvycc_eotf(float x) { colorspace_conv: 203 if (x < 0.0f || x > 1.0f) colorspace_conv: 204 return copysignf(rec_709_inverse_oetf(fabsf(x)), x); colorspace_conv: 205 else colorspace_conv: 206 return copysignf(rec_1886_eotf(fabsf(x)), x); colorspace_conv: 207 } colorspace_conv: 208 colorspace_conv: 209 COLORSPACE_FUNC float xvycc_inverse_eotf(float x) { colorspace_conv: 210 if (x < 0.0f || x > 1.0f) colorspace_conv: 211 return copysignf(rec_709_oetf(fabsf(x)), x); colorspace_conv: 212 else colorspace_conv: 213 return copysignf(rec_1886_inverse_eotf(fabsf(x)), x); colorspace_conv: 214 } colorspace_conv: 215 colorspace_conv: 216 //pq_space_to_linear colorspace_conv: 217 COLORSPACE_FUNC float st_2084_eotf(float x) { colorspace_conv: 218 // Filter negative values to avoid NAN. colorspace_conv: 219 if (x > 0.0f) { colorspace_conv: 220 float xpow = powf(x, 1.0f / ST2084_M2); colorspace_conv: 221 float num = fmaxf(xpow - ST2084_C1, 0.0f); colorspace_conv: 222 float den = fmaxf(ST2084_C2 - ST2084_C3 * xpow, FLOAT_EPS); colorspace_conv: 223 x = powf(num / den, 1.0f / ST2084_M1); colorspace_conv: 224 } else { colorspace_conv: 225 x = 0.0f; colorspace_conv: 226 } colorspace_conv: 227 colorspace_conv: 228 return x; colorspace_conv: 229 } colorspace_conv: 230 colorspace_conv: 231 //linear_to_pq_space colorspace_conv: 232 COLORSPACE_FUNC float st_2084_inverse_eotf(float x) { colorspace_conv: 233 // Filter negative values to avoid NAN, and also special-case 0 so that (f(g(0)) == 0). colorspace_conv: 234 if (x > 0.0f) { colorspace_conv: 235 float xpow = powf(x, ST2084_M1); colorspace_conv: 236 #if 0 colorspace_conv: 237 // Original formulation from SMPTE ST 2084:2014 publication. 
colorspace_conv: 238 float num = ST2084_C1 + ST2084_C2 * xpow; colorspace_conv: 239 float den = 1.0f + ST2084_C3 * xpow; colorspace_conv: 240 x = powf(num / den, ST2084_M2); colorspace_conv: 241 #else colorspace_conv: 242 // More stable arrangement that avoids some cancellation error. colorspace_conv: 243 float num = (ST2084_C1 - 1.0f) + (ST2084_C2 - ST2084_C3) * xpow; colorspace_conv: 244 float den = 1.0f + ST2084_C3 * xpow; colorspace_conv: 245 x = powf(1.0f + num / den, ST2084_M2); colorspace_conv: 246 #endif colorspace_conv: 247 } else { colorspace_conv: 248 x = 0.0f; colorspace_conv: 249 } colorspace_conv: 250 colorspace_conv: 251 return x; colorspace_conv: 252 } colorspace_conv: 253 colorspace_conv: 254 // Applies a per-channel correction instead of the iterative method specified in Rec.2100. colorspace_conv: 255 COLORSPACE_FUNC float arib_b67_eotf(float x) { colorspace_conv: 256 return ootf_1_2(arib_b67_inverse_oetf(x)); colorspace_conv: 257 } colorspace_conv: 258 colorspace_conv: 259 COLORSPACE_FUNC float arib_b67_inverse_eotf(float x) { colorspace_conv: 260 return arib_b67_oetf(inverse_ootf_1_2(x)); colorspace_conv: 261 } colorspace_conv: 262 colorspace_conv: 263 COLORSPACE_FUNC float st_2084_oetf(float x) { colorspace_conv: 264 return st_2084_inverse_eotf(ootf_st2084(x)); colorspace_conv: 265 } colorspace_conv: 266 colorspace_conv: 267 COLORSPACE_FUNC float st_2084_inverse_oetf(float x) { colorspace_conv: 268 return inverse_ootf_st2084(st_2084_eotf(x)); colorspace_conv: 269 } colorspace_conv: 270 colorspace_conv: 271 COLORSPACE_FUNC float3 aribB67Ops(float3 v, float kr, float kg, float kb, float scale) { colorspace_conv: 272 const float gamma = 1.2f; colorspace_conv: 273 float r = v.x * scale; colorspace_conv: 274 float g = v.y * scale; colorspace_conv: 275 float b = v.z * scale; colorspace_conv: 276 colorspace_conv: 277 float yd = fmaxf(kr * r + kg * g + kb * b, FLOAT_EPS); colorspace_conv: 278 float ys_inv = powf(yd, (1.0f - gamma) / gamma); 
colorspace_conv: 279 colorspace_conv: 280 v.x = arib_b67_oetf(r * ys_inv); colorspace_conv: 281 v.y = arib_b67_oetf(g * ys_inv); colorspace_conv: 282 v.z = arib_b67_oetf(b * ys_inv); colorspace_conv: 283 return v; colorspace_conv: 284 } colorspace_conv: 285 colorspace_conv: 286 COLORSPACE_FUNC float3 aribB67InvOps(float3 v, float kr, float kg, float kb, float scale) { colorspace_conv: 287 const float gamma = 1.2f; colorspace_conv: 288 float r = v.x; colorspace_conv: 289 float g = v.y; colorspace_conv: 290 float b = v.z; colorspace_conv: 291 colorspace_conv: 292 float ys = fmaxf(kr * r + kg * g + kb * b, FLOAT_EPS); colorspace_conv: 293 ys = powf(ys, gamma - 1.0f); colorspace_conv: 294 colorspace_conv: 295 v.x = arib_b67_inverse_oetf(r * ys) * scale; colorspace_conv: 296 v.y = arib_b67_inverse_oetf(g * ys) * scale; colorspace_conv: 297 v.z = arib_b67_inverse_oetf(b * ys) * scale; colorspace_conv: 298 return v; colorspace_conv: 299 } colorspace_conv: 300 colorspace_conv: 301 COLORSPACE_FUNC float3 matrix_mul(float m[3][3], float3 v) { colorspace_conv: 302 float3 ret; colorspace_conv: 303 ret.x = m[0][0] * v.x + m[0][1] * v.y + m[0][2] * v.z; colorspace_conv: 304 ret.y = m[1][0] * v.x + m[1][1] * v.y + m[1][2] * v.z; colorspace_conv: 305 ret.z = m[2][0] * v.x + m[2][1] * v.y + m[2][2] * v.z; colorspace_conv: 306 return ret; colorspace_conv: 307 } colorspace_conv: 308 colorspace_conv: 309 //??: https://gist.github.com/4re/34ccbb95732c1bef47c3d2975ac62395 colorspace_conv: 310 COLORSPACE_FUNC float hable(float x, float A, float B, float C, float D, float E, float F) { colorspace_conv: 311 return ((x*(A*x+C*B)+D*E) / (x*(A*x+B)+D*F)) - E/F; colorspace_conv: 312 } colorspace_conv: 313 colorspace_conv: 314 COLORSPACE_FUNC float hdr2sdr_hable(float x, float source_peak, float ldr_nits, float A, float B, float C, float D, float E, float F) { colorspace_conv: 315 const float eb = source_peak / ldr_nits; colorspace_conv: 316 const float t0 = hable(x, A, B, C, D, E, F); 
colorspace_conv: 317 const float t1 = hable(eb, A, B, C, D, E, F); colorspace_conv: 318 return t0 / t1; colorspace_conv: 319 } colorspace_conv: 320 colorspace_conv: 321 COLORSPACE_FUNC float hdr2sdr_mobius(float x, float source_peak, float ldr_nits, float t, float peak) { colorspace_conv: 322 const float eb = source_peak / ldr_nits; colorspace_conv: 323 peak *= eb; colorspace_conv: 324 if (x <= t) { colorspace_conv: 325 return x; colorspace_conv: 326 } colorspace_conv: 327 colorspace_conv: 328 float a = -t * t * (peak - 1.0f) / (t * t - 2.0f * t + peak); colorspace_conv: 329 float b = (t * t - 2.0f * t * peak + peak) / fmaxf(peak - 1.0f, 1e-6f); colorspace_conv: 330 return (b * b + 2.0f * b * t + t * t) / (b - a) * (x + a) / (x + b); colorspace_conv: 331 } colorspace_conv: 332 colorspace_conv: 333 COLORSPACE_FUNC float hdr2sdr_reinhard(float x, float source_peak, float ldr_nits, float offset, float peak) { colorspace_conv: 334 const float eb = source_peak / ldr_nits; colorspace_conv: 335 peak *= eb; colorspace_conv: 336 return x / (x + offset) * (peak + offset) / peak; colorspace_conv: 337 } colorspace_conv: 338 colorspace_conv: 339 COLORSPACE_FUNC float linear_to_pq_space(float x) { colorspace_conv: 340 if (x > 0.0f) { colorspace_conv: 341 x *= MP_REF_WHITE / 10000.0f; colorspace_conv: 342 x = powf(x, PQ_M1); colorspace_conv: 343 x = (PQ_C1 + PQ_C2 * x) / (1.0f + PQ_C3 * x); colorspace_conv: 344 x = powf(x, PQ_M2); colorspace_conv: 345 return x; colorspace_conv: 346 } else { colorspace_conv: 347 return 0.0f; colorspace_conv: 348 } colorspace_conv: 349 } colorspace_conv: 350 colorspace_conv: 351 COLORSPACE_FUNC float pq_space_to_linear(float x) { colorspace_conv: 352 if (x > 0.0f) { colorspace_conv: 353 x = powf(x, 1.0f / PQ_M2); colorspace_conv: 354 x = fmaxf(x - PQ_C1, 0.0f) / (PQ_C2 - PQ_C3 * x); colorspace_conv: 355 x = powf(x, 1.0f / PQ_M1); colorspace_conv: 356 x *= 10000.0f / MP_REF_WHITE; colorspace_conv: 357 return x; colorspace_conv: 358 } else { 
colorspace_conv: 359 return 0.0f; colorspace_conv: 360 } colorspace_conv: 361 } colorspace_conv: 362 colorspace_conv: 363 COLORSPACE_FUNC float apply_bt2390(float x, const float maxLum) { colorspace_conv: 364 const float ks = 1.5f * maxLum - 0.5f; colorspace_conv: 365 float tb = (x - ks) / (1.0f - ks); colorspace_conv: 366 float tb2 = tb * tb; colorspace_conv: 367 float tb3 = tb2 * tb; colorspace_conv: 368 float pb = (2.0f * tb3 - 3.0f * tb2 + 1.0f) * ks + colorspace_conv: 369 (tb3 - 2.0f * tb2 + tb) * (1.0f - ks) + colorspace_conv: 370 (-2.0f * tb3 + 3.0f * tb2) * maxLum; colorspace_conv: 371 //x = mix(pb, x, lessThan(x, ks)); colorspace_conv: 372 x = (x < ks) ? x : pb; colorspace_conv: 373 return x; colorspace_conv: 374 } colorspace_conv: 375 colorspace_conv: 376 COLORSPACE_FUNC float mix(float x, float y, float a) { colorspace_conv: 377 a = (a < 0.0f) ? 0.0f : a; colorspace_conv: 378 a = (a > 1.0f) ? 1.0f : a; colorspace_conv: 379 return (x) * (1.0f - (a)) + (y) * (a); colorspace_conv: 380 } colorspace_conv: 381 colorspace_conv: 382 COLORSPACE_FUNC float lut3d_linear_interp(float v0, float v1, float a) { colorspace_conv: 383 return v0 + (v1 - v0) * a; colorspace_conv: 384 } colorspace_conv: 385 colorspace_conv: 386 COLORSPACE_FUNC float3 lut3d_linear_interp(float3 v0, float3 v1, float a) { colorspace_conv: 387 float3 r; colorspace_conv: 388 r.x = lut3d_linear_interp(v0.x, v1.x, a); colorspace_conv: 389 r.y = lut3d_linear_interp(v0.y, v1.y, a); colorspace_conv: 390 r.z = lut3d_linear_interp(v0.z, v1.z, a); colorspace_conv: 391 return r; colorspace_conv: 392 } colorspace_conv: 393 colorspace_conv: 394 COLORSPACE_FUNC int lut3d_prev_idx(float x) { colorspace_conv: 395 return (int)x; colorspace_conv: 396 } colorspace_conv: 397 colorspace_conv: 398 COLORSPACE_FUNC int lut3d_near_idx(float x) { colorspace_conv: 399 return (int)(x + 0.5f); colorspace_conv: 400 } colorspace_conv: 401 colorspace_conv: 402 COLORSPACE_FUNC int lut3d_next_idx(float x, int size) { 
colorspace_conv: 403 int next = lut3d_prev_idx(x) + 1; colorspace_conv: 404 return (next >= size) ? size - 1 : next; colorspace_conv: 405 } colorspace_conv: 406 colorspace_conv: 407 COLORSPACE_FUNC float lut3d_prelut(const float s, const int idx, const int size, colorspace_conv: 408 const float prelutmin[3], const float prelutscale[3], const float *__restrict__ prelut) { colorspace_conv: 409 const float x = clamp((s - prelutmin[idx]) * prelutscale[idx], 0.0f, (float)(size - 1)); colorspace_conv: 410 const float c0 = prelut[idx * size + lut3d_prev_idx(x)]; colorspace_conv: 411 const float c1 = prelut[idx * size + lut3d_next_idx(x, size)]; colorspace_conv: 412 return lut3d_linear_interp(c0, c1, x - lut3d_prev_idx(x)); colorspace_conv: 413 } colorspace_conv: 414 colorspace_conv: 415 COLORSPACE_FUNC float3 lut3d_prelut(const float3 in, const int size, colorspace_conv: 416 const float prelutmin[3], const float prelutscale[3], const float *__restrict__ prelut) { colorspace_conv: 417 float3 out; colorspace_conv: 418 out.x = lut3d_prelut(in.x, 0, size, prelutmin, prelutscale, prelut); colorspace_conv: 419 out.y = lut3d_prelut(in.y, 1, size, prelutmin, prelutscale, prelut); colorspace_conv: 420 out.z = lut3d_prelut(in.z, 2, size, prelutmin, prelutscale, prelut); colorspace_conv: 421 return out; colorspace_conv: 422 } colorspace_conv: 423 colorspace_conv: 424 COLORSPACE_FUNC float3 lut3d_get_table(const LUTVEC *__restrict__ lut, const int x, const int y, const int z, const int lutSize0, const int lutSize01) { colorspace_conv: 425 LUTVEC val = lut[x * lutSize01 + y * lutSize0 + z]; colorspace_conv: 426 float3 out; colorspace_conv: 427 out.x = val.x; colorspace_conv: 428 out.y = val.y; colorspace_conv: 429 out.z = val.z; colorspace_conv: 430 return out; colorspace_conv: 431 } colorspace_conv: 432 colorspace_conv: 433 COLORSPACE_FUNC float3 lut3d_interp_nearest(float3 in, const LUTVEC *__restrict__ lut, const int lutSize0, const int lutSize01) { colorspace_conv: 434 return 
lut3d_get_table(lut, lut3d_near_idx(in.x), lut3d_near_idx(in.y), lut3d_near_idx(in.z), lutSize0, lutSize01); colorspace_conv: 435 } colorspace_conv: 436 colorspace_conv: 437 //??: https://en.wikipedia.org/wiki/Trilinear_interpolation colorspace_conv: 438 COLORSPACE_FUNC float3 lut3d_interp_trilinear(float3 in, const LUTVEC *__restrict__ lut, const int lutSize0, const int lutSize01) { colorspace_conv: 439 const int x0 = lut3d_prev_idx(in.x); colorspace_conv: 440 const int x1 = lut3d_next_idx(in.x, lutSize0); colorspace_conv: 441 const int y0 = lut3d_prev_idx(in.y); colorspace_conv: 442 const int y1 = lut3d_next_idx(in.y, lutSize0); colorspace_conv: 443 const int z0 = lut3d_prev_idx(in.z); colorspace_conv: 444 const int z1 = lut3d_next_idx(in.z, lutSize0); colorspace_conv: 445 const float scalex = in.x - x0; colorspace_conv: 446 const float scaley = in.y - y0; colorspace_conv: 447 const float scalez = in.z - z0; colorspace_conv: 448 const float3 c000 = lut3d_get_table(lut, x0, y0, z0, lutSize0, lutSize01); colorspace_conv: 449 const float3 c001 = lut3d_get_table(lut, x0, y0, z1, lutSize0, lutSize01); colorspace_conv: 450 const float3 c010 = lut3d_get_table(lut, x0, y1, z0, lutSize0, lutSize01); colorspace_conv: 451 const float3 c011 = lut3d_get_table(lut, x0, y1, z1, lutSize0, lutSize01); colorspace_conv: 452 const float3 c100 = lut3d_get_table(lut, x1, y0, z0, lutSize0, lutSize01); colorspace_conv: 453 const float3 c101 = lut3d_get_table(lut, x1, y0, z1, lutSize0, lutSize01); colorspace_conv: 454 const float3 c110 = lut3d_get_table(lut, x1, y1, z0, lutSize0, lutSize01); colorspace_conv: 455 const float3 c111 = lut3d_get_table(lut, x1, y1, z1, lutSize0, lutSize01); colorspace_conv: 456 const float3 c00 = lut3d_linear_interp(c000, c100, scalex); colorspace_conv: 457 const float3 c10 = lut3d_linear_interp(c010, c110, scalex); colorspace_conv: 458 const float3 c01 = lut3d_linear_interp(c001, c101, scalex); colorspace_conv: 459 const float3 c11 = 
lut3d_linear_interp(c011, c111, scalex); colorspace_conv: 460 const float3 c0 = lut3d_linear_interp(c00, c10, scaley); colorspace_conv: 461 const float3 c1 = lut3d_linear_interp(c01, c11, scaley); colorspace_conv: 462 const float3 c = lut3d_linear_interp(c0, c1, scalez); colorspace_conv: 463 return c; colorspace_conv: 464 } colorspace_conv: 465 colorspace_conv: 466 //??: http://www.filmlight.ltd.uk/pdf/whitepapers/FL-TL-TN-0057-SoftwareLib.pdf colorspace_conv: 467 COLORSPACE_FUNC float3 lut3d_interp_tetrahedral(float3 in, const LUTVEC *__restrict__ lut, const int lutSize0, const int lutSize01) { colorspace_conv: 468 const int x0 = lut3d_prev_idx(in.x); colorspace_conv: 469 const int x1 = lut3d_next_idx(in.x, lutSize0); colorspace_conv: 470 const int y0 = lut3d_prev_idx(in.y); colorspace_conv: 471 const int y1 = lut3d_next_idx(in.y, lutSize0); colorspace_conv: 472 const int z0 = lut3d_prev_idx(in.z); colorspace_conv: 473 const int z1 = lut3d_next_idx(in.z, lutSize0); colorspace_conv: 474 const float scalex = in.x - x0; colorspace_conv: 475 const float scaley = in.y - y0; colorspace_conv: 476 const float scalez = in.z - z0; colorspace_conv: 477 float scale0, scale1, scale2; colorspace_conv: 478 int xA, yA, zA, xB, yB, zB; colorspace_conv: 479 if (scalex > scaley) { colorspace_conv: 480 if (scaley > scalez) { colorspace_conv: 481 scale0 = scalex; colorspace_conv: 482 scale1 = scaley; colorspace_conv: 483 scale2 = scalez; colorspace_conv: 484 xA = x1; yA = y0; zA = z0; colorspace_conv: 485 xB = x1; yB = y1; zB = z0; colorspace_conv: 486 } else if (scalex > scalez) { colorspace_conv: 487 scale0 = scalex; colorspace_conv: 488 scale1 = scalez; colorspace_conv: 489 scale2 = scaley; colorspace_conv: 490 xA = x1; yA = y0; zA = z0; colorspace_conv: 491 xB = x1; yB = y0; zB = z1; colorspace_conv: 492 } else { colorspace_conv: 493 scale0 = scalez; colorspace_conv: 494 scale1 = scalex; colorspace_conv: 495 scale2 = scaley; colorspace_conv: 496 xA = x0; yA = y0; zA = z1; 
colorspace_conv: 497 xB = x1; yB = y0; zB = z1; colorspace_conv: 498 } colorspace_conv: 499 } else { colorspace_conv: 500 if (scalez > scaley) { colorspace_conv: 501 scale0 = scalez; colorspace_conv: 502 scale1 = scaley; colorspace_conv: 503 scale2 = scalex; colorspace_conv: 504 xA = x0; yA = y0; zA = z1; colorspace_conv: 505 xB = x0; yB = y1; zB = z1; colorspace_conv: 506 } else if (scalez > scalex) { colorspace_conv: 507 scale0 = scaley; colorspace_conv: 508 scale1 = scalez; colorspace_conv: 509 scale2 = scalex; colorspace_conv: 510 xA = x0; yA = y1; zA = z0; colorspace_conv: 511 xB = x0; yB = y1; zB = z1; colorspace_conv: 512 } else { colorspace_conv: 513 scale0 = scaley; colorspace_conv: 514 scale1 = scalex; colorspace_conv: 515 scale2 = scalez; colorspace_conv: 516 xA = x0; yA = y1; zA = z0; colorspace_conv: 517 xB = x1; yB = y1; zB = z0; colorspace_conv: 518 } colorspace_conv: 519 } colorspace_conv: 520 const float3 c000 = lut3d_get_table(lut, x0, y0, z0, lutSize0, lutSize01); colorspace_conv: 521 const float3 c111 = lut3d_get_table(lut, x1, y1, z1, lutSize0, lutSize01); colorspace_conv: 522 const float3 cA = lut3d_get_table(lut, xA, yA, zA, lutSize0, lutSize01); colorspace_conv: 523 const float3 cB = lut3d_get_table(lut, xB, yB, zB, lutSize0, lutSize01); colorspace_conv: 524 const float s0 = 1.0f - scale0; colorspace_conv: 525 const float s1 = scale0 - scale1; colorspace_conv: 526 const float s2 = scale1 - scale2; colorspace_conv: 527 const float s3 = scale2; colorspace_conv: 528 float3 c; colorspace_conv: 529 c.x = s0 * c000.x + s1 * cA.x + s2 * cB.x + s3 * c111.x; colorspace_conv: 530 c.y = s0 * c000.y + s1 * cA.y + s2 * cB.y + s3 * c111.y; colorspace_conv: 531 c.z = s0 * c000.z + s1 * cA.z + s2 * cB.z + s3 * c111.z; colorspace_conv: 532 return c; colorspace_conv: 533 } colorspace_conv: 534 colorspace_conv: 535 COLORSPACE_FUNC float3 lut3d_interp_pyramid(float3 in, const LUTVEC *lut, const int lutSize0, const int lutSize01) { colorspace_conv: 536 const int 
x0 = lut3d_prev_idx(in.x); colorspace_conv: 537 const int x1 = lut3d_next_idx(in.x, lutSize0); colorspace_conv: 538 const int y0 = lut3d_prev_idx(in.y); colorspace_conv: 539 const int y1 = lut3d_next_idx(in.y, lutSize0); colorspace_conv: 540 const int z0 = lut3d_prev_idx(in.z); colorspace_conv: 541 const int z1 = lut3d_next_idx(in.z, lutSize0); colorspace_conv: 542 const float scalex = in.x - x0; colorspace_conv: 543 const float scaley = in.y - y0; colorspace_conv: 544 const float scalez = in.z - z0; colorspace_conv: 545 colorspace_conv: 546 float scale0, scale1, scale2; colorspace_conv: 547 int xA, yA, zA, xB, yB, zB, xC, yC, zC; colorspace_conv: 548 colorspace_conv: 549 if (scaley > scalex && scalez > scalex) { colorspace_conv: 550 xA = x0; yA = y0; zA = z1; colorspace_conv: 551 xB = x0; yB = y1; zB = z0; colorspace_conv: 552 xC = x0; yC = y1; zC = z1; colorspace_conv: 553 scale0 = scaley; colorspace_conv: 554 scale1 = scalez; colorspace_conv: 555 scale2 = scalex; colorspace_conv: 556 } else if (scalex > scaley && scalez > scaley) { colorspace_conv: 557 xA = x0; yA = y0; zA = z1; colorspace_conv: 558 xB = x1; yB = y0; zB = z0; colorspace_conv: 559 xC = x1; yC = y0; zC = z1; colorspace_conv: 560 scale0 = scalex; colorspace_conv: 561 scale1 = scalez; colorspace_conv: 562 scale2 = scaley; colorspace_conv: 563 } else { colorspace_conv: 564 xA = x0; yA = y1; zA = z0; colorspace_conv: 565 xB = x1; yB = y0; zB = z0; colorspace_conv: 566 xC = x1; yC = y1; zC = z0; colorspace_conv: 567 scale0 = scalex; colorspace_conv: 568 scale1 = scaley; colorspace_conv: 569 scale2 = scalez; colorspace_conv: 570 } colorspace_conv: 571 const float3 c000 = lut3d_get_table(lut, x0, y0, z0, lutSize0, lutSize01); colorspace_conv: 572 const float3 c111 = lut3d_get_table(lut, x1, y1, z1, lutSize0, lutSize01); colorspace_conv: 573 const float3 cA = lut3d_get_table(lut, xA, yA, zA, lutSize0, lutSize01); colorspace_conv: 574 const float3 cB = lut3d_get_table(lut, xB, yB, zB, lutSize0, lutSize01); 
colorspace_conv: 575 const float3 cC = lut3d_get_table(lut, xC, yC, zC, lutSize0, lutSize01); colorspace_conv: 576 float3 c; colorspace_conv: 577 c.x = c000.x + (cB.x - c000.x) * scale0 + (c111.x - cC.x) * scale2 + (cA.x - c000.x) * scale1 + (cC.x - cA.x - cB.x + c000.x) * scale0 * scale1; colorspace_conv: 578 c.y = c000.y + (cB.y - c000.y) * scale0 + (c111.y - cC.y) * scale2 + (cA.y - c000.y) * scale1 + (cC.y - cA.y - cB.y + c000.y) * scale0 * scale1; colorspace_conv: 579 c.z = c000.z + (cB.z - c000.z) * scale0 + (c111.z - cC.z) * scale2 + (cA.z - c000.z) * scale1 + (cC.z - cA.z - cB.z + c000.z) * scale0 * scale1; colorspace_conv: 580 return c; colorspace_conv: 581 } colorspace_conv: 582 colorspace_conv: 583 COLORSPACE_FUNC float3 lut3d_interp_prism(float3 in, const LUTVEC *lut, const int lutSize0, const int lutSize01) { colorspace_conv: 584 const int x0 = lut3d_prev_idx(in.x); colorspace_conv: 585 const int x1 = lut3d_next_idx(in.x, lutSize0); colorspace_conv: 586 const int y0 = lut3d_prev_idx(in.y); colorspace_conv: 587 const int y1 = lut3d_next_idx(in.y, lutSize0); colorspace_conv: 588 const int z0 = lut3d_prev_idx(in.z); colorspace_conv: 589 const int z1 = lut3d_next_idx(in.z, lutSize0); colorspace_conv: 590 const float scalex = in.x - x0; colorspace_conv: 591 const float scaley = in.y - y0; colorspace_conv: 592 const float scalez = in.z - z0; colorspace_conv: 593 float scale0, scale2; colorspace_conv: 594 int xA, yA, zA, xB, yB, zB; colorspace_conv: 595 colorspace_conv: 596 if (scalez > scalex) { colorspace_conv: 597 scale0 = scalez; colorspace_conv: 598 scale2 = scalex; colorspace_conv: 599 xA = x0; yA = y1; zA = z1; colorspace_conv: 600 xB = x0; yB = y0; zB = z1; colorspace_conv: 601 } else { colorspace_conv: 602 scale0 = scalex; colorspace_conv: 603 scale2 = scalez; colorspace_conv: 604 xA = x1; yA = y1; zA = z0; colorspace_conv: 605 xB = x1; yB = y0; zB = z0; colorspace_conv: 606 } colorspace_conv: 607 const float3 c000 = lut3d_get_table(lut, x0, y0, z0, 
lutSize0, lutSize01); colorspace_conv: 608 const float3 c010 = lut3d_get_table(lut, x0, y1, z0, lutSize0, lutSize01); colorspace_conv: 609 const float3 c101 = lut3d_get_table(lut, x1, y0, z1, lutSize0, lutSize01); colorspace_conv: 610 const float3 c111 = lut3d_get_table(lut, x1, y1, z1, lutSize0, lutSize01); colorspace_conv: 611 const float3 cA = lut3d_get_table(lut, xA, yA, zA, lutSize0, lutSize01); colorspace_conv: 612 const float3 cB = lut3d_get_table(lut, xB, yB, zB, lutSize0, lutSize01); colorspace_conv: 613 float3 c; colorspace_conv: 614 c.x = c000.x + (cB.x - c000.x) * scale0 + (c101.x - cB.x) * scale2 + (c010.x - c000.x) * scaley + (c000.x - c010.x - cB.x + cA.x) * scale0 * scaley + (cB.x - cA.x - c101.x + c111.x) * scale2 * scaley; colorspace_conv: 615 c.y = c000.y + (cB.y - c000.y) * scale0 + (c101.y - cB.y) * scale2 + (c010.y - c000.y) * scaley + (c000.y - c010.y - cB.y + cA.y) * scale0 * scaley + (cB.y - cA.y - c101.y + c111.y) * scale2 * scaley; colorspace_conv: 616 c.z = c000.z + (cB.z - c000.z) * scale0 + (c101.z - cB.z) * scale2 + (c010.z - c000.z) * scaley + (c000.z - c010.z - cB.z + cA.z) * scale0 * scaley + (cB.z - cA.z - c101.z + c111.z) * scale2 * scaley; colorspace_conv: 617 return c; colorspace_conv: 618 } colorspace_conv: 619 colorspace_conv: 620 struct RGYColorspaceDevParams { colorspace_conv: 621 int lut_offset; colorspace_conv: 622 int prelut_offset; colorspace_conv: 623 // ???offset????????? colorspace_conv: 624 // ??????????????????? 
colorspace_conv: 625 }; colorspace_conv: 626 colorspace_conv: 627 float *getDevParamsPrelut(void *__restrict__ ptr) { colorspace_conv: 628 return (float *)((char *)ptr + ((RGYColorspaceDevParams *)ptr)->prelut_offset); colorspace_conv: 629 } colorspace_conv: 630 colorspace_conv: 631 const float *getDevParamsPrelut(const void *__restrict__ ptr) { colorspace_conv: 632 return (const float *)((const char *)ptr + ((RGYColorspaceDevParams *)ptr)->prelut_offset); colorspace_conv: 633 } colorspace_conv: 634 colorspace_conv: 635 LUTVEC *getDevParamsLut(void *__restrict__ ptr) { colorspace_conv: 636 return (LUTVEC *)((char *)ptr + ((RGYColorspaceDevParams *)ptr)->lut_offset); colorspace_conv: 637 } colorspace_conv: 638 colorspace_conv: 639 const LUTVEC *getDevParamsLut(const void *__restrict__ ptr) { colorspace_conv: 640 return (const LUTVEC *)((const char *)ptr + ((RGYColorspaceDevParams *)ptr)->lut_offset); colorspace_conv: 641 } colorspace_conv: 642 colorspace_conv: 643 colorspace_conv: 644 #include <stdint.h> colorspace_conv: 645 colorspace_conv: 646 __device__ __inline__ colorspace_conv: 647 float3 convert_colorspace_custom(float3 x, const RGYColorspaceDevParams *__restrict__ params) { colorspace_conv: 648 colorspace_conv: 649 { colorspace_conv: 650 float m[3][3] = { colorspace_conv: 651 { -nan(ind)f, -nan(ind)f, -nan(ind)f }, colorspace_conv: 652 { -nan(ind)f, -nan(ind)f, -nan(ind)f }, colorspace_conv: 653 { -nan(ind)f, -nan(ind)f, -nan(ind)f } colorspace_conv: 654 }; colorspace_conv: 655 x = matrix_mul(m, x); colorspace_conv: 656 } colorspace_conv: 657 colorspace_conv: 658 { //linear->gamma colorspace_conv: 659 const float pre_scaler = 1.0000000000000000e-02f; colorspace_conv: 660 const float post_scaler = 1.0000000000000000e+00f; colorspace_conv: 661 x.x = post_scaler * st_2084_inverse_eotf( x.x * pre_scaler ); colorspace_conv: 662 x.y = post_scaler * st_2084_inverse_eotf( x.y * pre_scaler ); colorspace_conv: 663 x.z = post_scaler * st_2084_inverse_eotf( x.z * 
pre_scaler ); colorspace_conv: 664 } colorspace_conv: 665 colorspace_conv: 666 { colorspace_conv: 667 float m[3][3] = { colorspace_conv: 668 { 2.6269999999999999e-01f, 6.7799999999999994e-01f, 5.9299999999999999e-02f }, colorspace_conv: 669 { -1.3963006271925163e-01f, -3.6036993728074834e-01f, 5.0000000000000000e-01f }, colorspace_conv: 670 { 5.0000000000000000e-01f, -4.5978570459785700e-01f, -4.0214295402142955e-02f } colorspace_conv: 671 }; colorspace_conv: 672 x = matrix_mul(m, x); colorspace_conv: 673 } colorspace_conv: 674 colorspace_conv: 675 { //range float->int colorspace_conv: 676 const float range_y = 5.6064000000000000e+04f; colorspace_conv: 677 const float offset_y = 4.0960000000000000e+03f; colorspace_conv: 678 const float range_uv = 5.7344000000000000e+04f; colorspace_conv: 679 const float offset_uv = 3.2768000000000000e+04f; colorspace_conv: 680 x.x = x.x * range_y + offset_y; colorspace_conv: 681 x.y = x.y * range_uv + offset_uv; colorspace_conv: 682 x.z = x.z * range_uv + offset_uv; colorspace_conv: 683 } colorspace_conv: 684 colorspace_conv: 685 return x; colorspace_conv: 686 } colorspace_conv: 687 colorspace_conv: 688 static const int PIX_PER_THREAD = 4; colorspace_conv: 689 colorspace_conv: 690 template<typename T> __device__ __inline__ T toPix(float x) { return (T)clamp((x) + 0.5f, 0.0f, (1<<(sizeof(T)*8)) - 0.5f); } colorspace_conv: 691 template<> __device__ __inline__ float toPix<float> (float x) { return x; } colorspace_conv: 692 colorspace_conv: 693 template<typename TypeOut, typename TypeIn> colorspace_conv: 694 __global__ void kernel_filter( colorspace_conv: 695 uint8_t *__restrict__ pDstY, uint8_t *__restrict__ pDstU, uint8_t *__restrict__ pDstV, colorspace_conv: 696 const int dstPitch, const int dstWidth, const int dstHeight, colorspace_conv: 697 const uint8_t *__restrict__ pSrcY, const uint8_t *__restrict__ pSrcU, const uint8_t *__restrict__ pSrcV, colorspace_conv: 698 const int srcPitch, const int srcWidth, const int srcHeight, bool 
srcInterlaced, colorspace_conv: 699 const RGYColorspaceDevParams *__restrict__ params) { colorspace_conv: 700 const int ix = (blockIdx.x * blockDim.x + threadIdx.x) * PIX_PER_THREAD; colorspace_conv: 701 const int iy = blockIdx.y * blockDim.y + threadIdx.y; colorspace_conv: 702 colorspace_conv: 703 struct __align__(sizeof(TypeIn) * 4) TypeIn4 { colorspace_conv: 704 TypeIn x, y, z, w; colorspace_conv: 705 }; colorspace_conv: 706 colorspace_conv: 707 struct __align__(sizeof(TypeOut) * 4) TypeOut4 { colorspace_conv: 708 TypeOut x, y, z, w; colorspace_conv: 709 }; colorspace_conv: 710 colorspace_conv: 711 if (ix < dstWidth && iy < dstHeight) { colorspace_conv: 712 colorspace_conv: 713 TypeIn4 srcY = *(TypeIn4 *)(pSrcY + iy * srcPitch + ix * sizeof(TypeIn)); colorspace_conv: 714 TypeIn4 srcU = *(TypeIn4 *)(pSrcU + iy * srcPitch + ix * sizeof(TypeIn)); colorspace_conv: 715 TypeIn4 srcV = *(TypeIn4 *)(pSrcV + iy * srcPitch + ix * sizeof(TypeIn)); colorspace_conv: 716 colorspace_conv: 717 float3 pix0 = make_float3((float)srcY.x, (float)srcU.x, (float)srcV.x); colorspace_conv: 718 float3 pix1 = make_float3((float)srcY.y, (float)srcU.y, (float)srcV.y); colorspace_conv: 719 float3 pix2 = make_float3((float)srcY.z, (float)srcU.z, (float)srcV.z); colorspace_conv: 720 float3 pix3 = make_float3((float)srcY.w, (float)srcU.w, (float)srcV.w); colorspace_conv: 721 colorspace_conv: 722 pix0 = convert_colorspace_custom(pix0, params); colorspace_conv: 723 pix1 = convert_colorspace_custom(pix1, params); colorspace_conv: 724 pix2 = convert_colorspace_custom(pix2, params); colorspace_conv: 725 pix3 = convert_colorspace_custom(pix3, params); colorspace_conv: 726 colorspace_conv: 727 TypeOut4 dstY, dstU, dstV; colorspace_conv: 728 dstY.x = toPix<TypeOut>(pix0.x); dstU.x = toPix<TypeOut>(pix0.y); dstV.x = toPix<TypeOut>(pix0.z); colorspace_conv: 729 dstY.y = toPix<TypeOut>(pix1.x); dstU.y = toPix<TypeOut>(pix1.y); dstV.y = toPix<TypeOut>(pix1.z); colorspace_conv: 730 dstY.z = 
toPix<TypeOut>(pix2.x); dstU.z = toPix<TypeOut>(pix2.y); dstV.z = toPix<TypeOut>(pix2.z); colorspace_conv: 731 dstY.w = toPix<TypeOut>(pix3.x); dstU.w = toPix<TypeOut>(pix3.y); dstV.w = toPix<TypeOut>(pix3.z); colorspace_conv: 732 colorspace_conv: 733 TypeOut4 *ptrDstY = (TypeOut4 *)(pDstY + iy * dstPitch + ix * sizeof(TypeOut)); colorspace_conv: 734 TypeOut4 *ptrDstU = (TypeOut4 *)(pDstU + iy * dstPitch + ix * sizeof(TypeOut)); colorspace_conv: 735 TypeOut4 *ptrDstV = (TypeOut4 *)(pDstV + iy * dstPitch + ix * sizeof(TypeOut)); colorspace_conv: 736 colorspace_conv: 737 ptrDstY[0] = dstY; colorspace_conv: 738 ptrDstU[0] = dstU; colorspace_conv: 739 ptrDstV[0] = dstV; colorspace_conv: 740 } colorspace_conv: 741 }; colorspace_conv: 742 colorspace_conv: 743 #endif // _JITIFY_INCLUDE_GUARD_3F6B24FAF8F3CEB7 colorspace_conv: --------------------------------------- colorspace_conv: Compiler options: --use_fast_math -arch=compute_75 colorspace_conv: --------------------------------------------------- colorspace_conv: --- JIT compile log for colorspace_conv --- colorspace_conv: --------------------------------------------------- colorspace_conv: colorspace_conv(651): error: identifier "ind" is undefined colorspace_conv: colorspace_conv(651): error: expected a "," colorspace_conv: colorspace_conv(651): error: expected a "," colorspace_conv: colorspace_conv(651): error: expected a "," colorspace_conv: colorspace_conv(652): error: expected a "," colorspace_conv: colorspace_conv(652): error: expected a "," colorspace_conv: colorspace_conv(652): error: expected a "," colorspace_conv: colorspace_conv(653): error: expected a "," colorspace_conv: colorspace_conv(653): error: expected a "," colorspace_conv: colorspace_conv(653): error: expected a "," colorspace_conv: colorspace_conv(45): warning: variable "MP_REF_WHITE_HLG" was declared but never referenced colorspace_conv: colorspace_conv(688): warning: variable "PIX_PER_THREAD" was declared but never referenced colorspace_conv: 10 
errors detected in the compilation of "colorspace_conv". colorspace: failed to setup custom filter: error in cuda..

Any ideas on how to fix this, please?

I saw previous issues with colorspace_conv on other platforms/versions where some additional components needed to be included in the binary. Could this be related to those?

Thanks in advance

feureau avatar Oct 11 '24 03:10 feureau