clspv
clspv copied to clipboard
Core dumped error
Using the last commit "a353c8379cd673da95a944d34e753267bc50892f". I tried to compile this code:
#define VEC_DATA_TYPE_STR(type, size) type##size
#define VEC_DATA_TYPE(type, size) VEC_DATA_TYPE_STR(type, size)
#ifndef DATA_TYPE
#define DATA_TYPE short
#endif /* DATA_TYPE */
#ifndef DATA_TYPE_OUT
#define DATA_TYPE_OUT uchar
#endif /* DATA_TYPE_OUT */
#define CONVERT_STR(x, type) (convert_##type((x)))
#define CONVERT(x, type) CONVERT_STR(x, type)
#define CONVERT_SAT_STR(x, type) (convert_##type##_sat((x)))
#define CONVERT_SAT(x, type) CONVERT_SAT_STR(x, type)
#define CONVERT_TO_IMAGE_STRUCT(name) \
update_image_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y)
typedef struct {
VEC_DATA_TYPE(DATA_TYPE, 4) x;
VEC_DATA_TYPE(DATA_TYPE, 4) y;
} TWO_VALUES;
typedef struct Image
{
__global uchar *ptr; /**< Pointer to the starting postion of the buffer */
int offset_first_element_in_bytes; /**< The offset of the first element in the source image */
int stride_x; /**< Stride of the image in X dimension (in bytes) */
int stride_y; /**< Stride of the image in Y dimension (in bytes) */
} Image;
__global inline uchar *offset(const Image *img, int x, int y)
{
return img->ptr + x * img->stride_x + y * img->stride_y;
}
#define IMAGE_DECLARATION(name) \
__global uchar *name##_ptr, \
uint name##_stride_x, \
uint name##_step_x, \
uint name##_stride_y, \
uint name##_step_y, \
uint name##_offset_first_element_in_bytes
Image inline update_image_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y)
{
Image img =
{
.ptr = ptr,
.offset_first_element_in_bytes = offset_first_element_in_bytes,
.stride_x = stride_x,
.stride_y = stride_y
};
img.ptr += img.offset_first_element_in_bytes + get_global_id(0) * step_x + get_global_id(1) * step_y;
return img;
}
/** Compute a 1D horizontal convolution of size 3 for 8 bytes assuming the input is made of 1 channel of 1 byte (i.e 8 pixels).
*
* @param[in] left_pixel Pointer to the left pixel.
* @param[in] left_coeff Weight of the left pixel
* @param[in] middle_coeff Weight of the middle pixel
* @param[in] right_coeff Weight of the right pixel
*
* @return a short8 containing 8 convoluted values.
*/
inline TWO_VALUES convolution1x3(__global const uchar *left_pixel,
const short left_coeff,
const short middle_coeff,
const short right_coeff)
{
uchar4 temp1 = vload4(0, left_pixel);
uchar4 temp2 = vload4(4, left_pixel);
uchar4 temp3 = vload4(8, left_pixel);
uchar4 temp4 = vload4(12, left_pixel);
VEC_DATA_TYPE(DATA_TYPE, 4) left1 = CONVERT(temp1.s0123, VEC_DATA_TYPE(DATA_TYPE, 4));
VEC_DATA_TYPE(DATA_TYPE, 4) left2 = CONVERT(temp2.s0123, VEC_DATA_TYPE(DATA_TYPE, 4));
VEC_DATA_TYPE(DATA_TYPE, 4) middle1 = {
(DATA_TYPE) temp1.s1,
(DATA_TYPE) temp1.s2,
(DATA_TYPE) temp1.s3,
(DATA_TYPE) temp2.s0
};
VEC_DATA_TYPE(DATA_TYPE, 4) middle2 = {
(DATA_TYPE) temp2.s1,
(DATA_TYPE) temp2.s2,
(DATA_TYPE) temp2.s3,
(DATA_TYPE) temp3.s0
};
VEC_DATA_TYPE(DATA_TYPE, 4) right1 = {
(DATA_TYPE) temp1.s2,
(DATA_TYPE) temp1.s3,
(DATA_TYPE) temp2.s0,
(DATA_TYPE) temp2.s1
};
VEC_DATA_TYPE(DATA_TYPE, 4) right2 = {
(DATA_TYPE) temp2.s2,
(DATA_TYPE) temp2.s3,
(DATA_TYPE) temp2.s0,
(DATA_TYPE) temp3.s1
};
TWO_VALUES values;
values.x = left1 * (VEC_DATA_TYPE(DATA_TYPE, 4))left_coeff + middle1 * (VEC_DATA_TYPE(DATA_TYPE, 4))middle_coeff + right1 * (VEC_DATA_TYPE(DATA_TYPE, 4))right_coeff;
values.y = left2 * (VEC_DATA_TYPE(DATA_TYPE, 4))left_coeff + middle2 * (VEC_DATA_TYPE(DATA_TYPE, 4))middle_coeff + right2 * (VEC_DATA_TYPE(DATA_TYPE, 4))right_coeff;
return values;
}
/** Apply a 3x3 convolution matrix to a single channel U8 input image and return the result.
*
* Convolution matrix layout:
*
* [ mat0, mat1, mat2 ]\n
* [ mat3, mat4, mat5 ]\n
* [ mat6, mat7, mat8 ]\n
*
* @param[in] src A pointer to source Image structure
* @param[in] mat0 Coefficient from the convolution matrix
* @param[in] mat1 Coefficient from the convolution matrix
* @param[in] mat2 Coefficient from the convolution matrix
* @param[in] mat3 Coefficient from the convolution matrix
* @param[in] mat4 Coefficient from the convolution matrix
* @param[in] mat5 Coefficient from the convolution matrix
* @param[in] mat6 Coefficient from the convolution matrix
* @param[in] mat0 Coefficient from the convolution matrix
* @param[in] mat7 Coefficient from the convolution matrix
* @param[in] mat8 Coefficient from the convolution matrix
* @param[in] scale Convolution matrix scale (Sum of the coefficients, or 1 if the sum is 0)
*
* @return a short8 containing 8 convoluted and scaled values.
*/
inline TWO_VALUES convolution3x3(
Image *src,
const short mat0, const short mat1, const short mat2,
const short mat3, const short mat4, const short mat5,
const short mat6, const short mat7, const short mat8, uint scale)
{
// Output pixels
TWO_VALUES pixels;
TWO_VALUES temp;
// Row 0
temp = convolution1x3(offset(src, -1, -1), mat0, mat1, mat2);
pixels.x = temp.x;
pixels.y = temp.y;
// Row
temp = convolution1x3(offset(src, -1, 0), mat3, mat4, mat5);
pixels.x += temp.x;
pixels.y += temp.y;
// Row 2
temp = convolution1x3(offset(src, -1, 1), mat6, mat7, mat8);
pixels.x += temp.x;
pixels.y += temp.y;
// Divide by the scale
pixels.x /= (VEC_DATA_TYPE(DATA_TYPE, 4))scale;
pixels.y /= (VEC_DATA_TYPE(DATA_TYPE, 4))scale;
return pixels;
}
__kernel void convolution3x3_static(
IMAGE_DECLARATION(src),
IMAGE_DECLARATION(dst),
DATA_TYPE MAT0, DATA_TYPE MAT1, DATA_TYPE MAT2, DATA_TYPE MAT3, DATA_TYPE MAT4, DATA_TYPE MAT5,
DATA_TYPE MAT6, DATA_TYPE MAT7, DATA_TYPE MAT8, DATA_TYPE SCALE)
{
Image src = CONVERT_TO_IMAGE_STRUCT(src);
Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
TWO_VALUES
pixels = convolution3x3(&src,
MAT0, MAT1, MAT2, MAT3, MAT4, MAT5, MAT6, MAT7, MAT8, SCALE);
// Store the result as is in dst
vstore4((short4)pixels.x, 0, (__global short *)dst.ptr);
vstore4((short4)pixels.y, 4, (__global short *)dst.ptr);
}
And it gave a core dumped error:
clspv conv2d_3x3.cl
About to crash at 820; ModuleID = 'conv2d_3x3.cl'
source_filename = "conv2d_3x3.cl"
target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
target triple = "spir-unknown-unknown"
%struct.Image = type { i8 addrspace(1)*, i32, i32, i32 }
%struct.TWO_VALUES = type { <4 x i16>, <4 x i16> }
@__spirv_GlobalInvocationId = local_unnamed_addr addrspace(4) global <3 x i32> zeroinitializer
@__spirv_WorkgroupSize = local_unnamed_addr addrspace(7) global <3 x i32> zeroinitializer
; Function Attrs: inlinehint nounwind readonly
define spir_func %struct.Image @update_image_workitem_ptr(i8 addrspace(1)*, i32, i32, i32, i32, i32) local_unnamed_addr #0 {
entry:
%6 = getelementptr <3 x i32>, <3 x i32> addrspace(4)* @__spirv_GlobalInvocationId, i32 0, i32 0
%7 = load i32, i32 addrspace(4)* %6, align 16
%mul = mul i32 %7, %3
%add = add i32 %mul, %1
%8 = getelementptr <3 x i32>, <3 x i32> addrspace(4)* @__spirv_GlobalInvocationId, i32 0, i32 1
%9 = load i32, i32 addrspace(4)* %8, align 4
%mul7 = mul i32 %9, %5
%add8 = add i32 %add, %mul7
%add.ptr = getelementptr inbounds i8, i8 addrspace(1)* %0, i32 %add8
%10 = insertvalue %struct.Image undef, i8 addrspace(1)* %add.ptr, 0
%11 = insertvalue %struct.Image %10, i32 %1, 1
%12 = insertvalue %struct.Image %11, i32 %2, 2
%13 = insertvalue %struct.Image %12, i32 %4, 3
ret %struct.Image %13
}
; Function Attrs: inlinehint nounwind
define spir_func %struct.TWO_VALUES @convolution1x3(i8 addrspace(1)*, i16 signext, i16 signext, i16 signext) local_unnamed_addr #1 {
entry:
%call = tail call spir_func <4 x i8> @_Z6vload4jPU3AS1Kh(i32 0, i8 addrspace(1)* %0) #5
%call1 = tail call spir_func <4 x i8> @_Z6vload4jPU3AS1Kh(i32 4, i8 addrspace(1)* %0) #5
%call2 = tail call spir_func <4 x i8> @_Z6vload4jPU3AS1Kh(i32 8, i8 addrspace(1)* %0) #5
%call3 = tail call spir_func <4 x i8> @_Z6vload4jPU3AS1Kh(i32 12, i8 addrspace(1)* %0) #5
%call4 = tail call spir_func <4 x i16> @_Z14convert_short4Dv4_h(<4 x i8> %call) #6
%call5 = tail call spir_func <4 x i16> @_Z14convert_short4Dv4_h(<4 x i8> %call1) #6
%4 = extractelement <4 x i8> %call, i32 1
%conv = zext i8 %4 to i16
%vecinit = insertelement <4 x i16> undef, i16 %conv, i32 0
%5 = extractelement <4 x i8> %call, i32 2
%conv6 = zext i8 %5 to i16
%vecinit7 = insertelement <4 x i16> %vecinit, i16 %conv6, i32 1
%6 = extractelement <4 x i8> %call, i32 3
%conv8 = zext i8 %6 to i16
%vecinit9 = insertelement <4 x i16> %vecinit7, i16 %conv8, i32 2
%7 = extractelement <4 x i8> %call1, i32 0
%conv10 = zext i8 %7 to i16
%vecinit11 = insertelement <4 x i16> %vecinit9, i16 %conv10, i32 3
%8 = extractelement <4 x i8> %call1, i32 1
%conv12 = zext i8 %8 to i16
%vecinit13 = insertelement <4 x i16> undef, i16 %conv12, i32 0
%9 = extractelement <4 x i8> %call1, i32 2
%conv14 = zext i8 %9 to i16
%vecinit15 = insertelement <4 x i16> %vecinit13, i16 %conv14, i32 1
%10 = extractelement <4 x i8> %call1, i32 3
%conv16 = zext i8 %10 to i16
%vecinit17 = insertelement <4 x i16> %vecinit15, i16 %conv16, i32 2
%11 = extractelement <4 x i8> %call2, i32 0
%conv18 = zext i8 %11 to i16
%vecinit19 = insertelement <4 x i16> %vecinit17, i16 %conv18, i32 3
%vecinit21 = insertelement <4 x i16> undef, i16 %conv6, i32 0
%vecinit23 = insertelement <4 x i16> %vecinit21, i16 %conv8, i32 1
%vecinit25 = insertelement <4 x i16> %vecinit23, i16 %conv10, i32 2
%vecinit27 = insertelement <4 x i16> %vecinit25, i16 %conv12, i32 3
%vecinit29 = insertelement <4 x i16> undef, i16 %conv14, i32 0
%vecinit31 = insertelement <4 x i16> %vecinit29, i16 %conv16, i32 1
%vecinit33 = insertelement <4 x i16> %vecinit31, i16 %conv10, i32 2
%12 = extractelement <4 x i8> %call2, i32 1
%conv34 = zext i8 %12 to i16
%vecinit35 = insertelement <4 x i16> %vecinit33, i16 %conv34, i32 3
%splat.splatinsert = insertelement <4 x i16> undef, i16 %1, i32 0
%splat.splat = shufflevector <4 x i16> %splat.splatinsert, <4 x i16> undef, <4 x i32> zeroinitializer
%mul = mul <4 x i16> %call4, %splat.splat
%splat.splatinsert36 = insertelement <4 x i16> undef, i16 %2, i32 0
%splat.splat37 = shufflevector <4 x i16> %splat.splatinsert36, <4 x i16> undef, <4 x i32> zeroinitializer
%mul38 = mul <4 x i16> %vecinit11, %splat.splat37
%add = add <4 x i16> %mul38, %mul
%splat.splatinsert39 = insertelement <4 x i16> undef, i16 %3, i32 0
%splat.splat40 = shufflevector <4 x i16> %splat.splatinsert39, <4 x i16> undef, <4 x i32> zeroinitializer
%mul41 = mul <4 x i16> %vecinit27, %splat.splat40
%add42 = add <4 x i16> %add, %mul41
%mul45 = mul <4 x i16> %call5, %splat.splat
%mul48 = mul <4 x i16> %vecinit19, %splat.splat37
%add49 = add <4 x i16> %mul48, %mul45
%mul52 = mul <4 x i16> %vecinit35, %splat.splat40
%add53 = add <4 x i16> %add49, %mul52
%13 = insertvalue %struct.TWO_VALUES undef, <4 x i16> %add42, 0
%14 = insertvalue %struct.TWO_VALUES %13, <4 x i16> %add53, 1
ret %struct.TWO_VALUES %14
}
declare spir_func <4 x i8> @_Z6vload4jPU3AS1Kh(i32, i8 addrspace(1)*) local_unnamed_addr #2
; Function Attrs: nounwind readnone
declare spir_func <4 x i16> @_Z14convert_short4Dv4_h(<4 x i8>) local_unnamed_addr #3
; Function Attrs: nounwind
define spir_kernel void @convolution3x3_static(i8 addrspace(1)* %src_ptr, i32 %src_stride_x, i32 %src_step_x, i32 %src_stride_y, i32 %src_step_y, i32 %src_offset_first_element_in_bytes, i8 addrspace(1)* %dst_ptr, i32 %dst_stride_x, i32 %dst_step_x, i32 %dst_stride_y, i32 %dst_step_y, i32 %dst_offset_first_element_in_bytes, i16 signext %MAT0, i16 signext %MAT1, i16 signext %MAT2, i16 signext %MAT3, i16 signext %MAT4, i16 signext %MAT5, i16 signext %MAT6, i16 signext %MAT7, i16 signext %MAT8, i16 signext %SCALE) local_unnamed_addr #4 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
entry:
%0 = tail call spir_func %struct.Image @update_image_workitem_ptr(i8 addrspace(1)* %src_ptr, i32 %src_offset_first_element_in_bytes, i32 %src_stride_x, i32 %src_step_x, i32 %src_stride_y, i32 %src_step_y) #7
%.elt = extractvalue %struct.Image %0, 0
%.elt13 = extractvalue %struct.Image %0, 2
%.elt15 = extractvalue %struct.Image %0, 3
%1 = tail call spir_func %struct.Image @update_image_workitem_ptr(i8 addrspace(1)* %dst_ptr, i32 %dst_offset_first_element_in_bytes, i32 %dst_stride_x, i32 %dst_step_x, i32 %dst_stride_y, i32 %dst_step_y) #7
%.elt17 = extractvalue %struct.Image %1, 0
%mul.i.i = sub nsw i32 0, %.elt13
%add.ptr.i.i = getelementptr inbounds i8, i8 addrspace(1)* %.elt, i32 %mul.i.i
%mul1.i.i = sub nsw i32 0, %.elt15
%2 = add i32 %mul.i.i, %mul1.i.i
%3 = getelementptr inbounds i8, i8 addrspace(1)* %.elt, i32 %2
%4 = tail call spir_func %struct.TWO_VALUES @convolution1x3(i8 addrspace(1)* %3, i16 signext %MAT0, i16 signext %MAT1, i16 signext %MAT2) #5
%.elt.i = extractvalue %struct.TWO_VALUES %4, 0
%.elt8.i = extractvalue %struct.TWO_VALUES %4, 1
%5 = tail call spir_func %struct.TWO_VALUES @convolution1x3(i8 addrspace(1)* %add.ptr.i.i, i16 signext %MAT3, i16 signext %MAT4, i16 signext %MAT5) #5
%.elt10.i = extractvalue %struct.TWO_VALUES %5, 0
%.elt12.i = extractvalue %struct.TWO_VALUES %5, 1
%add.i = add <4 x i16> %.elt10.i, %.elt.i
%add9.i = add <4 x i16> %.elt12.i, %.elt8.i
%6 = add i32 %mul.i.i, %.elt15
%7 = getelementptr inbounds i8, i8 addrspace(1)* %.elt, i32 %6
%8 = tail call spir_func %struct.TWO_VALUES @convolution1x3(i8 addrspace(1)* %7, i16 signext %MAT6, i16 signext %MAT7, i16 signext %MAT8) #5
%.elt14.i = extractvalue %struct.TWO_VALUES %8, 0
%.elt16.i = extractvalue %struct.TWO_VALUES %8, 1
%add14.i = add <4 x i16> %add.i, %.elt14.i
%add17.i = add <4 x i16> %add9.i, %.elt16.i
%splat.splatinsert.i = insertelement <4 x i16> undef, i16 %SCALE, i32 0
%splat.splat.i = shufflevector <4 x i16> %splat.splatinsert.i, <4 x i16> undef, <4 x i32> zeroinitializer
%div.i = sdiv <4 x i16> %add14.i, %splat.splat.i
%div23.i = sdiv <4 x i16> %add17.i, %splat.splat.i
%9 = bitcast i8 addrspace(1)* %.elt17 to i16 addrspace(1)*
tail call spir_func void @_Z7vstore4Dv4_sjPU3AS1s(<4 x i16> %div.i, i32 0, i16 addrspace(1)* %9) #5
tail call spir_func void @_Z7vstore4Dv4_sjPU3AS1s(<4 x i16> %div23.i, i32 4, i16 addrspace(1)* %9) #5
ret void
}
declare spir_func void @_Z7vstore4Dv4_sjPU3AS1s(<4 x i16>, i32, i16 addrspace(1)*) local_unnamed_addr #2
attributes #0 = { inlinehint nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="0" "stackrealign" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { inlinehint nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="0" "stackrealign" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="0" "stackrealign" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #3 = { nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="0" "stackrealign" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #4 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="0" "stackrealign" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #5 = { nobuiltin nounwind }
attributes #6 = { nobuiltin nounwind readnone }
attributes #7 = { nobuiltin }
!llvm.module.flags = !{!0}
!opencl.ocl.version = !{!1}
!opencl.spir.version = !{!1}
!llvm.ident = !{!2}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 1, i32 2}
!2 = !{!"clang version 6.0.0 (https://github.com/llvm-mirror/clang 82fcdc620f7367f0ffc24b8ade93539e0bfd9e30) (https://github.com/llvm-mirror/llvm 82f73ee5b37a2a4cc1bdad02bebaaaba71b65400)"}
!3 = !{i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0}
!4 = !{!"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none"}
!5 = !{!"uchar*", !"uint", !"uint", !"uint", !"uint", !"uint", !"uchar*", !"uint", !"uint", !"uint", !"uint", !"uint", !"short", !"short", !"short", !"short", !"short", !"short", !"short", !"short", !"short", !"short"}
!6 = !{!"", !"", !"", !"", !"", !"", !"", !"", !"", !"", !"", !"", !"", !"", !"", !"", !"", !"", !"", !"", !"", !""}
Aboot
%9 = bitcast i8 addrspace(1)* %.elt17 to i16 addrspace(1)*Handle above user of scalar bitcast with gep on ReplacePointerBitcastPass
UNREACHABLE executed at ../lib/ReplacePointerBitcastPass.cpp:931!
Aborted (core dumped)
Vulkan does not support 8bit storage right out of the box. So the compiler will run into difficulty (or assert out as in this case) when trying to coerce the code into something Vulkan will support. The constructs you are using would require pretty much full 8bit storage.
Currently this is beyond the scope of Clspv. Vulkan has an extension VK_KHR_8bit_storage which Clspv does not attempt to target.
This is now passing