clspv
clspv copied to clipboard
Function call with generic struct reference fails to compile.
clspv.exe -cl-std=CLC++ -inline-entry-points -cl-kernel-arg-info
Fails with: 'immarg attribute only applies to intrinsics' 'ptr @_Z21llvm.memcpy.p4.p0.i32' 'LLVM ERROR: Broken module found, compilation aborted!'
Source:
struct XY
{
float x, y;
};
void Modify(XY& v)
{
v = {1, 1}; // Fails
// v.x = 1; v.y = 1; // OK
}
// OK:
// void Modify(private XY& v)
// {
// v = {1, 1};
// }
kernel
void BeamMeUp(global float* In, global float* Out, write_only image2d_t ImageOut)
{
XY p;
p = {0, 0}; // OK
Modify(p);
*Out = p.x;
}
IR:
; ModuleID = 'M:/Program/beyond/Metalogic/Exports/Beam/DarkBare.beam'
source_filename = "M:/Program/beyond/Metalogic/Exports/Beam/DarkBare.beam"
target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
target triple = "spir-unknown-unknown"
%struct.XY = type { float, float }
; Function Attrs: convergent mustprogress norecurse nounwind
define dso_local spir_func void @_Z6ModifyRU3AS42XY(ptr addrspace(4) align 4 dereferenceable(8) %v) #0 {
entry:
%v.addr = alloca ptr addrspace(4), align 4
%ref.tmp = alloca %struct.XY, align 4
store ptr addrspace(4) %v, ptr %v.addr, align 4
%x = getelementptr inbounds %struct.XY, ptr %ref.tmp, i32 0, i32 0
store float 1.000000e+00, ptr %x, align 4
%y = getelementptr inbounds %struct.XY, ptr %ref.tmp, i32 0, i32 1
store float 1.000000e+00, ptr %y, align 4
%ref.tmp.ascast = addrspacecast ptr %ref.tmp to ptr addrspace(4)
%0 = load ptr addrspace(4), ptr %v.addr, align 4
call void @llvm.memcpy.p4.p4.i32(ptr addrspace(4) align 4 %0, ptr addrspace(4) align 4 %ref.tmp.ascast, i32 8, i1 false)
ret void
}
; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
declare void @llvm.memcpy.p4.p4.i32(ptr addrspace(4) noalias nocapture writeonly, ptr addrspace(4) noalias nocapture readonly, i32, i1 immarg) #1
; Function Attrs: convergent mustprogress norecurse nounwind
define dso_local spir_kernel void @BeamMeUp(ptr addrspace(1) align 4 %In, ptr addrspace(1) align 4 %Out, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1) %ImageOut) #2 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 {
entry:
%In.addr = alloca ptr addrspace(1), align 4
%Out.addr = alloca ptr addrspace(1), align 4
%ImageOut.addr = alloca target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1), align 4
%p = alloca %struct.XY, align 4
%ref.tmp = alloca %struct.XY, align 4
store ptr addrspace(1) %In, ptr %In.addr, align 4
store ptr addrspace(1) %Out, ptr %Out.addr, align 4
store target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1) %ImageOut, ptr %ImageOut.addr, align 4
%x = getelementptr inbounds %struct.XY, ptr %ref.tmp, i32 0, i32 0
store float 0.000000e+00, ptr %x, align 4
%y = getelementptr inbounds %struct.XY, ptr %ref.tmp, i32 0, i32 1
store float 0.000000e+00, ptr %y, align 4
%ref.tmp.ascast = addrspacecast ptr %ref.tmp to ptr addrspace(4)
%p.ascast = addrspacecast ptr %p to ptr addrspace(4)
call void @llvm.memcpy.p4.p4.i32(ptr addrspace(4) align 4 %p.ascast, ptr addrspace(4) align 4 %ref.tmp.ascast, i32 8, i1 false)
%p.ascast1 = addrspacecast ptr %p to ptr addrspace(4)
call spir_func void @_Z6ModifyRU3AS42XY(ptr addrspace(4) align 4 dereferenceable(8) %p.ascast1) #3
%x2 = getelementptr inbounds %struct.XY, ptr %p, i32 0, i32 0
%0 = load float, ptr %x2, align 4
%1 = load ptr addrspace(1), ptr %Out.addr, align 4
store float %0, ptr addrspace(1) %1, align 4
ret void
}
attributes #0 = { convergent mustprogress norecurse nounwind "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="0" "stackrealign" }
attributes #1 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
attributes #2 = { convergent mustprogress norecurse nounwind "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="0" "stackrealign" "uniform-work-group-size"="true" }
attributes #3 = { convergent nobuiltin nounwind "no-builtins" }
!llvm.module.flags = !{!0, !1}
!opencl.ocl.version = !{!2}
!opencl.spir.version = !{!2}
!llvm.ident = !{!3}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"direct-access-external-data", i32 0}
!2 = !{i32 2, i32 0}
!3 = !{!"clang version 17.0.0 (https://github.com/llvm/llvm-project c6fd2ff346c3340ae29a05e74911eea09ae779a1)"}
!4 = !{i32 1, i32 1, i32 1}
!5 = !{!"none", !"none", !"write_only"}
!6 = !{!"float*", !"float*", !"image2d_t"}
!7 = !{!"", !"", !""}
The error mentioned here does not feel like an issue with clspv but more with llvm/clang.
But when I try to reproduce it, I just end up with:
IsSupportedType lacks support for QualType: __generic XY &
Type not covered by IsSupportedType.
UNREACHABLE executed at clspv/lib/FrontendPlugin.cpp:277!
This happens with whatever code I compile (the OK variants as well as the failing one).
It is interesting that you are getting a different error, and that not even the OK variants compile there. I short-circuited IsSupportedType() to always return true, so that it would not give the error you see on your side, but it made no difference here: the checks must already be returning true, and the bug is further down the line. Of course, I remove any such hacks, and the language feature unlocks are isolated to -cl-std=Beam. The fact that Clspv is not compiling at all there suggests that something is seriously wrong!
Here are some more clues. The above source compiles OK with an older Clspv from May 22, using LLVM 15.
It also compiles OK with Clang release 16, using:
clang -x cl -cl-std=CLC++ -S -emit-llvm
Perhaps someone can test it with the current state of LLVM and Clang 17, if this would help rule out anything.
Also, this inspector pragma, inserted at the end of the kernel body, gives the same results for both Clang 16 and the current Clspv:
#pragma clang __debug dump p
It outputs:
VarDecl 0x1ebf632a408 <DarkBare.beam:92:3, col:6> col:6 used p '__private XY':'__private XY' callinit
`-CXXConstructExpr 0x1ebf632a988 <col:6> '__private XY':'__private XY' 'void () __generic noexcept'
The inspector pragmas may give some hints as to where things go wrong and what exactly is happening to the address spaces of the types.
I think this is fixed as of now :)