numba-dpex
numba-dpex copied to clipboard
Numba-dpex adds extra indexing validation instructions inside kernels
Numba-dpex generates the following LLVM IR for a one-dimensional vector addition kernel:
; Device function generated for a one-dimensional vector-addition kernel.
; Takes three flattened array arguments (a, b and the output _6binary__add_2);
; the `.4` member of each is the addrspace(1) data pointer used below.
; NOTE(review): the `.5.0` member of each array is presumably the extent of
; dimension 0 (shape[0]) -- confirm against the Numba array data model.
; Returns i32 0 and writes a null pointer through %.ret.
define internal spir_func i32 @dpex_py_devfn__3C__(
i8** nocapture %.ret, i8* %arg.a.0, i8* %arg.a.1, i64 %arg.a.2, i64 %arg.a.3, i64 addrspace(1)* %arg.a.4, i64 %arg.a.5.0, i64 %arg.a.6.0,
i64 %arg.a__size0_1,
i8* %arg.b.0, i8* %arg.b.1, i64 %arg.b.2, i64 %arg.b.3, i64 addrspace(1)* %arg.b.4, i64 %arg.b.5.0, i64 %arg.b.6.0,
i8* %arg._6binary__add_2.0, i8* %arg._6binary__add_2.1, i64 %arg._6binary__add_2.2, i64 %arg._6binary__add_2.3, i64 addrspace(1)* %arg._6binary__add_2.4, i64 %arg._6binary__add_2.5.0, i64 %arg._6binary__add_2.6.0) #0 {
entry:
; %.114 = get_global_id(0): the index used for all three array accesses.
%.114 = tail call spir_func i64 @_Z13get_global_idj(i32 0)
; %.136 = (index < 0), signed comparison. This is the check under discussion;
; it is computed once and reused by the three selects below.
%.136 = icmp slt i64 %.114, 0
; a[index]: if the index is negative, add %arg.a.5.0 to it, otherwise add 0
; (looks like Python-style negative-index wraparound -- TODO confirm).
%.137 = select i1 %.136, i64 %arg.a.5.0, i64 0
%.138 = add i64 %.137, %.114
%.151 = getelementptr i64, i64 addrspace(1)* %arg.a.4, i64 %.138
%.152 = load i64, i64 addrspace(1)* %.151, align 8
; b[index]: same select/add adjustment, using %arg.b.5.0.
%.173 = select i1 %.136, i64 %arg.b.5.0, i64 0
%.174 = add i64 %.173, %.114
%.187 = getelementptr i64, i64 addrspace(1)* %arg.b.4, i64 %.174
%.188 = load i64, i64 addrspace(1)* %.187, align 8
; Sum of the two loaded i64 elements.
%.196 = add nsw i64 %.188, %.152
; Output element: same select/add adjustment, using %arg._6binary__add_2.5.0,
; then store the sum.
%.220 = select i1 %.136, i64 %arg._6binary__add_2.5.0, i64 0
%.221 = add i64 %.220, %.114
%.234 = getelementptr i64, i64 addrspace(1)* %arg._6binary__add_2.4, i64 %.221
store i64 %.196, i64 addrspace(1)* %.234, align 8
; Write null through %.ret and return status 0.
store i8* null, i8** %.ret, align 8
ret i32 0
}
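The instruction `%.136 = icmp slt i64 %.114, 0`
checks whether the result of `get_global_id`
is < 0. This check, together with the `select`/`add` pairs that depend on it (`%.137`/`%.138`, `%.173`/`%.174`, `%.220`/`%.221`), is superfluous, because a work-item's global ID is never negative. We should investigate why Numba generates these checks and how they can be disabled.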