compute-runtime
compute-runtime copied to clipboard
A770: Printing empty string segfaults
I'm working on Julia support for oneAPI, and after upgrading to an A770 I noticed that printing an empty string segfaults.
Julia MWE:
# Minimal reproducer: a device kernel whose only action is printing an empty format string.
using oneAPI
function kernel()
# The empty format string is the trigger; per the report, non-empty strings print fine.
oneAPI.@printf("")
return
end
# Launch the kernel through oneAPI.jl.
@oneapi kernel()
# The segfault surfaces here: the backtrace below runs from synchronize through
# zeCommandQueueSynchronize into the runtime's printKernelOutput/printString.
synchronize()
signal (11): Segmentation fault
in expression starting at /home/tim/Julia/pkg/oneAPI/wip.jl:13
strnlen_s at /workspace/srcdir/compute-runtime/shared/source/helpers/string.h:40
printString at /workspace/srcdir/compute-runtime/shared/source/program/print_formatter.cpp:54
printKernelOutput at /workspace/srcdir/compute-runtime/shared/source/program/print_formatter.cpp:48
printOutput at /workspace/srcdir/compute-runtime/level_zero/core/source/printf_handler/printf_handler.cpp:76
printPrintfOutput at /workspace/srcdir/compute-runtime/level_zero/core/source/kernel/kernel_imp.cpp:974
printKernelsPrintfOutput at /workspace/srcdir/compute-runtime/level_zero/core/source/cmdqueue/cmdqueue.cpp:178
postSyncOperations at /workspace/srcdir/compute-runtime/level_zero/core/source/cmdqueue/cmdqueue.cpp:184
synchronizeByPollingForTaskCount at /workspace/srcdir/compute-runtime/level_zero/core/source/cmdqueue/cmdqueue.cpp:171
synchronize at /workspace/srcdir/compute-runtime/level_zero/core/source/cmdqueue/cmdqueue.cpp:147
zeCommandQueueSynchronize at /workspace/srcdir/compute-runtime/level_zero/api/core/ze_cmdqueue_api_entrypoints.h:39
macro expansion at /home/tim/Julia/pkg/oneAPI/lib/level-zero/libze.jl:1556 [inlined]
macro expansion at /home/tim/Julia/pkg/oneAPI/lib/level-zero/utils.jl:5 [inlined]
macro expansion at /home/tim/Julia/pkg/oneAPI/lib/level-zero/libze.jl:13 [inlined]
zeCommandQueueSynchronize at /home/tim/Julia/pkg/oneAPI/lib/utils/call.jl:24
synchronize at /home/tim/Julia/pkg/oneAPI/lib/level-zero/cmdqueue.jl:34 [inlined]
synchronize at /home/tim/Julia/pkg/oneAPI/lib/level-zero/cmdqueue.jl:34 [inlined]
synchronize at /home/tim/Julia/pkg/oneAPI/src/context.jl:59
main at /home/tim/Julia/pkg/oneAPI/wip.jl:10
unknown function (ip: 0x7ff371f1767f)
The kernel above generates the following LLVM IR:
julia> oneAPI.code_llvm(kernel, Tuple{}; kernel=true, dump_module=true, debuginfo=:none)
; ModuleID = 'text'
source_filename = "text"
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
target triple = "spir64-unknown-unknown"
@0 = private unnamed_addr constant [1 x i8] zeroinitializer, align 1
declare i32 @printf(i8*, ...)
define spir_kernel void @_Z6kernel() local_unnamed_addr #0 {
conversion:
%0 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([1 x i8], [1 x i8]* @0, i64 0, i64 0))
ret void
}
attributes #0 = { "probe-stack"="inline-asm" }
!llvm.module.flags = !{!0, !1}
!opencl.ocl.version = !{!2}
!opencl.spirv.version = !{!3}
!julia.kernel = !{!4}
!0 = !{i32 2, !"Dwarf Version", i32 4}
!1 = !{i32 2, !"Debug Info Version", i32 3}
!2 = !{i32 2, i32 0}
!3 = !{i32 1, i32 5}
!4 = !{void ()* @_Z6kernel}
... which we translate to SPIR-V using the Khronos translator:
; SPIR-V
; Version: 1.0
; Generator: Khronos LLVM/SPIR-V Translator; 14
; Bound: 18
; Schema: 0
OpCapability Addresses
OpCapability Kernel
OpCapability Int64
OpCapability Int8
%1 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %11 "_Z6kernel"
OpSource OpenCL_C 200000
OpName %conversion "conversion"
OpDecorate %8 Constant
OpDecorate %8 Alignment 1
%ulong = OpTypeInt 64 0
%uchar = OpTypeInt 8 0
%uint = OpTypeInt 32 0
%ulong_1 = OpConstant %ulong 1
%ulong_0 = OpConstant %ulong 0
%_arr_uchar_ulong_1 = OpTypeArray %uchar %ulong_1
%_ptr_Function__arr_uchar_ulong_1 = OpTypePointer Function %_arr_uchar_ulong_1
%void = OpTypeVoid
%10 = OpTypeFunction %void
%_ptr_Function_uchar = OpTypePointer Function %uchar
%6 = OpConstantNull %_arr_uchar_ulong_1
%8 = OpVariable %_ptr_Function__arr_uchar_ulong_1 Function %6
%11 = OpFunction %void None %10
%conversion = OpLabel
%15 = OpInBoundsPtrAccessChain %_ptr_Function_uchar %8 %ulong_0 %ulong_0
%17 = OpExtInst %uint %1 printf %15
OpReturn
OpFunctionEnd
The compiled SPIR-V kernel is attached, and can be loaded (after extracting) using the following C-based loader:
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <level_zero/ze_api.h>
#include <level_zero/zet_api.h>
/*
 * Read the whole file at `filename` into a freshly malloc'ed buffer.
 *
 * On return, *spirv points at the file contents and *spirv_size holds the
 * number of bytes read; the caller owns the buffer and must free() it.
 * Every failure (open, seek, tell, allocation, short read) trips an assert,
 * in keeping with the rest of this reproducer.
 */
void read_spirv_binary(const char *filename, uint8_t **spirv, size_t *spirv_size) {
    FILE *file = fopen(filename, "rb");
    assert(file != NULL);

    /* Determine the file size by seeking to the end. ftell() reports errors
       as -1L, which must be caught before converting to the unsigned size_t. */
    int seek_status = fseek(file, 0, SEEK_END);
    assert(seek_status == 0);
    long file_length = ftell(file);
    assert(file_length >= 0);
    seek_status = fseek(file, 0, SEEK_SET);
    assert(seek_status == 0);
    (void)seek_status; /* only inspected by asserts */

    *spirv_size = (size_t)file_length;

    /* malloc(0) may legitimately return NULL; request at least one byte so an
       empty file is not mistaken for an allocation failure. */
    *spirv = (uint8_t *)malloc(*spirv_size > 0 ? *spirv_size : 1);
    assert(*spirv != NULL);

    size_t bytes_read = fread(*spirv, 1, *spirv_size, file);
    assert(bytes_read == *spirv_size);
    (void)bytes_read; /* only inspected by asserts */

    fclose(file);
}
/*
 * Minimal Level Zero loader for the reproducer: builds the SPIR-V module named
 * on the command line, launches its `_Z6kernel` entry point as a single
 * 1x1x1 group, and synchronizes the command queue.
 *
 * Usage: <program> <path to SPIR-V binary>
 * Returns 1 on bad usage and 0 otherwise; any Level Zero failure trips an assert.
 */
int main(int argc, char *argv[]) {
    if (argc != 2) {
        fprintf(stderr, "Usage: %s <path to SPIR-V binary>\n", argv[0]);
        return 1;
    }

    ze_result_t result = zeInit(0);
    assert(result == ZE_RESULT_SUCCESS);

    /* Query how many drivers exist, then fetch only the first one. The count
       is reset to 1 because `driver` has room for a single handle; passing the
       full count would let the call write past it on multi-driver systems. */
    uint32_t driver_count = 0;
    result = zeDriverGet(&driver_count, NULL);
    assert(result == ZE_RESULT_SUCCESS);
    assert(driver_count > 0);
    ze_driver_handle_t driver;
    driver_count = 1;
    result = zeDriverGet(&driver_count, &driver);
    assert(result == ZE_RESULT_SUCCESS);

    /* Same pattern for devices: enumerate, then fetch exactly one handle. */
    uint32_t device_count = 0;
    result = zeDeviceGet(driver, &device_count, NULL);
    assert(result == ZE_RESULT_SUCCESS);
    assert(device_count > 0);
    ze_device_handle_t device;
    device_count = 1;
    result = zeDeviceGet(driver, &device_count, &device);
    assert(result == ZE_RESULT_SUCCESS);

    ze_context_handle_t context;
    ze_context_desc_t context_desc = {
        .stype = ZE_STRUCTURE_TYPE_CONTEXT_DESC,
        .pNext = NULL,
        .flags = 0
    };
    result = zeContextCreate(driver, &context_desc, &context);
    assert(result == ZE_RESULT_SUCCESS);

    /* Load the SPIR-V binary from disk and build a module from it. */
    uint8_t *spirv;
    size_t spirv_size;
    read_spirv_binary(argv[1], &spirv, &spirv_size);

    ze_module_handle_t module;
    ze_module_desc_t module_desc = {
        .stype = ZE_STRUCTURE_TYPE_MODULE_DESC,
        .pNext = NULL,
        .format = ZE_MODULE_FORMAT_IL_SPIRV,
        .inputSize = spirv_size,
        .pInputModule = spirv,
        .pBuildFlags = ""
    };
    result = zeModuleCreate(context, device, &module_desc, &module, NULL);
    assert(result == ZE_RESULT_SUCCESS);

    /* The kernel name matches the OpEntryPoint of the attached SPIR-V. */
    ze_kernel_handle_t kernel;
    ze_kernel_desc_t kernel_desc = {
        .stype = ZE_STRUCTURE_TYPE_KERNEL_DESC,
        .pNext = NULL,
        .flags = 0,
        .pKernelName = "_Z6kernel"
    };
    result = zeKernelCreate(module, &kernel_desc, &kernel);
    assert(result == ZE_RESULT_SUCCESS);

    ze_command_queue_handle_t cmd_queue;
    ze_command_queue_desc_t cmd_queue_desc = {
        .stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
        .pNext = NULL,
        .ordinal = 0,
        .index = 0,
        .flags = 0,
        .mode = ZE_COMMAND_QUEUE_MODE_DEFAULT,
        .priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL
    };
    result = zeCommandQueueCreate(context, device, &cmd_queue_desc, &cmd_queue);
    assert(result == ZE_RESULT_SUCCESS);

    ze_command_list_handle_t cmd_list;
    ze_command_list_desc_t cmd_list_desc = {
        .stype = ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC,
        .pNext = NULL,
        .commandQueueGroupOrdinal = 0,
        .flags = 0
    };
    result = zeCommandListCreate(context, device, &cmd_list_desc, &cmd_list);
    assert(result == ZE_RESULT_SUCCESS);

    /* Record a single 1x1x1 launch, close the list, and submit it. */
    ze_group_count_t group_count = {
        .groupCountX = 1,
        .groupCountY = 1,
        .groupCountZ = 1
    };
    result = zeCommandListAppendLaunchKernel(cmd_list, kernel, &group_count, NULL, 0, NULL);
    assert(result == ZE_RESULT_SUCCESS);
    result = zeCommandListClose(cmd_list);
    assert(result == ZE_RESULT_SUCCESS);
    result = zeCommandQueueExecuteCommandLists(cmd_queue, 1, &cmd_list, NULL);
    assert(result == ZE_RESULT_SUCCESS);

    /* In the reported backtrace the crash happens under this call, where the
       runtime formats the kernel's printf output. */
    result = zeCommandQueueSynchronize(cmd_queue, UINT32_MAX);
    assert(result == ZE_RESULT_SUCCESS);

    /* Teardown, in the reproducer's original order. */
    zeKernelDestroy(kernel);
    zeModuleDestroy(module);
    free(spirv);
    zeCommandListDestroy(cmd_list);
    zeCommandQueueDestroy(cmd_queue);
    zeContextDestroy(context);
    return 0;
}
Tested on Linux 6.2.11, using both compute-runtime 22.43.24595.30 from the Arch Linux repos and our own build of 22.53.25593. Printing non-empty strings works, as does printing empty strings on another system of mine (a NUC with Xe graphics, running Linux 5.10 with compute-runtime 22.53.25593).
Fixed in https://github.com/intel/compute-runtime/commit/1a1bd04d4a7ee9fc0a5fedf2bcf656f40fc0f3f9
@maleadt could you confirm it is working fine now?
No. The issue doesn't seem to be a null pointer dereference.
❯ gdb --args /home/tim/Julia/depot/juliaup/julia-1.8.5+0.x64.linux.gnu/bin/julia --project wip.jl
Thread 1 "julia" received signal SIGSEGV, Segmentation fault.
#2 0x00007ffeb54a2acd in NEO::PrintFormatter::printKernelOutput(std::function<void (char*)> const&) (this=0x7fffffffb0f0, print=...)
at /workspace/srcdir/compute-runtime/shared/source/program/print_formatter.cpp:49
49 printString(formatString, print);
(gdb) l
44 } else {
45 while (currentOffset + sizeof(char *) <= printfOutputBufferSize) {
46 char *formatString = nullptr;
47 read(&formatString);
48 if (formatString != nullptr) {
49 printString(formatString, print);
50 }
51 }
52 }
53 }
(gdb) p formatString
$1 = 0xffffd556aa670000 <error: Cannot access memory at address 0xffffd556aa670000>
Handling of nullptr on the NEO side seems to be correct; IGC should take a look at this issue from their side.
Any update?
Hi, the issue was fixed in this commit
This still fails, now on non-Arc hardware too. I'm using NEO v24.5.28454 and IGC v1.0.15985.
[3137871] signal (11.1): Segmentation fault
in expression starting at REPL[4]:1
strnlen_s at /workspace/srcdir/compute-runtime/shared/source/helpers/string.h:40
printString at /workspace/srcdir/compute-runtime/shared/source/program/print_formatter.cpp:59
printKernelOutput at /workspace/srcdir/compute-runtime/shared/source/program/print_formatter.cpp:52
printOutput at /workspace/srcdir/compute-runtime/level_zero/core/source/printf_handler/printf_handler.cpp:76
printPrintfOutput at /workspace/srcdir/compute-runtime/level_zero/core/source/kernel/kernel_imp.cpp:1163
printKernelsPrintfOutput at /workspace/srcdir/compute-runtime/level_zero/core/source/cmdqueue/cmdqueue.cpp:196
postSyncOperations at /workspace/srcdir/compute-runtime/level_zero/core/source/cmdqueue/cmdqueue.cpp:213
synchronizeByPollingForTaskCount at /workspace/srcdir/compute-runtime/level_zero/core/source/cmdqueue/cmdqueue.cpp:188
synchronize at /workspace/srcdir/compute-runtime/level_zero/core/source/cmdqueue/cmdqueue.cpp:164
zeCommandQueueSynchronize at /workspace/srcdir/compute-runtime/level_zero/api/core/ze_cmdqueue_api_entrypoints.h:39
the issue was fixed in this commit
This still fails, now on non-Arc hardware too. I'm using NEO v24.5.28454 and IGC v1.0.15985.
@JablonskiMateusz / @amielcza igc-1.0.15985.0 contains 06eeecbc8a, so that fix was not good/enough?