tvm [Bug] Segmentation fault in AOT-compiled TVM model during inference

[Bug] Segmentation fault in AOT-compiled TVM model during inference

Open Koukyosyumei opened this issue 1 year ago • 0 comments

I'm encountering a segmentation fault when running an AOT-compiled TVM model for inference. The crash occurs inside the TVM-generated code: according to the GDB backtrace below, execution jumps to address 0x0 from within the tvmgen_default___tvm_main___compute_ function.

Build PyTorch Model

class Simple(nn.Module):
    """Two stacked fully connected layers with no activation in between.

    The first layer maps 64 features to 32, the second maps 32 to 10.
    """

    def __init__(self):
        super().__init__()
        # Attribute names are kept as-is: they are the parameter prefixes
        # used in the module's state dict.
        self.lin1 = nn.Linear(64, 32)
        self.lin2 = nn.Linear(32, 10)

    def forward(self, x):
        """Apply ``lin1`` followed by ``lin2`` to ``x`` and return the result."""
        return self.lin2(self.lin1(x))

# Instantiate the network that will be exported.
model = Simple()

# Relay's PyTorch frontend takes (name, shape) pairs describing each input.
input_name = "input0"
input_shape = [1, 64]
shape_list = [(input_name, input_shape)]

# Trace the model with the first training sample, then switch the traced
# module to evaluation mode before importing it into Relay.
example_input = torch.Tensor(X_train[[0]])
scripted_model = torch.jit.trace(model, example_input).eval()
mod, params = relay.frontend.from_pytorch(scripted_model, shape_list)

# Build for the host LLVM target with the ahead-of-time executor.
# NOTE(review): no Runtime is passed to relay.build here, so TVM's default
# runtime setting is used; confirm that default suits objects which are later
# linked into a standalone C program.
target = "llvm"
EXECUTOR = Executor("aot")

with tvm.transform.PassContext(opt_level=0):
    lib = relay.build(mod, target=target, params=params, executor=EXECUTOR)

# Write the LLVM IR of every imported module to tvm_aot_lib<N>.ll.
imported_modules = lib.get_lib().imported_modules
for index, submodule in enumerate(imported_modules):
    ll_path = f"tvm_aot_lib{index}.ll"
    with open(ll_path, mode="w") as ll_file:
        ll_file.write(submodule.get_source("ll"))

Inference Code in C

// tvm_wrapper.c

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>
#include <tvm/runtime/c_runtime_api.h>

extern int32_t tvmgen_default___tvm_main__(void* args, int32_t* arg_type_ids, int32_t num_args, void* out_ret_value, int32_t* out_ret_tcode, void* resource_handle);

/* Upper bound on how many leading input values the wrapper echoes. */
enum { TVM_WRAPPER_PREVIEW_MAX = 10 };

/*
 * Returns how many elements of `tensor` may safely be previewed: the product
 * of its shape extents, capped at TVM_WRAPPER_PREVIEW_MAX. Returns 0 when the
 * tensor, its data pointer or its shape is missing, or any extent is <= 0.
 */
static int tvm_wrapper_preview_count(const DLTensor* tensor) {
    if (tensor == NULL || tensor->data == NULL) {
        return 0;
    }
    if (tensor->ndim > 0 && tensor->shape == NULL) {
        return 0;
    }

    int64_t count = 1;
    for (int d = 0; d < tensor->ndim; d++) {
        const int64_t extent = tensor->shape[d];
        if (extent <= 0) {
            return 0;
        }
        /* Saturate instead of multiplying so huge extents cannot overflow. */
        if (extent >= TVM_WRAPPER_PREVIEW_MAX) {
            count = TVM_WRAPPER_PREVIEW_MAX;
        } else if (count < TVM_WRAPPER_PREVIEW_MAX) {
            count *= extent;
        }
    }
    return count < TVM_WRAPPER_PREVIEW_MAX ? (int)count : TVM_WRAPPER_PREVIEW_MAX;
}

/**
 * Debug shim around the generated entry point tvmgen_default___tvm_main__.
 *
 * Prints the packed arguments (every entry is interpreted as a DLTensor
 * handle), echoes up to the first ten values of argument 0 as floats, and then
 * forwards all six parameters unchanged to tvmgen_default___tvm_main__.
 *
 * @param args            Array of num_args TVMValue entries.
 * @param arg_type_codes  Array of num_args type codes (printed and forwarded).
 * @param num_args        Number of entries in args / arg_type_codes.
 * @param out_ret_value   Forwarded untouched.
 * @param out_ret_tcode   Forwarded untouched.
 * @param resource_handle Forwarded untouched.
 * @return -1 if args or arg_type_codes is NULL; otherwise whatever
 *         tvmgen_default___tvm_main__ returns.
 */
int32_t tvm_wrapper(void* args, int32_t* arg_type_codes, int32_t num_args, void* out_ret_value, int32_t* out_ret_tcode, void* resource_handle) {
    printf("tvm_wrapper: Calling tvmgen_default___tvm_main__\n");
    /* %p requires a void*; the counts are cast so %d matches exactly. */
    printf("tvm_wrapper: args=%p, arg_type_codes=%p, num_args=%d\n", args, (void*)arg_type_codes, (int)num_args);

    if (args == NULL || arg_type_codes == NULL) {
        printf("tvm_wrapper: Error - NULL pointer passed\n");
        return -1;
    }

    TVMValue* tvm_args = (TVMValue*)args;
    for (int i = 0; i < num_args; i++) {
        const DLTensor* dlTensor = (const DLTensor*)tvm_args[i].v_handle;
        printf("tvm_wrapper: args[%d].v_handle=%p, arg_type_codes[%d]=%d\n", i, tvm_args[i].v_handle, i, (int)arg_type_codes[i]);
        if (dlTensor == NULL) {
            /* Nothing to dump; dereferencing would crash inside the debug shim itself. */
            printf("tvm_wrapper: DLTensor[%d]: <NULL handle>\n", i);
            continue;
        }
        printf("tvm_wrapper: DLTensor[%d]: data=%p, ndim=%d, dtype={code=%d, bits=%d, lanes=%d}\n",
               i, dlTensor->data, (int)dlTensor->ndim, (int)dlTensor->dtype.code, (int)dlTensor->dtype.bits, (int)dlTensor->dtype.lanes);

        /* Print exactly ndim extents rather than assuming a rank of two. */
        printf("tvm_wrapper: DLTensor[%d]: shape=[", i);
        if (dlTensor->shape != NULL) {
            for (int d = 0; d < dlTensor->ndim; d++) {
                printf(d == 0 ? "%lld" : ", %lld", (long long)dlTensor->shape[d]);
            }
        }
        printf("]\n");
    }

    // Print the first few elements of the input tensor
    // NOTE(review): the data is read as float without checking dtype, as before.
    if (num_args > 0) {
        const DLTensor* first = (const DLTensor*)tvm_args[0].v_handle;
        const int preview = tvm_wrapper_preview_count(first);
        printf("tvm_wrapper: Input data (first 10 elements): ");
        if (preview > 0) {
            const float* input_data = (const float*)first->data;
            for (int i = 0; i < preview; i++) {
                printf("%f ", input_data[i]);
            }
        }
        printf("\n");
    }

    /* Flush so the diagnostics above survive a crash inside the generated code. */
    fflush(stdout);

    int32_t result = tvmgen_default___tvm_main__(args, arg_type_codes, num_args, out_ret_value, out_ret_tcode, resource_handle);
    printf("tvm_wrapper: tvmgen_default___tvm_main__ returned %d\n", (int)result);
    return result;
}

// tvm_aot_main.c

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <tvm/runtime/c_runtime_api.h>

// Declare the wrapper function
int32_t tvm_wrapper(void* args, int32_t* arg_type_codes, int32_t num_args, void* out_ret_value, int32_t* out_ret_tcode, void* resource_handle);

/**
 * Runs one inference through tvm_wrapper.
 *
 * Builds a [1, 64] float32 input tensor filled with i / 64, a [1, 10] float32
 * output tensor, packs both as DLTensor handles and prints the ten outputs.
 *
 * @return 0 on success, 1 if allocation fails or the wrapper reports an error.
 */
int main(void) {
    enum { INPUT_LEN = 64, OUTPUT_LEN = 10, BUFFER_ALIGN = 64 };

    // Allocate the output buffer.
    // C11 requires the size passed to aligned_alloc to be a multiple of the
    // alignment, so round the 40 payload bytes up to the next multiple of 64.
    const size_t output_bytes =
        ((OUTPUT_LEN * sizeof(float) + BUFFER_ALIGN - 1) / BUFFER_ALIGN) * BUFFER_ALIGN;
    float* output_data = aligned_alloc(BUFFER_ALIGN, output_bytes);
    if (output_data == NULL) {
        printf("Error allocating the output buffer\n");
        return 1;
    }

    // Fill input_data with your input values here
    // NOTE(review): the input is given the same 64-byte alignment as the
    // output; presumably the generated code expects aligned tensor data -
    // confirm against the TVM build settings.
    _Alignas(BUFFER_ALIGN) float input_data[INPUT_LEN];
    for (int i = 0; i < INPUT_LEN; i++) {
        input_data[i] = (float)i / 64.0f;
    }

    // Prepare TVM arguments
    int64_t input_shape[2] = {1, INPUT_LEN};
    DLTensor input_tensor = {0};
    input_tensor.data = input_data;
    input_tensor.device = (DLDevice){kDLCPU, 0};
    input_tensor.ndim = 2;
    input_tensor.shape = input_shape;
    input_tensor.strides = NULL;
    input_tensor.byte_offset = 0;
    input_tensor.dtype = (DLDataType){kDLFloat, 32, 1};

    int64_t output_shape[2] = {1, OUTPUT_LEN};
    DLTensor output_tensor = {0};
    output_tensor.data = output_data;
    output_tensor.device = (DLDevice){kDLCPU, 0};
    output_tensor.ndim = 2;
    output_tensor.shape = output_shape;
    output_tensor.strides = NULL;
    output_tensor.byte_offset = 0;
    output_tensor.dtype = (DLDataType){kDLFloat, 32, 1};

    TVMValue args[2];
    // int32_t matches the element type tvm_wrapper's prototype expects.
    int32_t arg_type_codes[2];
    args[0].v_handle = &input_tensor;
    args[1].v_handle = &output_tensor;
    arg_type_codes[0] = kTVMDLTensorHandle;
    arg_type_codes[1] = kTVMDLTensorHandle;

    // Run the model
    int32_t ret = tvm_wrapper(args, arg_type_codes, 2, NULL, NULL, NULL);

    int exit_code = 0;
    if (ret != 0) {
        printf("Error running the model\n");
        exit_code = 1;
    } else {
        // Print the output
        printf("Output:\n");
        for (int i = 0; i < OUTPUT_LEN; i++) {
            printf("%f ", output_data[i]);
        }
        printf("\n");
    }

    // Release the output buffer on both the success and the error path.
    free(output_data);
    return exit_code;
}

Build Script

# Root of the local TVM checkout that provides the headers.
TVM_HOME=/home/koukyosyumei/Dev/tvm

# Lower each exported LLVM IR file to an object file.
for idx in 0 1; do
    llc -filetype=obj "tvm_aot_lib${idx}.ll" -o "tvm_aot_lib${idx}.o"
done

# Compile the two C translation units with the TVM / DLPack include paths.
for unit in tvm_aot_main tvm_wrapper; do
    gcc -g -static -c "${unit}.c" -o "${unit}.o" \
        -I"${TVM_HOME}/include" \
        -I"${TVM_HOME}/3rdparty/dlpack/include" \
        -I"${TVM_HOME}/3rdparty/dmlc-core/include" \
        -I"${TVM_HOME}/build/crt_config" \
        -I"${TVM_HOME}/build"
done

# Link everything into a static executable.
# NOTE(review): only libdl, libm and pthread are linked; no TVM runtime library
# appears on this line - confirm the generated objects do not need one.
gcc -g -static tvm_aot_main.o tvm_wrapper.o tvm_aot_lib1.o tvm_aot_lib0.o -o tvm_aot_inference -ldl -lm -pthread

Expected behavior

The compiled binary should return the predicted values.

Actual behavior

./tvm_aot_inference
tvm_wrapper: Calling tvmgen_default___tvm_main__
Segmentation fault

Backtrace with GDB

Using host libthread_db library "/lib/aarch64-linux-gnu/libthread_db.so.1".
tvm_wrapper: Calling tvmgen_default___tvm_main__

Program received signal SIGSEGV, Segmentation fault.
0x0000000000000000 in ?? ()
(gdb) bt
#0  0x0000000000000000 in ?? ()
#1  0x0000aaaaaaaa1bd0 in tvmgen_default___tvm_main___compute_ (dev_id=0, stack_shape=0xffffffffec90,
    stack_array=0xffffffffec30, input0_buffer_var=0xffffffffede8, stack_value=0xffffffffec18,
    stack_tcode=0xffffffffec0c, output_buffer_var=0xffffffffedc0)
#2  0x0000aaaaaaaa0db4 in tvmgen_default___tvm_main__ (args=<optimized out>, arg_type_ids=<optimized out>,
    num_args=<optimized out>, out_ret_value=<optimized out>, out_ret_tcode=<optimized out>,
    resource_handle=<optimized out>)
#3  0x0000aaaaaaaa0c14 in tvm_wrapper (args=0xffffffffedb0, arg_type_codes=0xffffffffed88, num_args=2,
    out_ret_value=0x0, out_ret_tcode=0x0, resource_handle=0x0) at tvm_wrapper.c:9
#4  0x0000aaaaaaaa0b20 in main () at tvm_aot_main.c:44
(gdb) q
A debugging session is active.

        Inferior 1 [process 65829] will be killed.

Environment

tvm                           0.18.dev0

Description:    Ubuntu 22.04.3 LTS
Release:        22.04

Steps to reproduce

Triage

needs-triage

Sep 11 '24 21:09 Koukyosyumei

tvm tvm copied to clipboard

[Bug] Segmentation fault in AOT-compiled TVM model during inference

Expected behavior

Actual behavior

Environment

Steps to reproduce

Triage

tvm
tvm copied to clipboard