tvm
tvm copied to clipboard
[Bug] [Relax] Segmentation fault when executing inference for the IR with branch in the runtime
Actual behavior
...
[09:37:39] /software/tvm/src/runtime/relax_vm/vm.cc:735:
pc = 21, execute: vm.builtin.alloc_tensor
Segmentation fault (core dumped)
Steps to reproduce
import tvm
from tvm import relax
import numpy as np
from tvm.script import ir as I
from tvm.script import tir as T
from tvm.script import relax as R
@I.ir_module
class Module:
@R.function
def main(cond: R.Tensor((), dtype="bool"), x: R.Tensor((1,), dtype="float32")) -> R.Tensor((4703,), dtype="float32"):
cls = Module
if cond:
gv = R.add(x,x)
gv1 = R.add(x,x)
y_then = R.multiply(gv,gv1)
y: R.Tensor((1,), dtype="float32") = y_then
else:
gv3 = R.multiply(x,x)
gv4 = R.multiply(x,x)
y_else = R.add(gv3,gv4)
y: R.Tensor((1,), dtype="float32") = y_else
return y
mod = Module
input_0 = tvm.nd.array(np.bool_(False))
input_1 = tvm.nd.array(np.random.random([1]).astype('float32'))
ex = relax.build(mod, target='llvm')
vm = relax.VirtualMachine(ex, tvm.cpu())
mod_outputs = vm['main'](input_0,input_1) # segfault!
cc @junrushao
@Lunderberg @tqchen @Hzfengsy @junrushao
Hi all, I encountered a Segmentation Fault error while running inference on IRs containing an IF-Else structure. Since Segfaults are typically high-priority bugs, could you please assist me in investigating this issue? Thank you!
Looks like this is a bug in the `StaticPlanBlockMemory` pass. Printing the module before and after this pass shows the following `main` function for the example.
Before `StaticPlanBlockMemory` (click to expand)
@R.function
def main(cond: R.Tensor((), dtype="bool"), x: R.Tensor((1,), dtype="float32")) -> R.Tensor(
(1,), dtype="float32"
):
R.func_attr({"relax.force_pure": True})
cls = ModBeforeStaticPlanBlockMemory
if cond:
alloc = R.builtin.alloc_tensor(
R.shape([1]), R.dtype("float32"), R.prim_value(0), R.str("global")
)
cls.add(x, x, alloc)
gv = alloc
alloc1 = R.builtin.alloc_tensor(
R.shape([1]), R.dtype("float32"), R.prim_value(0), R.str("global")
)
cls.add(x, x, alloc1)
gv1 = alloc1
alloc2 = R.builtin.alloc_tensor(
R.shape([1]), R.dtype("float32"), R.prim_value(0), R.str("global")
)
cls.multiply(gv, gv1, alloc2)
y_then = alloc2
y: R.Tensor((1,), dtype="float32") = y_then
else:
alloc3 = R.builtin.alloc_tensor(
R.shape([1]), R.dtype("float32"), R.prim_value(0), R.str("global")
)
cls.multiply(x, x, alloc3)
gv3 = alloc3
alloc4 = R.builtin.alloc_tensor(
R.shape([1]), R.dtype("float32"), R.prim_value(0), R.str("global")
)
cls.multiply(x, x, alloc4)
gv4 = alloc4
alloc5 = R.builtin.alloc_tensor(
R.shape([1]), R.dtype("float32"), R.prim_value(0), R.str("global")
)
cls.add(gv3, gv4, alloc5)
y_else = alloc5
y: R.Tensor((1,), dtype="float32") = y_else
return y
After `StaticPlanBlockMemory` (click to expand)
@R.function
def main(cond: R.Tensor((), dtype="bool"), x: R.Tensor((1,), dtype="float32")) -> R.Tensor(
(1,), dtype="float32"
):
R.func_attr({"relax.force_pure": True})
cls = ModAfterStaticPlanBlockMemory
if cond:
storage = R.memory.alloc_storage(
R.shape([4]), R.prim_value(0), R.str("global"), R.dtype("float32")
)
alloc = R.memory.alloc_tensor(
storage, R.prim_value(0), R.shape([1]), R.dtype("float32")
)
cls.add(x, x, alloc)
gv = alloc
storage1 = R.memory.alloc_storage(
R.shape([4]), R.prim_value(0), R.str("global"), R.dtype("float32")
)
alloc1 = R.memory.alloc_tensor(
storage1, R.prim_value(0), R.shape([1]), R.dtype("float32")
)
cls.add(x, x, alloc1)
gv1 = alloc1
alloc2 = R.builtin.alloc_tensor(
R.shape([1]), R.dtype("float32"), R.prim_value(0), R.str("global")
)
cls.multiply(gv, gv1, alloc2)
y_then = alloc2
y: R.Tensor((1,), dtype="float32") = y_then
else:
storage: R.Object
storage1: R.Object
alloc3 = R.memory.alloc_tensor(
storage, R.prim_value(0), R.shape([1]), R.dtype("float32")
)
cls.multiply(x, x, alloc3)
gv3 = alloc3
alloc4 = R.memory.alloc_tensor(
storage1, R.prim_value(0), R.shape([1]), R.dtype("float32")
)
cls.multiply(x, x, alloc4)
gv4 = alloc4
alloc5 = R.builtin.alloc_tensor(
R.shape([1]), R.dtype("float32"), R.prim_value(0), R.str("global")
)
cls.add(gv3, gv4, alloc5)
y_else = alloc5
y: R.Tensor((1,), dtype="float32") = y_else
return y
Prior to `StaticPlanBlockMemory`, the allocations in the `if` and `else` branches are independent. Afterwards, both branches refer to the same `storage` and `storage1` objects, even though these objects have only been defined in the `if` branch.
Investigating to see if there's a clear cause for this.
After looking into it, it looks like some larger fixes may be required. Currently, the `StaticPlanBlockMemory` pass does have some handling of `relax::If` nodes in the `StorageAllocatorInit`, but the `StorageAllocator` itself assumes that an allocation made prior to the first usage will remain valid for all later usages.
I've put together a few unit tests to experiment with it (in this dev branch). Depending on which expressions are moved into the conditional, it either results in out-of-scope access of the R.memory.alloc_storage result, or extra allocations.
@Lunderberg Thanks for the thorough investigation and the plan for a fix.