tvm icon indicating copy to clipboard operation
tvm copied to clipboard

[Bug][CodeGen] V 0.18.0 compilation after tir.InjectSoftwarePipeline causes Segmentation Fault

Open talha-ahsan opened this issue 9 months ago • 0 comments

Expected behavior

Successful compilation, or an error message explaining why the compilation target is invalid.

Actual behavior

Segmentation Fault

Environment

OS: Ubuntu 20.04 LTS; Python: 3.10.4; TVM: v0.18.0, built from source as a CPU-only build (no GPU in use)

Steps to reproduce

import tvm
from tvm import tir
from tvm.tir.analysis.analysis import verify_well_formed, verify_memory

from tvm.script import tir as T

# Reproducer kernel: adds a per-channel value from p1 to p0 and stores the
# result in T_add, 16 lanes at a time. The store offset (cse_var_1) is computed
# from a large nested T.Let expression instead of a simple constant stride.
@T.prim_func
def tvmgen_default_fused_add_6(p0: T.Buffer((1, 64, 32, 32), "float32"), p1: T.Buffer((64, 1, 1), "float32"), T_add: T.Buffer((1, 64, 32, 32), "float32")):
    T.func_attr({"from_legacy_te_schedule": T.bool(True), "hash": "47763fd875dd0f0b", "target": T.target({"host": {"keys": ["cpu"], "kind": "llvm", "mtriple": "x86_64-pc-linux-gnu", "tag": ""}, "keys": ["cpu"], "kind": "llvm", "mtriple": "x86_64-pc-linux-gnu", "tag": ""}), "tir.noalias": T.bool(True)})
    # Outer loop over the 64 fused (ax0, ax1) positions, marked parallel.
    for ax0_ax1_fused in T.parallel(64):
        # Inner 32 x 2 grid; each (ax2, ax3_outer) pair handles one 16-element slice.
        for ax2, ax3_outer in T.grid(32, 2):
            # Free variables that are only given values through the
            # `where={...}` bindings of the T.Let expressions below.
            a50 = T.float32()
            e47 = T.int32()
            d85 = T.float32()
            b06 = T.uint32()
            c94 = T.float32()
            c4a = T.uint32()
            # Flat offset = ax0_ax1_fused * <multiplier> + ax2 * 32 + ax3_outer * 16.
            # The multiplier is Cast("int32", acosh(Cast("float32", <inner Let>))),
            # wrapped in an outer T.Let that binds a50.
            # The inner Let's body uses integer constants only; its binding of c94
            # (the only place a50 is read) is not referenced by that body.
            # e47, d85, b06 and c4a are likewise bound but never read.
            # NOTE(review): a50 is declared as a scalar float32 but is bound to an
            # expression built from T.Shuffle / T.Broadcast vectors -- possibly
            # relevant to the reported crash; unconfirmed.
            cse_var_1: T.int32 = ax0_ax1_fused * T.Let(T.Cast("int32", T.acosh(T.Cast("float32", T.Let(T.min(704393518, T.Mul(1128758173, -304600591)) + T.Div(786621027, 154465473) - T.Sub(-1185977991, 741197984), where={c94: a50 - a50 + (a50 - (T.max(T.Let(a50, where={c4a: T.uint32(207842132)}), T.Cast("float32", -276537116) / a50) - T.truncmod(a50, a50) / T.ceil(T.truncmod(a50 - a50, T.float32(0.32240459004161348))))) + a50})))), where={a50: T.Shuffle([T.Let(T.Broadcast(T.float32(0.025094959058501232), 2), where={e47: -1608771718}), T.Broadcast(T.float32(0.32266743288635236), 4) - T.max(T.Broadcast(T.float32(0.28570350492695662), 4), T.Broadcast(T.float32(0.038162837982428366), 4)), T.Broadcast(T.float32(0.4958602758740962), 2) / T.Broadcast(T.float32(0.47331633233220372), 2), T.Broadcast(T.float32(0.63436709130106428), 4), T.Broadcast(T.nextafter(T.Shuffle([T.Broadcast(T.float32(0.49933378839457165), 4)], [0]), T.Shuffle([T.Broadcast(T.float32(0.61736031799398816), 4)], [2])), 4)], [T.Let(T.Let(T.Broadcast(T.uint32(186620266), 4), where={b06: T.uint32(1717657565)}), where={d85: T.float32(0.77145962398151591)})]) / T.min(T.min(T.float32(0.14105601079793095), T.float32(0.95742502636101623)), T.Mul(T.float32(0.98474368722054284), T.float32(0.46047640091329001)) + T.float32(0.35016339890396531) * T.min(T.Mul(T.float32(0.8305658331332687), T.float32(0.93471262737065297)) / T.float32(0.76050683782128836), T.float32(0.38560427994464419))) / T.Cast("float32", T.uint32(268509650))}) + ax2 * 32 + ax3_outer * 16
            # 1-D views over the same data pointers as the function's buffers
            # (1 * 64 * 32 * 32 = 65536 elements for T_add / p0, 64 for p1).
            T_add_1 = T.Buffer((65536,), data=T_add.data)
            p0_1 = T.Buffer((65536,), data=p0.data)
            p1_1 = T.Buffer((64,), data=p1.data)
            # 16-lane store: add p1's element for this fused index, broadcast
            # to 16 lanes, to the matching 16-element slice of p0.
            T_add_1[cse_var_1:cse_var_1 + 16] = p0_1[cse_var_1:cse_var_1 + 16] + T.Broadcast(p1_1[ax0_ax1_fused], 16)
            
# Driver: wrap the PrimFunc in an IRModule, validate it, then try to build it.
func = tvmgen_default_fused_add_6
mod = tvm.ir.IRModule({'main': func})

# Compile only if BOTH checks pass. The parentheses matter: `not` binds more
# tightly than `and`, so the unparenthesized form `not a and b` means
# `(not a) and b`, which never reports a verify_memory failure.
if not (verify_well_formed(mod) and verify_memory(func)):
    print("Validation failed")
else:
    print("Beginning Compilation")
    # Build inside a PassContext with opt_level=4; the built module is not
    # used afterwards, the script only checks that the build completes.
    with tvm.transform.PassContext(opt_level=4):
        nopt_mod = tvm.build(mod)
    print("Success!")

Triage

Please refer to the list of label tags here to find the relevant tags and add them below in a bullet format (example below).

  • needs-triage
  • tir

talha-ahsan avatar Jan 13 '25 23:01 talha-ahsan