AITemplate
AITemplate copied to clipboard
Compile issue: Tensor conv2d_bias_64_1 not in outputs for op avg_pool2d_53
summary
I use AITemplate to re-construct a diffusion model which is slightly different than the one in examples, but error occurs while call compile_model(). Since it says some conv2d_bias tensor is not in the outputs of some AvgPooling op, so I just show the related code as below, where 'AvgPooling' is only used by the 'Resample' module. I review the forward implementation of 'ResidualBlock' several times, but no clue can be found.
code1
the nn.AvgPool2d is only defined in Resample Module, and only used in ResidualBlock forward()
class Resample(nn.Module):
def __init__(self, in_dim, out_dim, scale_factor, use_conv=False):
assert scale_factor in [0.5, 1.0, 2.0]
super(Resample, self).__init__()
self.in_dim = in_dim
self.out_dim = out_dim
self.scale_factor = scale_factor
self.use_conv = use_conv
# layers
if scale_factor == 2.0:
self.resample = nn.Sequential(
nn.Upsampling2d(scale_factor=scale_factor, mode='nearest'),
nn.Conv2dBias(in_dim, out_dim, 3, 1, padding=1) if use_conv else nn.Identity())
elif scale_factor == 0.5:
if use_conv:
self.resample = nn.Conv2dBias(in_dim, out_dim, 3, stride=2, padding=1)
else:
self.resample = nn.AvgPool2d(kernel_size=2, stride=2, padding=0)
else:
self.resample = nn.Identity()
def forward(self, x):
return self.resample(x)
class SiLU(nn.Module):
def __init__(self) -> None:
super(SiLU, self).__init__()
self.silu = ops.silu
def forward(self, x):
out = self.silu(x)
return out
class ResidualBlock(nn.Module):
def __init__(self, in_dim, embed_dim, out_dim, use_scale_shift_norm=True,
scale_factor=1.0, dropout=0.0):
super(ResidualBlock, self).__init__()
self.in_dim = in_dim
self.embed_dim = embed_dim
self.out_dim = out_dim
self.use_scale_shift_norm = use_scale_shift_norm
self.scale_factor = scale_factor
# layers
self.layer1 = nn.ModuleList([
nn.GroupNorm(32, in_dim),
SiLU(),
nn.Conv2dBias(in_dim, out_dim, 3, 1, padding=1)])
self.resample = Resample(in_dim, in_dim, scale_factor, use_conv=False)
self.embedding = nn.Sequential(
SiLU(),
nn.Linear(embed_dim, out_dim * 2 if use_scale_shift_norm else out_dim))
self.layer2 = nn.ModuleList([
nn.GroupNorm(32, out_dim),
SiLU(),
nn.Dropout(dropout),
nn.Conv2dBias(out_dim, out_dim, 3, 1, padding=1)])
self.shortcut = nn.Identity() if in_dim == out_dim else nn.Conv2dBias(in_dim, out_dim, 1, 1)
def forward(self, x, e):
hidden_states = x
hidden_states = self.layer1[0](hidden_states)
hidden_states_0 = self.layer1[1](hidden_states)
x = self.resample(x)
hidden_states_1 = self.resample(hidden_states_0) # error may occur here ?
hidden_states_2 = self.layer1[2](hidden_states_1)
e = self.embedding(e)
bs, dim = get_shape(e)
e = ops.reshape()(e, [bs, 1, 1, dim])
hidden_states = hidden_states_2 + e
hidden_states = self.layer2[0](hidden_states)
hidden_states = self.layer2[1](hidden_states)
hidden_states = self.layer2[2](hidden_states)
hidden_states = self.layer2[3](hidden_states)
x = self.shortcut(x)
out = hidden_states + x
return out
code2
code in convert2ait_upsampler.py
def rebuild_net(use_fp16_acc=False, convert_conv_to_gemm=False):
...
net = pytorch_model().cuda().half() # use fp16
net.eval()
ait_net = AITUpsampler()
ait_net.name_parameter_tensor()
mapped_params = map_pt_params(ait_net, net)
batch_size = 4
hh = 256
ww = 256
cc = 3
x0 = Tensor(
[batch_size, hh, ww, cc], name="input0", is_input=True
)
t = Tensor([batch_size, upsampler256_config['dim']], name="input1", is_input=True)
y = Tensor([batch_size, upsampler256_config['y_dim']], name="input2", is_input=True)
concat = Tensor(
[batch_size, hh, ww, cc], name="input3", is_input=True
)
Y_out = ait_net(x0, t, y, concat)
target = detect_target(
use_fp16_acc=use_fp16_acc, convert_conv_to_gemm=convert_conv_to_gemm
)
compile_model(Y_out, target, "./tmp", "AIT_UPSAMPLER256", constants=mapped_params)
Error
Traceback (most recent call last):
File "convert2ait_upsampler.py", line 106, in <module>
compile_net(True, True)
File "convert2ait_upsampler.py", line 103, in compile_net
compile_model(Y_out, target, "./tmp", "AIT_UPSAMPLER", constants=mapped_params)
File "/home/envs/zero/lib/python3.8/site-packages/aitemplate/compiler/compiler.py", line 152, in compile_model
compiler.transform.remove_no_ops(graph)
File "/home/envs/zero/lib/python3.8/site-packages/aitemplate/compiler/transform/remove_no_ops.py", line 167, in remove_no_ops
sorted_graph = f_pass(sorted_graph)
File "/home/envs/zero/lib/python3.8/site-packages/aitemplate/compiler/transform/remove_no_ops.py", line 82, in _remove_no_op_expands
return transform_utils.sanitize_sorted_graph(sorted_graph)
File "/home/envs/zero/lib/python3.8/site-packages/aitemplate/compiler/transform/transform_utils.py", line 272, in sanitize_sorted_graph
check_graph_validity(new_sorted_graph, raiseError=True)
File "/home/envs/zero/lib/python3.8/site-packages/aitemplate/compiler/transform/transform_utils.py", line 69, in check_graph_validity
valid = handleError(
File "/home/envs/zero/lib/python3.8/site-packages/aitemplate/compiler/transform/transform_utils.py", line 40, in handleError
raise RuntimeError(msg)
RuntimeError: Tensor conv2d_bias_64_1 not in outputs for op avg_pool2d_53
Can you try commenting out this line: https://github.com/facebookincubator/AITemplate/blob/main/python/aitemplate/compiler/transform/remove_no_ops.py#L163? I think you hit a bug in this pass. Also, are there expand ops in your model?
Is there an update on this problem? I'm facing a similar problem and the line to be commented off above is an parenthesis. So in the function _is_compatible_with_broadcasting
we just try simply return True?
My code is trying to build Backbone in YOLOX:
class YOLOPAFPN(nn.Module):
"""
YOLOv3 model. Darknet 53 is the default backbone of this model.
"""
def __init__(
self,
depth=1.0,
width=1.0,
in_features=("dark3", "dark4", "dark5"),
in_channels=[256, 512, 1024],
depthwise=False,
act="relu",
):
super().__init__()
self.backbone = CSPDarknet(depth, width, depthwise=depthwise, out_features=in_features, act=act)
self.in_features = in_features
self.in_channels = in_channels
Conv = DWConv if depthwise else BaseConv
self.upsample = nn.upsampling2d(scale_factor=2, mode="nearest")
self.lateral_conv0 = BaseConv(
int(in_channels[2] * width), int(in_channels[1] * width), 1, 1, act=act
)
self.C3_p4 = CSPLayer(
int(2 * in_channels[1] * width),
int(in_channels[1] * width),
round(3 * depth),
False,
depthwise=depthwise,
act=act,
) # cat
self.reduce_conv1 = BaseConv(
int(in_channels[1] * width), int(in_channels[0] * width), 1, 1, act=act
)
self.C3_p3 = CSPLayer(
int(2 * in_channels[0] * width),
int(in_channels[0] * width),
round(3 * depth),
False,
depthwise=depthwise,
act=act,
)
# bottom-up conv
self.bu_conv2 = Conv(
int(in_channels[0] * width), int(in_channels[0] * width), 3, 2, act=act
)
self.C3_n3 = CSPLayer(
int(2 * in_channels[0] * width),
int(in_channels[1] * width),
round(3 * depth),
False,
depthwise=depthwise,
act=act,
)
# bottom-up conv
self.bu_conv1 = Conv(
int(in_channels[1] * width), int(in_channels[1] * width), 3, 2, act=act
)
self.C3_n4 = CSPLayer(
int(2 * in_channels[1] * width),
int(in_channels[2] * width),
round(3 * depth),
False,
depthwise=depthwise,
act=act,
)
def forward(self, input):
"""
Args:
inputs: input images.
Returns:
Tuple[Tensor]: FPN feature.
"""
# backbone
out_features = self.backbone(input)
features = [out_features[f] for f in self.in_features]
[x2, x1, x0] = features
fpn_out0 = self.lateral_conv0(x0) # 1024->512/32
f_out0 = self.upsample(fpn_out0) # 512/16
f_out0 = aitcat()([f_out0, x1], 3) # 512->1024/16
f_out0 = self.C3_p4(f_out0) # 1024->512/16
fpn_out1 = self.reduce_conv1(f_out0) # 512->256/16
f_out1 = self.upsample(fpn_out1) # 256/8
f_out1 = aitcat()([f_out1, x2], 3) # 256->512/8
pan_out2 = self.C3_p3(f_out1) # 512->256/8
p_out1 = self.bu_conv2(pan_out2) # 256->256/16
p_out1 = aitcat()([p_out1, fpn_out1], 3) # 256->512/16
pan_out1 = self.C3_n3(p_out1) # 512->512/16
p_out0 = self.bu_conv1(pan_out1) # 512->512/32
p_out0 = aitcat()([p_out0, fpn_out0], 3) # 512->1024/32
pan_out0 = self.C3_n4(p_out0) # 1024->1024/32
# outputs = (pan_out2, pan_out1, pan_out0)
return pan_out2
I have already replaced the convolution operation with aitemplate.frontend.nn.conv2dBias
and can compile successfully when only have f_out0
.
My error looks like this:
.
.
.
2023-07-17 19:50:15,384 DEBUG <aitemplate.compiler.transform.name_graph> before name_graph: func_cnt=101, tensor_cnt=0, len(func_name_to_tensor_cnt)=101, len(user_provided_dim)=546
2023-07-17 19:50:15,385 DEBUG <aitemplate.compiler.transform.name_graph> after name_graph: func_cnt=101, tensor_cnt=0, len(func_name_to_tensor_cnt)=101, len(user_provided_dim)=546
2023-07-17 19:50:15,565 DEBUG <aitemplate.utils.graph_utils> Dumped dedup_symbolic_name visualization to ./tmp/test_compile_yolox_backbone_1/dedup_symbolic_name_graph_vis.html
2023-07-17 19:50:15,569 INFO <aitemplate.compiler.transform.memory_planning> Workspace shared_size=0 unique_size=0
2023-07-17 19:50:15,569 INFO <aitemplate.compiler.transform.memory_planning> max_blob=3670016 constant_offset=0
Traceback (most recent call last):
File "/workspaces/torchsparse-misc/conversion/yolox/test_ait_model.py", line 72, in <module>
model_compiled = compile_module(
File "/workspaces/torchsparse-misc/conversion/yolox/test_ait_model.py", line 44, in compile_module
module = compile_model(y, target, "./tmp", model_name)
File "/usr/local/lib/python3.8/dist-packages/aitemplate/utils/misc.py", line 93, in inner_function
return f(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/aitemplate/compiler/compiler.py", line 308, in compile_model
_verify_outputs_still_in_graph(graph, output_tensors)
File "/usr/local/lib/python3.8/dist-packages/aitemplate/compiler/compiler.py", line 95, in _verify_outputs_still_in_graph
raise ValueError(
ValueError: Output output_0 was not found in the graph after optimizations.
Ran into a similar issue while working on RRDBNet.
def forward(self, x):
x1 = self.lrelu(self.conv1(x))
x2 = self.lrelu(self.conv2(self.cat((x, x1), 3)))
x3 = self.lrelu(self.conv3(self.cat((x, x1, x2), 3)))
x4 = self.lrelu(self.conv4(self.cat((x, x1, x2, x3), 3)))
x5 = self.conv5(self.cat((x, x1, x2, x3, x4), 3))
# Empirically, we use 0.2 to scale the residual for better performance
out = x5 * 0.2 + x
return out
When using ops.concatenate
directly it appears that the next conv layers are not used, only the first is used.
AIT dump (dump_ait_to_py):
def model(self):
conv2d_bias_0_0 = ops.conv2d_bias(dilate=1, group=1, pad=1, stride=1)(self.rdb_input, self.conv1_weight, self.conv1_bias)
# Set outputs
# End of setting outputs
return
Another indicator was that profiling only ran for the first conv layer.
~~Marking as is_input
resolves the issue.~~
Wrapping the operator appears to resolve the issue.
def cat(self, tensors, dim):
out = ops.concatenate()(tensors, dim)
return out