AITemplate copied to clipboard
Compile issue: Tensor conv2d_bias_64_1 not in outputs for op avg_pool2d_53
I use AITemplate to re-construct a diffusion model which is slightly different than the one in examples, but error occurs while call compile_model(). Since it says some conv2d_bias tensor is not in the outputs of some AvgPooling op, so I just show the related code as below, where 'AvgPooling' is only used by the 'Resample' module. I review the forward implementation of 'ResidualBlock' several times, but no clue can be found.
the nn.AvgPool2d is only defined in Resample Module, and only used in ResidualBlock forward()
class Resample(nn.Module):
def __init__(self, in_dim, out_dim, scale_factor, use_conv=False):
assert scale_factor in [0.5, 1.0, 2.0]
super(Resample, self).__init__()
self.in_dim = in_dim
self.out_dim = out_dim
self.scale_factor = scale_factor
self.use_conv = use_conv
# layers
if scale_factor == 2.0:
self.resample = nn.Sequential(
nn.Upsampling2d(scale_factor=scale_factor, mode='nearest'),
nn.Conv2dBias(in_dim, out_dim, 3, 1, padding=1) if use_conv else nn.Identity())
elif scale_factor == 0.5:
if use_conv:
self.resample = nn.Conv2dBias(in_dim, out_dim, 3, stride=2, padding=1)
self.resample = nn.AvgPool2d(kernel_size=2, stride=2, padding=0)
self.resample = nn.Identity()
def forward(self, x):
return self.resample(x)
class SiLU(nn.Module):
def __init__(self) -> None:
super(SiLU, self).__init__()
self.silu = ops.silu
def forward(self, x):
out = self.silu(x)
return out
class ResidualBlock(nn.Module):
def __init__(self, in_dim, embed_dim, out_dim, use_scale_shift_norm=True,
scale_factor=1.0, dropout=0.0):
super(ResidualBlock, self).__init__()
self.in_dim = in_dim
self.embed_dim = embed_dim
self.out_dim = out_dim
self.use_scale_shift_norm = use_scale_shift_norm
self.scale_factor = scale_factor
# layers
self.layer1 = nn.ModuleList([
nn.GroupNorm(32, in_dim),
nn.Conv2dBias(in_dim, out_dim, 3, 1, padding=1)])
self.resample = Resample(in_dim, in_dim, scale_factor, use_conv=False)
self.embedding = nn.Sequential(
nn.Linear(embed_dim, out_dim * 2 if use_scale_shift_norm else out_dim))
self.layer2 = nn.ModuleList([
nn.GroupNorm(32, out_dim),
nn.Conv2dBias(out_dim, out_dim, 3, 1, padding=1)])
self.shortcut = nn.Identity() if in_dim == out_dim else nn.Conv2dBias(in_dim, out_dim, 1, 1)
def forward(self, x, e):
hidden_states = x
hidden_states = self.layer1[0](hidden_states)
hidden_states_0 = self.layer1[1](hidden_states)
x = self.resample(x)
hidden_states_1 = self.resample(hidden_states_0) # error may occur here ?
hidden_states_2 = self.layer1[2](hidden_states_1)
e = self.embedding(e)
bs, dim = get_shape(e)
e = ops.reshape()(e, [bs, 1, 1, dim])
hidden_states = hidden_states_2 + e
hidden_states = self.layer2[0](hidden_states)
hidden_states = self.layer2[1](hidden_states)
hidden_states = self.layer2[2](hidden_states)
hidden_states = self.layer2[3](hidden_states)
x = self.shortcut(x)
out = hidden_states + x
return out
code in
def rebuild_net(use_fp16_acc=False, convert_conv_to_gemm=False):
net = pytorch_model().cuda().half() # use fp16
ait_net = AITUpsampler()
mapped_params = map_pt_params(ait_net, net)
batch_size = 4
hh = 256
ww = 256
cc = 3
x0 = Tensor(
[batch_size, hh, ww, cc], name="input0", is_input=True
t = Tensor([batch_size, upsampler256_config['dim']], name="input1", is_input=True)
y = Tensor([batch_size, upsampler256_config['y_dim']], name="input2", is_input=True)
concat = Tensor(
[batch_size, hh, ww, cc], name="input3", is_input=True
Y_out = ait_net(x0, t, y, concat)
target = detect_target(
use_fp16_acc=use_fp16_acc, convert_conv_to_gemm=convert_conv_to_gemm
compile_model(Y_out, target, "./tmp", "AIT_UPSAMPLER256", constants=mapped_params)
Traceback (most recent call last):
File "", line 106, in <module>
compile_net(True, True)
File "", line 103, in compile_net
compile_model(Y_out, target, "./tmp", "AIT_UPSAMPLER", constants=mapped_params)
File "/home/envs/zero/lib/python3.8/site-packages/aitemplate/compiler/", line 152, in compile_model
File "/home/envs/zero/lib/python3.8/site-packages/aitemplate/compiler/transform/", line 167, in remove_no_ops
sorted_graph = f_pass(sorted_graph)
File "/home/envs/zero/lib/python3.8/site-packages/aitemplate/compiler/transform/", line 82, in _remove_no_op_expands
return transform_utils.sanitize_sorted_graph(sorted_graph)
File "/home/envs/zero/lib/python3.8/site-packages/aitemplate/compiler/transform/", line 272, in sanitize_sorted_graph
check_graph_validity(new_sorted_graph, raiseError=True)
File "/home/envs/zero/lib/python3.8/site-packages/aitemplate/compiler/transform/", line 69, in check_graph_validity
valid = handleError(
File "/home/envs/zero/lib/python3.8/site-packages/aitemplate/compiler/transform/", line 40, in handleError
raise RuntimeError(msg)
RuntimeError: Tensor conv2d_bias_64_1 not in outputs for op avg_pool2d_53
Can you try commenting out this line: I think you hit a bug in this pass. Also, are there expand ops in your model?
Is there an update on this problem? I'm facing a similar problem and the line to be commented off above is an parenthesis. So in the function _is_compatible_with_broadcasting
we just try simply return True?
My code is trying to build Backbone in YOLOX:
class YOLOPAFPN(nn.Module):
YOLOv3 model. Darknet 53 is the default backbone of this model.
def __init__(
in_features=("dark3", "dark4", "dark5"),
in_channels=[256, 512, 1024],
self.backbone = CSPDarknet(depth, width, depthwise=depthwise, out_features=in_features, act=act)
self.in_features = in_features
self.in_channels = in_channels
Conv = DWConv if depthwise else BaseConv
self.upsample = nn.upsampling2d(scale_factor=2, mode="nearest")
self.lateral_conv0 = BaseConv(
int(in_channels[2] * width), int(in_channels[1] * width), 1, 1, act=act
self.C3_p4 = CSPLayer(
int(2 * in_channels[1] * width),
int(in_channels[1] * width),
round(3 * depth),
) # cat
self.reduce_conv1 = BaseConv(
int(in_channels[1] * width), int(in_channels[0] * width), 1, 1, act=act
self.C3_p3 = CSPLayer(
int(2 * in_channels[0] * width),
int(in_channels[0] * width),
round(3 * depth),
# bottom-up conv
self.bu_conv2 = Conv(
int(in_channels[0] * width), int(in_channels[0] * width), 3, 2, act=act
self.C3_n3 = CSPLayer(
int(2 * in_channels[0] * width),
int(in_channels[1] * width),
round(3 * depth),
# bottom-up conv
self.bu_conv1 = Conv(
int(in_channels[1] * width), int(in_channels[1] * width), 3, 2, act=act
self.C3_n4 = CSPLayer(
int(2 * in_channels[1] * width),
int(in_channels[2] * width),
round(3 * depth),
def forward(self, input):
inputs: input images.
Tuple[Tensor]: FPN feature.
# backbone
out_features = self.backbone(input)
features = [out_features[f] for f in self.in_features]
[x2, x1, x0] = features
fpn_out0 = self.lateral_conv0(x0) # 1024->512/32
f_out0 = self.upsample(fpn_out0) # 512/16
f_out0 = aitcat()([f_out0, x1], 3) # 512->1024/16
f_out0 = self.C3_p4(f_out0) # 1024->512/16
fpn_out1 = self.reduce_conv1(f_out0) # 512->256/16
f_out1 = self.upsample(fpn_out1) # 256/8
f_out1 = aitcat()([f_out1, x2], 3) # 256->512/8
pan_out2 = self.C3_p3(f_out1) # 512->256/8
p_out1 = self.bu_conv2(pan_out2) # 256->256/16
p_out1 = aitcat()([p_out1, fpn_out1], 3) # 256->512/16
pan_out1 = self.C3_n3(p_out1) # 512->512/16
p_out0 = self.bu_conv1(pan_out1) # 512->512/32
p_out0 = aitcat()([p_out0, fpn_out0], 3) # 512->1024/32
pan_out0 = self.C3_n4(p_out0) # 1024->1024/32
# outputs = (pan_out2, pan_out1, pan_out0)
return pan_out2
I have already replaced the convolution operation with aitemplate.frontend.nn.conv2dBias
and can compile successfully when only have f_out0
My error looks like this:
2023-07-17 19:50:15,384 DEBUG <aitemplate.compiler.transform.name_graph> before name_graph: func_cnt=101, tensor_cnt=0, len(func_name_to_tensor_cnt)=101, len(user_provided_dim)=546
2023-07-17 19:50:15,385 DEBUG <aitemplate.compiler.transform.name_graph> after name_graph: func_cnt=101, tensor_cnt=0, len(func_name_to_tensor_cnt)=101, len(user_provided_dim)=546
2023-07-17 19:50:15,565 DEBUG <aitemplate.utils.graph_utils> Dumped dedup_symbolic_name visualization to ./tmp/test_compile_yolox_backbone_1/dedup_symbolic_name_graph_vis.html
2023-07-17 19:50:15,569 INFO <aitemplate.compiler.transform.memory_planning> Workspace shared_size=0 unique_size=0
2023-07-17 19:50:15,569 INFO <aitemplate.compiler.transform.memory_planning> max_blob=3670016 constant_offset=0
Traceback (most recent call last):
File "/workspaces/torchsparse-misc/conversion/yolox/", line 72, in <module>
model_compiled = compile_module(
File "/workspaces/torchsparse-misc/conversion/yolox/", line 44, in compile_module
module = compile_model(y, target, "./tmp", model_name)
File "/usr/local/lib/python3.8/dist-packages/aitemplate/utils/", line 93, in inner_function
return f(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/aitemplate/compiler/", line 308, in compile_model
_verify_outputs_still_in_graph(graph, output_tensors)
File "/usr/local/lib/python3.8/dist-packages/aitemplate/compiler/", line 95, in _verify_outputs_still_in_graph
raise ValueError(
ValueError: Output output_0 was not found in the graph after optimizations.
Ran into a similar issue while working on RRDBNet.
def forward(self, x):
x1 = self.lrelu(self.conv1(x))
x2 = self.lrelu(self.conv2(, x1), 3)))
x3 = self.lrelu(self.conv3(, x1, x2), 3)))
x4 = self.lrelu(self.conv4(, x1, x2, x3), 3)))
x5 = self.conv5(, x1, x2, x3, x4), 3))
# Empirically, we use 0.2 to scale the residual for better performance
out = x5 * 0.2 + x
return out
When using ops.concatenate
directly it appears that the next conv layers are not used, only the first is used.
AIT dump (dump_ait_to_py):
def model(self):
conv2d_bias_0_0 = ops.conv2d_bias(dilate=1, group=1, pad=1, stride=1)(self.rdb_input, self.conv1_weight, self.conv1_bias)
# Set outputs
# End of setting outputs
Another indicator was that profiling only ran for the first conv layer.
~~Marking as is_input
resolves the issue.~~
Wrapping the operator appears to resolve the issue.
def cat(self, tensors, dim):
out = ops.concatenate()(tensors, dim)
return out