deformable-kernels
cuda runtime error (77) : an illegal memory access was encountered
Hi,
I was trying to replace Conv2d in a UNet with DeformConv2d and ran into some errors. I'm using CUDA 10.0 and PyTorch 1.3.1.
Below is the code for the UNet:
import torch
import torch.nn as nn

class UNet(nn.Module):
    def __init__(self, in_channels=4, out_channels=4):
        super(UNet, self).__init__()
        self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True)
        self.conv1_1 = nn.Conv2d(in_channels, 32, kernel_size=3, stride=1, padding=1)
        self.conv1_2 = nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1)
        self.pool1 = nn.MaxPool2d(kernel_size=2)
        self.conv2_1 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.conv2_2 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1)
        self.pool2 = nn.MaxPool2d(kernel_size=2)
        self.conv3_1 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.conv3_2 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1)
        self.pool3 = nn.MaxPool2d(kernel_size=2)
        self.conv4_1 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)
        self.conv4_2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
        self.pool4 = nn.MaxPool2d(kernel_size=2)
        self.conv5_1 = nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1)
        self.conv5_2 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)
        self.pool5 = nn.MaxPool2d(kernel_size=2)
        self.up6 = nn.ConvTranspose2d(512, 256, 2, stride=2)
        self.conv6_1 = nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=1)
        self.conv6_2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
        self.up7 = nn.ConvTranspose2d(256, 128, 2, stride=2)
        self.conv7_1 = nn.Conv2d(256, 128, kernel_size=3, stride=1, padding=1)
        self.conv7_2 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1)
        self.up8 = nn.ConvTranspose2d(128, 64, 2, stride=2)
        self.conv8_1 = nn.Conv2d(128, 64, kernel_size=3, stride=1, padding=1)
        self.conv8_2 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1)
        self.up9 = nn.ConvTranspose2d(64, 32, 2, stride=2)
        self.conv9_1 = nn.Conv2d(64, 32, kernel_size=3, stride=1, padding=1)
        self.conv9_2 = nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1)
        # self.conv9_2 = DeformConv2d(32, 32, kernel_size=3, stride=1, padding=1, groups=32)
        self.conv10 = nn.Conv2d(32, out_channels, kernel_size=1, stride=1, padding=0)

        # Initialize weights
        for m in self.modules():
            if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d):
                nn.init.kaiming_normal_(m.weight.data)
                nn.init.constant_(m.bias.data, 0)

    def forward(self, x):
        conv1 = self.lrelu(self.conv1_1(x))
        conv1 = self.lrelu(self.conv1_2(conv1))
        pool1 = self.pool1(conv1)

        conv2 = self.lrelu(self.conv2_1(pool1))
        conv2 = self.lrelu(self.conv2_2(conv2))
        pool2 = self.pool2(conv2)

        conv3 = self.lrelu(self.conv3_1(pool2))
        conv3 = self.lrelu(self.conv3_2(conv3))
        pool3 = self.pool3(conv3)

        conv4 = self.lrelu(self.conv4_1(pool3))
        conv4 = self.lrelu(self.conv4_2(conv4))
        pool4 = self.pool4(conv4)

        conv5 = self.lrelu(self.conv5_1(pool4))
        conv5 = self.lrelu(self.conv5_2(conv5))

        up6 = self.up6(conv5)
        up6 = torch.cat((up6, conv4), 1)
        conv6 = self.lrelu(self.conv6_1(up6))
        conv6 = self.lrelu(self.conv6_2(conv6))

        up7 = self.up7(conv6)
        up7 = torch.cat((up7, conv3), 1)
        conv7 = self.lrelu(self.conv7_1(up7))
        conv7 = self.lrelu(self.conv7_2(conv7))

        up8 = self.up8(conv7)
        up8 = torch.cat((up8, conv2), 1)
        conv8 = self.lrelu(self.conv8_1(up8))
        conv8 = self.lrelu(self.conv8_2(conv8))

        up9 = self.up9(conv8)
        up9 = torch.cat((up9, conv1), 1)
        conv9 = self.lrelu(self.conv9_1(up9))
        conv9 = self.lrelu(self.conv9_2(conv9))

        out = self.conv10(conv9)
        return out
This UNet model works fine. However, when I replace nn.Conv2d in self.conv9_2 with DeformConv2d (commented out in the code snippet above), the following error occurs:
105 self.optimizer.zero_grad()
106
--> 107 outputs = self.model(sources)
108
109 loss = self.criterion(outputs, targets)
~/anaconda3/envs/deformable-kernels/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
539 result = self._slow_forward(*input, **kwargs)
540 else:
--> 541 result = self.forward(*input, **kwargs)
542 for hook in self._forward_hooks.values():
543 hook_result = hook(self, input, result)
<ipython-input-2-b3122d7e7f84> in forward(self, x)
88 up9 = torch.cat((up9, conv1), 1)
89 conv9 = self.lrelu(self.conv9_1(up9))
---> 90 conv9 = self.lrelu(self.conv9_2(conv9))
91
92 out = self.conv10(conv9)
~/anaconda3/envs/deformable-kernels/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
539 result = self._slow_forward(*input, **kwargs)
540 else:
--> 541 result = self.forward(*input, **kwargs)
542 for hook in self._forward_hooks.values():
543 hook_result = hook(self, input, result)
~/anaconda3/envs/deformable-kernels/lib/python3.6/site-packages/torch/nn/modules/activation.py in forward(self, input)
533
534 def forward(self, input):
--> 535 return F.leaky_relu(input, self.negative_slope, self.inplace)
536
537 def extra_repr(self):
~/anaconda3/envs/deformable-kernels/lib/python3.6/site-packages/torch/nn/functional.py in leaky_relu(input, negative_slope, inplace)
1059 """
1060 if inplace:
-> 1061 result = torch._C._nn.leaky_relu_(input, negative_slope)
1062 else:
1063 result = torch._C._nn.leaky_relu(input, negative_slope)
RuntimeError: cuda runtime error (77) : an illegal memory access was encountered at /opt/conda/conda-bld/pytorch_1573049301898/work/aten/src/THCUNN/generic/LeakyReLU.cu:29
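In case it helps to reproduce this outside of the full UNet, a minimal standalone sketch of the failing layer would look roughly like the following. The import path is a guess on my part, the constructor arguments are copied from the commented-out line above, and the input shape is just a placeholder, so please correct me if any of this is off:

import torch
from deformable_kernels.modules import DeformConv2d  # guessed import path

layer = DeformConv2d(32, 32, kernel_size=3, stride=1, padding=1, groups=32).cuda()
x = torch.randn(1, 32, 256, 256, device="cuda", requires_grad=True)  # placeholder shape

out = layer(x)            # forward pass through the deformable layer alone
out.mean().backward()     # also exercise the backward kernel
torch.cuda.synchronize()  # force any asynchronous CUDA error to surface here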
I also tried DeformKernel2d and DeformKernelConv2d, and they both have the same issue.
Another thing worth mentioning is that the modified UNet is much slower than I expected. The original UNet takes about 7 minutes per epoch, while the modified UNet needs almost twice as long to complete one epoch. Maybe something went wrong when I installed apex or this package.
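In case it is useful, here is a rough sketch of how a single layer could be timed in isolation, to check whether the slowdown is in the layer itself rather than elsewhere in the training loop. The torch.cuda.synchronize() calls are there so asynchronous kernel launches don't skew the measurement; the layer construction and input shape below are placeholder assumptions:

import time
import torch
import torch.nn as nn

def time_layer(layer, x, iters=50):
    """Rough average forward time (seconds) for a single layer on the GPU."""
    layer = layer.cuda().eval()
    x = x.cuda()
    with torch.no_grad():
        for _ in range(5):            # warm-up iterations
            layer(x)
        torch.cuda.synchronize()      # make sure warm-up kernels finished
        start = time.time()
        for _ in range(iters):
            layer(x)
        torch.cuda.synchronize()      # wait for all launched kernels
    return (time.time() - start) / iters

# Example: compare a plain conv against the deformable replacement.
x = torch.randn(1, 32, 256, 256)  # placeholder input shape
print(time_layer(nn.Conv2d(32, 32, kernel_size=3, padding=1), x))
# print(time_layer(DeformConv2d(32, 32, kernel_size=3, padding=1, groups=32), x))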
I'm facing a similar memory access issue, except that in my case it's not a cuda runtime error. Instead I get a Segmentation fault (core dumped) error.
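Since CUDA errors are reported asynchronously, forcing synchronous kernel launches can at least narrow down which kernel is actually failing. A minimal sketch of one way to do that (the environment variable has to be set before the first CUDA call in the process; running the script with CUDA_LAUNCH_BLOCKING=1 on the command line works as well):

import os
# Must be set before the first CUDA call in the process, otherwise it has no effect.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

import torch  # imported after setting the environment variable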
@hangg7 any help would be greatly appreciated!