RoIAlign.pytorch
Crop a patch from an image
I wrote a simple test (code 1)
as follows. I first generate a 3D ndarray randomly and add a new dimension to represent the batch dimension. The box is set to [0, 0, 3, 3], and the crop width and height of RoIAlign are both set to 3. The output is exactly what I want.
code 1
import numpy as np
import torch
from torch.autograd import Variable
import matplotlib.pyplot as plt
from roialign.roi_align.roi_align import RoIAlign
import cv2

def to_varabile(arr, requires_grad=False, is_cuda=True):
    tensor = torch.from_numpy(arr)
    if is_cuda:
        tensor = tensor.cuda()
    var = Variable(tensor, requires_grad=requires_grad)
    return var

# the data you want
is_cuda = False

# --------------- image data generation ---------------------------
image_data = np.random.randn(3, 7, 5) * 100   # CxHxW
image_data = np.asarray(image_data, dtype=np.float32)
image_data = image_data[np.newaxis]           # add the batch dimension: NxCxHxW
# -------------- end of image data generation ---------------------

boxes_data = np.asarray([[0, 0, 3, 3]], dtype=np.float32)
box_index_data = np.asarray([0], dtype=np.int32)

image_torch = to_varabile(image_data, requires_grad=True, is_cuda=is_cuda)
boxes = to_varabile(boxes_data, requires_grad=False, is_cuda=is_cuda)
box_index = to_varabile(box_index_data, requires_grad=False, is_cuda=is_cuda)

# setting transform_fpcoor to False gives the plain crop_and_resize behaviour
roi_align = RoIAlign(crop_width=3, crop_height=3, transform_fpcoor=True)
croped = roi_align(image_torch, boxes, box_index)

print(image_torch, '\n')
print(croped)
output 1
tensor([[[[ 155.2937, 27.9904, 74.7080, 66.1174, 34.2396],
[-140.2568, 164.4559, -88.8006, -37.5217, 156.6059],
[ 44.5218, 1.7729, 146.5069, 149.6736, 5.5269],
[-222.4462, 65.9821, -52.1707, -145.4467, -49.2179],
[ -96.1867, -40.4554, -25.5354, 75.1842, -69.6537],
[ 12.1659, -15.9438, 0.2947, -55.0050, 9.0175],
[ -23.8114, -9.1514, -11.3899, -33.1432, -120.8428]],
[[ 78.6541, -1.6860, -81.5798, -46.7906, -41.1748],
[-106.7531, -40.6971, 15.0387, 50.8834, -122.1978],
[ -52.3712, -2.0634, 80.5198, 92.6046, 30.9877],
[ -44.0200, 34.4229, 83.7537, -53.1896, 68.1574],
[ 11.3319, -117.5049, -28.6529, 52.9562, -59.5388],
[ 104.2405, 148.5067, -51.5808, -82.6794, 104.9984],
[ 46.2745, -140.3886, -134.5971, -106.4377, 146.3157]],
[[ -12.9685, -22.6475, 42.1217, -71.6224, 153.7339],
[ -1.0168, -76.6128, 4.3941, -157.5561, -93.8834],
[ 7.0985, 21.7129, -2.5151, 272.5159, -74.3853],
[ 35.7283, 106.1746, -112.5265, -13.9350, 26.7033],
[ -36.1484, -51.4258, -78.5966, 123.6539, -83.3441],
[ 60.9276, 63.7939, -42.0873, 207.5641, 58.5911],
[ 28.2906, 25.9808, 95.6436, 211.1584, -151.0635]]]])
tensor([[[[ 155.2937, 27.9904, 74.7080],
[-140.2568, 164.4559, -88.8006],
[ 44.5218, 1.7729, 146.5069]],
[[ 78.6541, -1.6860, -81.5798],
[-106.7531, -40.6971, 15.0387],
[ -52.3712, -2.0634, 80.5198]],
[[ -12.9685, -22.6475, 42.1217],
[ -1.0168, -76.6128, 4.3941],
[ 7.0985, 21.7129, -2.5151]]]])
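As a sanity check (my own addition, not part of the original script), one can assert that this crop equals a plain slice of the input: with transform_fpcoor=True and box [0, 0, 3, 3], the 3x3 sampling grid lands exactly on the original pixel centers 0, 1, 2, so the crop must be the top-left 3x3 patch of every channel.

# Sanity check (sketch, appended to code 1): the crop should equal a
# plain top-left 3x3 slice, since the RoIAlign sampling points coincide
# with the integer pixel positions here.
expected = image_data[:, :, 0:3, 0:3]
assert np.allclose(croped.data.cpu().numpy(), expected)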
Next, I modified code 1 to get code 2. The only change is the generation of the image data: instead of generating it randomly, I load an image from disk and pick a small patch from it. But something goes wrong. Code 2 and its output are as follows:
code 2
import numpy as np
import torch
from torch.autograd import Variable
import matplotlib.pyplot as plt
from roialign.roi_align.roi_align import RoIAlign
import cv2

def to_varabile(arr, requires_grad=False, is_cuda=True):
    tensor = torch.from_numpy(arr)
    if is_cuda:
        tensor = tensor.cuda()
    var = Variable(tensor, requires_grad=requires_grad)
    return var

# the data you want
is_cuda = False

# ------------------------- image data generation ---------------------------------
frame_path = '/data0/liuqk/MOTChallenge/2DMOT2015/train/TUD-Campus/img1/000068.jpg'
image_data = plt.imread(frame_path)               # HxWxC
image_data = image_data[200:207, 200:205, :]      # pick a small 7x5 patch
image_data = np.transpose(image_data, (2, 0, 1))  # CxHxW
image_data = np.asarray(image_data, dtype=np.float32)
image_data = image_data[np.newaxis]               # add the batch dimension: NxCxHxW
# ------------------------- end of image data generation --------------------------
# image_data = image_data[np.newaxis, np.newaxis]

boxes_data = np.asarray([[0, 0, 3, 3]], dtype=np.float32)
box_index_data = np.asarray([0], dtype=np.int32)

image_torch = to_varabile(image_data, requires_grad=True, is_cuda=is_cuda)
boxes = to_varabile(boxes_data, requires_grad=False, is_cuda=is_cuda)
box_index = to_varabile(box_index_data, requires_grad=False, is_cuda=is_cuda)

# setting transform_fpcoor to False gives the plain crop_and_resize behaviour
roi_align = RoIAlign(crop_width=3, crop_height=3, transform_fpcoor=True)
croped = roi_align(image_torch, boxes, box_index)

print(image_torch, '\n')
print(croped)
output 2
tensor([[[[ 67., 67., 66., 66., 64.],
[ 67., 67., 66., 64., 64.],
[ 67., 67., 65., 64., 65.],
[ 67., 65., 65., 65., 65.],
[ 64., 64., 64., 64., 64.],
[ 63., 63., 63., 63., 63.],
[ 62., 62., 62., 62., 62.]],
[[ 65., 65., 64., 64., 64.],
[ 64., 64., 63., 64., 64.],
[ 64., 64., 65., 64., 65.],
[ 64., 65., 65., 65., 65.],
[ 64., 64., 64., 64., 64.],
[ 63., 63., 63., 63., 63.],
[ 62., 62., 62., 62., 62.]],
[[ 53., 53., 52., 52., 52.],
[ 55., 55., 54., 54., 54.],
[ 55., 55., 55., 54., 55.],
[ 55., 55., 55., 55., 55.],
[ 54., 54., 54., 54., 54.],
[ 53., 53., 53., 53., 53.],
[ 52., 52., 52., 52., 52.]]]])
tensor([[[[ 67., 65., 53.],
[ 53., 66., 64.],
[ 64., 52., 64.]],
[[ 55., 65., 65.],
[ 64., 54., 65.],
[ 67., 64., 55.]],
[[ 64., 54., 64.],
[ 63., 63., 53.],
[ 53., 63., 63.]]]])
As you can see, the output is not what I want, and I cannot figure out how RoIAlign works here. Why is this happening? Can anyone tell me, please?
The model didn't check whether the input tensor is contiguous, which leads to the wrong crops you observed.
There are two ways to fix this:
# tensor = torch.from_numpy(arr)   # replace with:
tensor = torch.from_numpy(arr).contiguous()

# OR

# image_data = np.asarray(image_data, dtype=np.float32)   # replace with:
image_data = np.ascontiguousarray(image_data, dtype=np.float32)
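Folded back into the to_varabile helper from the snippets above, the first fix looks like this (a sketch keeping the original signature):

import torch
from torch.autograd import Variable

def to_varabile(arr, requires_grad=False, is_cuda=True):
    # .contiguous() is a no-op for tensors that are already contiguous,
    # so this only copies when a strided view actually comes in.
    tensor = torch.from_numpy(arr).contiguous()
    if is_cuda:
        tensor = tensor.cuda()
    return Variable(tensor, requires_grad=requires_grad)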
Then I got the right crop:
tensor([[[[67., 67., 66., 66., 64.],
[67., 67., 66., 64., 64.],
[67., 67., 65., 64., 65.],
[67., 65., 65., 65., 65.],
[64., 64., 64., 64., 64.],
[63., 63., 63., 63., 63.],
[62., 62., 62., 62., 62.]],
[[65., 65., 64., 64., 64.],
[64., 64., 63., 64., 64.],
[64., 64., 65., 64., 65.],
[64., 65., 65., 65., 65.],
[64., 64., 64., 64., 64.],
[63., 63., 63., 63., 63.],
[62., 62., 62., 62., 62.]],
[[53., 53., 52., 52., 52.],
[55., 55., 54., 54., 54.],
[55., 55., 55., 54., 55.],
[55., 55., 55., 55., 55.],
[54., 54., 54., 54., 54.],
[53., 53., 53., 53., 53.],
[52., 52., 52., 52., 52.]]]], requires_grad=True)
tensor([[[[67., 67., 66.],
[67., 67., 66.],
[67., 67., 65.]],
[[65., 65., 64.],
[64., 64., 63.],
[64., 64., 65.]],
[[53., 53., 52.],
[55., 55., 54.],
[55., 55., 55.]]]], grad_fn=<CropAndResizeFunction>)
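For anyone wondering why code 2 broke while code 1 worked: np.random.randn(3, 7, 5) allocates the CxHxW array directly, so it is C-contiguous, whereas np.transpose only returns a strided view, and np.asarray keeps that memory order even when it copies for the dtype change. A small demonstration (my own sketch, independent of this repo):

import numpy as np

a = np.zeros((7, 5, 3), dtype=np.uint8)   # HxWxC, like a decoded image
b = np.transpose(a, (2, 0, 1))            # CxHxW: a view, no data moved

print(a.flags['C_CONTIGUOUS'])            # True
print(b.flags['C_CONTIGUOUS'])            # False
# np.asarray copies for the dtype change but keeps b's memory order,
# so the array code 2 feeds into RoIAlign is still non-contiguous:
print(np.asarray(b, dtype=np.float32).flags['C_CONTIGUOUS'])            # False
print(np.ascontiguousarray(b, dtype=np.float32).flags['C_CONTIGUOUS'])  # True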
Sorry for the mistakes.
@longcw Thank you very much! You have solved my problem :)