rvos
rvos copied to clipboard
Hi, why does the decoder need to loop T times?
@carlesventura
Excuse me, could you tell me why the decoder needs to loop T times?
for t in range(0, T):
#prev_hidden_temporal_list is a list with the hidden state for all instances from previous time instant
#If this is the first frame of the sequence, hidden_temporal is initialized to None. Otherwise, it is set with the value from previous time instant.
if prev_hidden_temporal_list is not None:
hidden_temporal = prev_hidden_temporal_list[t]
if args.only_temporal:
hidden_spatial = None
else:
hidden_temporal = None
#The decoder receives two hidden state variables: hidden_spatial (a tuple, with hidden_state and cell_state) which refers to the
#hidden state from the previous object instance from the same time instant, and hidden_temporal which refers to the hidden state from the same
#object instance from the previous time instant.
out_mask, hidden = decoder(feats, hidden_spatial, hidden_temporal)
hidden_tmp = []
for ss in range(len(hidden)):
if mode == 'train':
hidden_tmp.append(hidden[ss][0])
else:
hidden_tmp.append(hidden[ss][0].data)
hidden_spatial = hidden
hidden_temporal_list.append(hidden_tmp)
upsample_match = nn.UpsamplingBilinear2d(size=(x.size()[-2], x.size()[-1]))
out_mask = upsample_match(out_mask) # batch_size * 1 * height * width
out_mask = out_mask.view(out_mask.size(0), -1) # batch_size * height x width
# repeat predicted mask as many times as elements in ground truth.
# to compute iou against all ground truth elements at once
y_pred_i = out_mask.unsqueeze(0) # out_mask: batch_size * height x width -> 1 * batch_size * height x width
y_pred_i = y_pred_i.permute(1,0,2) # 1 * batch_size * height * width -> batch_size * 1 * height x width
y_pred_i = y_pred_i.repeat(1,y_mask.size(1),1)
y_pred_i = y_pred_i.view(y_mask.size(0)*y_mask.size(1),y_mask.size(2))# torch.Size([10, 10, 114688]) -> torch.Size([100, 114688])
y_true_p = y_mask.view(y_mask.size(0)*y_mask.size(1),y_mask.size(2))# torch.Size([100, 114688])
c = args.iou_weight * softIoU(y_true_p, y_pred_i)
c = c.view(sw_mask.size(0),-1)
scores[:,:,t] = c.cpu().data
# get predictions in list to concat later
out_masks.append(out_mask)
================================= Looking forward to your answer. Thanks! Best regards, zwk