Estimating-Depth-from-RGB-and-Sparse-Sensing
The NYU_V2 dataloader in train.py seems strange.
Looking through the code, I found a problem.

During training, the image and the raw depth should be used to build the sparse input, and the network should output the completed depth. However, the data loader only ever reads `["depths"]` and never `["rawDepths"]`, so the ground truth ends up inside the input: the sparse samples are drawn from the same inpainted depth map that the output is later compared against. Does it make sense to compare the model's output against a GT that was itself fed in as part of the input?
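For reference, in the labeled NYUv2 file `rawDepths` holds the raw Kinect measurements (with holes, encoded as zeros), while `depths` holds the inpainted ground truth. A quick sanity check makes the difference visible (a sketch, assuming the standard `nyu_depth_v2_labeled.mat` HDF5/v7.3 layout):

```python
import h5py
import numpy as np

# Compare the two depth datasets in nyu_depth_v2_labeled.mat.
with h5py.File("./nyu_depth_v2_labeled.mat", "r") as f:
    raw = np.asarray(f["rawDepths"][0])
    gt = np.asarray(f["depths"][0])

print("zero (missing) pixels in rawDepths[0]:", (raw == 0).mean())
print("zero (missing) pixels in depths[0]:   ", (gt == 0).mean())
# rawDepths should show a sizeable fraction of missing pixels;
# depths should show (almost) none, since it was inpainted.
```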
In your code:

```python
class NYU_V2(Dataset):
    def __init__(self, trn_tst=0, transform=None):
        data = h5py.File('./nyu_depth_v2_labeled.mat')
        if trn_tst == 0:
            # trainloader
            self.images = data["images"][0:1400]
            self.depths = data["depths"][0:1400]
        else:
            # testloader
            self.images = data["images"][1401:]
            self.depths = data["depths"][1401:]
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        sample = self.images[idx, :]
        s_depth = self.depths[idx, :]
        sample = torch.from_numpy(np.transpose(sample, (2, 1, 0)))
        s_depth = torch.from_numpy(np.transpose(s_depth, (1, 0)))
        return sample.float(), s_depth.float()
```
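To make the circularity concrete: whatever sparsifier train.py applies downstream will draw its samples from `s_depth`, which with this loader is the inpainted GT. A minimal sketch using the class quoted above (the grid `sparsify` helper is my own assumption for illustration, not necessarily what train.py does):

```python
import torch

def sparsify(depth, stride=16):
    # Hypothetical sparsifier: keep depth on a regular grid, zero elsewhere.
    mask = torch.zeros_like(depth)
    mask[::stride, ::stride] = 1.0
    return depth * mask

dataset = NYU_V2(trn_tst=0)        # the loader quoted above
img, s_depth = dataset[0]
sparse_input = sparsify(s_depth)   # sparse points are sampled from the GT...
target = s_depth                   # ...and the loss compares against that same GT
```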
And here is the code I modified:
```python
import h5py
import numpy as np
import torch
from torch.utils.data import Dataset

class NYU_V2(Dataset):
    def __init__(self, mode="train", transform=None, path="./nyu_depth_v2_labeled.mat"):
        # Keep the original 1400/49 split (the original skipped index 1400 by
        # slicing [1401:]; slice(1400, None) fixes that off-by-one).
        split = slice(0, 1400) if mode == "train" else slice(1400, None)
        with h5py.File(path, "r") as f:
            self.images = f["images"][split]
            self.raw_depths = f["rawDepths"][split]  # raw Kinect depth -> sparse input
            self.gt_depths = f["depths"][split]      # inpainted depth  -> label
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        # Data
        img = self.images[idx, :]
        img = torch.from_numpy(np.transpose(img, (2, 1, 0)))
        raw_depth = self.raw_depths[idx, :]
        raw_depth = torch.from_numpy(np.transpose(raw_depth, (1, 0)))
        # Label
        depth = self.gt_depths[idx, :]
        depth = torch.from_numpy(np.transpose(depth, (1, 0)))
        return (img.float(), raw_depth.float()), depth.float()
```
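Usage would look like this (a sketch; the batch size is arbitrary and the downstream sparsification is left as a placeholder):

```python
from torch.utils.data import DataLoader

train_loader = DataLoader(NYU_V2(mode="train"), batch_size=8, shuffle=True)

for (img, raw_depth), gt_depth in train_loader:
    # img:       (B, H, W, 3) RGB, channels-last as returned by this loader
    # raw_depth: (B, H, W) raw depth with holes -> sample the sparse input from this
    # gt_depth:  (B, H, W) inpainted ground truth -> use only in the loss
    ...
```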