Vis-MVSNet
Vis-MVSNet copied to clipboard
Caught NanError in replica 0 on device 0.
Hello. I first ran the command `python sh/bld.py local`, with the `sh/dir.json` file set as:
{
"local": {
"bld_dir": "dataset_low_res",
"dtu_dir": "",
"tnt_dir": "tankandtemples",
"tnt_training_dir": "",
"save_dir": "save",
"batch_size": 4,
"num_workers": 4,
"train_environ": "",
"val_environ": ""
}
}
Then I set the `sh/bld_val.py` file as:
import os
import json
import argparse
# Launcher script: reads the settings of one machine profile from sh/dir.json
# and runs val.py through the shell once per (checkpoint name, source-view
# count) combination listed in the loops below.
parser = argparse.ArgumentParser()
parser.add_argument('machine', type=str)  # key of the profile inside sh/dir.json
args = parser.parse_args()

# Keep only the sub-dictionary that belongs to the requested machine profile.
with open('sh/dir.json') as f:
    d = json.load(f)[args.machine]

for m in ['temp']:
    for ns in range(3, 3+1):
        # The command is written one option per line for readability; the
        # value of 'val_environ' is placed in front of the python invocation
        # (presumably an environment prefix -- confirm against dir.json).
        template = f"""{d['val_environ']}
            python val.py
            --data_root {d['bld_dir']}
            --dataset_name blended
            --model_name model_cas
            --num_src {ns}
            --max_d 128
            --interval_scale 1
            --cas_depth_num 32,16,8
            --cas_interv_scale 4,2,1
            --resize 768,576
            --crop 768,576
            --mode soft
            --load_path {d['save_dir']}/{m}
            """
        # Collapse every run of whitespace (including the newlines above) into
        # a single space so the shell receives a one-line command.
        cmd = ' '.join(template.split())
        print(cmd)
        os.system(cmd)
Then I ran the command `python sh/bld_val.py local` and got:
(with four GPUs)
(base) bash-4.2$ python sh/bld_val.py local
python val.py --data_root dataset_low_res --dataset_name blended --model_name model_cas --num_src 3 --max_d 128 --interval_scale 1 --cas_depth_num 32,16,8 --cas_interv_scale 4,2,1 --resize 768,576 --crop 768,576 --mode soft --load_path save/temp
Number of samples: 915
Number of model parameters: 1162696
load save/temp/-1
0%| | 0/915 [00:23<?, ?it/s]
Traceback (most recent call last):
File "val.py", line 99, in <module>
outputs, refined_depth, prob_maps = model(sample, cas_depth_num, cas_interv_scale, mode=args.mode)
File "/export/data/lwangcg/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/export/data/lwangcg/anaconda3/lib/python3.7/site-packages/torch/nn/parallel/data_parallel.py", line 161, in forward
outputs = self.parallel_apply(replicas, inputs, kwargs)
File "/export/data/lwangcg/anaconda3/lib/python3.7/site-packages/torch/nn/parallel/data_parallel.py", line 171, in parallel_apply
return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)])
File "/export/data/lwangcg/anaconda3/lib/python3.7/site-packages/torch/nn/parallel/parallel_apply.py", line 86, in parallel_apply
output.reraise()
File "/export/data/lwangcg/anaconda3/lib/python3.7/site-packages/torch/_utils.py", line 428, in reraise
raise self.exc_type(msg)
utils.utils.NanError: Caught NanError in replica 0 on device 0.
Original Traceback (most recent call last):
File "/export/data/lwangcg/anaconda3/lib/python3.7/site-packages/torch/nn/parallel/parallel_apply.py", line 61, in _worker
output = module(*input, **kwargs)
File "/export/data/lwangcg/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/export/data/lwangcg/Vis-MVSNet/core/model_cas.py", line 419, in forward
est_depth_1, prob_map_1, pair_results_1 = self.stage1([ref_feat_1, ref_cam, srcs_feat_1, srcs_cam], depth_num=depth_nums[0], upsample=False, mem=mem, mode=mode, depth_start_override=None, depth_interval_override=depth_interval*interval_scales[0], s_scale=8)
File "/export/data/lwangcg/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/export/data/lwangcg/Vis-MVSNet/core/model_cas.py", line 317, in forward
warped_src = self.build_cost_volume(ref_feat, ref_cam, src_feat, src_cam, depth_num, depth_start, depth_interval, s_scale, d_scale)
File "/export/data/lwangcg/Vis-MVSNet/core/model_cas.py", line 179, in build_cost_volume
warped_src_nd_c_h_w = homography_warping(src_nd_c_h_w, Hs.view(-1, *Hs.size()[2:])) # n*d chw
File "/export/data/lwangcg/Vis-MVSNet/core/homography.py", line 103, in homography_warping
warped = interpolate(input, warped_coord)
File "/export/data/lwangcg/Vis-MVSNet/core/homography.py", line 93, in interpolate
raise NanError
utils.utils.NanError
(with a single GPU)
(base) bash-4.2$ CUDA_VISIBLE_DEVICES=0 python sh/bld_val.py local
python val.py --data_root dataset_low_res --dataset_name blended --model_name model_cas --num_src 3 --max_d 128 --interval_scale 1 --cas_depth_num 32,16,8 --cas_interv_scale 4,2,1 --resize 768,576 --crop 768,576 --mode soft --load_path save/temp
Number of samples: 915
Number of model parameters: 1162696
load save/temp/-1
0%| | 0/915 [00:00<?, ?it/s]
Traceback (most recent call last):
File "val.py", line 99, in <module>
outputs, refined_depth, prob_maps = model(sample, cas_depth_num, cas_interv_scale, mode=args.mode)
File "/export/data/lwangcg/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/export/data/lwangcg/anaconda3/lib/python3.7/site-packages/torch/nn/parallel/data_parallel.py", line 159, in forward
return self.module(*inputs[0], **kwargs[0])
File "/export/data/lwangcg/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/export/data/lwangcg/Vis-MVSNet/core/model_cas.py", line 419, in forward
est_depth_1, prob_map_1, pair_results_1 = self.stage1([ref_feat_1, ref_cam, srcs_feat_1, srcs_cam], depth_num=depth_nums[0], upsample=False, mem=mem, mode=mode, depth_start_override=None, depth_interval_override=depth_interval*interval_scales[0], s_scale=8)
File "/export/data/lwangcg/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/export/data/lwangcg/Vis-MVSNet/core/model_cas.py", line 317, in forward
warped_src = self.build_cost_volume(ref_feat, ref_cam, src_feat, src_cam, depth_num, depth_start, depth_interval, s_scale, d_scale)
File "/export/data/lwangcg/Vis-MVSNet/core/model_cas.py", line 179, in build_cost_volume
warped_src_nd_c_h_w = homography_warping(src_nd_c_h_w, Hs.view(-1, *Hs.size()[2:])) # n*d chw
File "/export/data/lwangcg/Vis-MVSNet/core/homography.py", line 103, in homography_warping
warped = interpolate(input, warped_coord)
File "/export/data/lwangcg/Vis-MVSNet/core/homography.py", line 93, in interpolate
raise NanError
utils.utils.NanError
Could you please give me some help? Thank you!
Yes, we may encounter NaN when training with BlendedMVS, but it should have been caught in the training loop.