SlicerMONAIViz icon indicating copy to clipboard operation
SlicerMONAIViz copied to clipboard

MONAI UNet training: second error on the hepatic vessels dataset

Open Alisoltan82 opened this issue 1 year ago • 0 comments

Transforms:

# Training pipeline: load, normalise intensities, reorient, resample, resize,
# pad, then draw random label-balanced patches from each volume.
train_transforms = Compose(
    [
        LoadImaged(keys=["image", "label"]),
        EnsureChannelFirstd(keys=["image", "label"]),
        # Map the intensity window [-1024, 1906] onto [0, 1]; clip=True
        # saturates values that fall outside the window.
        ScaleIntensityRanged(
            keys=["image"],
            a_min=-1024.0,
            a_max=1906.0,
            b_min=0.0,
            b_max=1.0,
            clip=True,
        ),
        # Reorient BEFORE resampling so that pixdim is applied to the same
        # anatomical axes as in val_transforms (which already uses this order).
        Orientationd(keys=["image", "label"], axcodes="RAS"),
        Spacingd(keys=["image", "label"], pixdim=(1.5, 1.5, 2.0), mode=("bilinear", "nearest")),
        # Per-key interpolation: keep the default "area" mode for the image,
        # but use "nearest" for the label so class IDs stay integral instead
        # of being blended into fractional values at region boundaries.
        Resized(keys=["image", "label"], spatial_size=(240, 240, 128), mode=("area", "nearest")),
        DivisiblePadd(keys=["image", "label"], k=64),
        # Four 96^3 patches per volume; pos=neg=1 requests an even split
        # between foreground-centred and background-centred crops.
        RandCropByPosNegLabeld(
            keys=["image", "label"],
            label_key="label",
            spatial_size=(96, 96, 96),
            pos=1,
            neg=1,
            num_samples=4,
            image_key="image",
            image_threshold=0,
        ),
    ]
)
# Validation pipeline: deterministic preprocessing only (no random cropping).
val_transforms = Compose(
    [
        LoadImaged(keys=["image", "label"]),
        EnsureChannelFirstd(keys=["image", "label"]),
        # Map the intensity window [-1024, 1906] onto [0, 1]; clip=True
        # saturates values that fall outside the window.
        ScaleIntensityRanged(
            keys=["image"],
            a_min=-1024.0,
            a_max=1906.0,
            b_min=0.0,
            b_max=1.0,
            clip=True,
        ),
        # Foreground cropping is intentionally left disabled here.
        #CropForegroundd(keys=["image", "label"], source_key="image"),
        Orientationd(keys=["image", "label"], axcodes="RAS"),
        Spacingd(keys=["image", "label"], pixdim=(1.5, 1.5, 2.0), mode=("bilinear", "nearest")),
        # Per-key interpolation: keep the default "area" mode for the image,
        # but use "nearest" for the label so class IDs stay integral instead
        # of being blended into fractional values at region boundaries.
        Resized(keys=["image", "label"], spatial_size=(240, 240, 128), mode=("area", "nearest")),
        DivisiblePadd(keys=["image", "label"], k=64),
    ]
)

Data shape from the loader:

# Pull a single batch from the training loader and display the tensor shapes
# of the image and label entries.
data = first(train_loader)
(data["image"].shape, data["label"].shape)

(torch.Size([4, 1, 96, 96, 96]), torch.Size([4, 1, 96, 96, 96]))

Model:

# 3D residual U-Net: five resolution levels joined by four stride-2 steps.
# in_channels=1 matches the single-channel batches shown above
# ([4, 1, 96, 96, 96]).
# NOTE(review): out_channels=3 presumably means background + 2 foreground
# classes; it must agree with the class count used by any one-hot
# post-processing of predictions and labels -- confirm against the
# validation code, which is not shown here.
model = UNet(
    spatial_dims=3,
    in_channels=1,
    out_channels=3,
    channels=(16, 32, 64, 128, 256),
    strides=(2, 2, 2, 2),
    num_res_units=2,
    norm=Norm.BATCH,
)
# Move the network's parameters and buffers to the target device.
model = model.to(device)
Training output and traceback:

----------
epoch 1/100
epoch 1 average loss: 0.7484
----------
epoch 2/100
epoch 2 average loss: 0.7053
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [1629,0,0], thread: [64,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [1629,0,0], thread: [68,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [1629,0,0], thread: [72,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [1312,0,0], thread: [65,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [1630,0,0], thread: [64,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [1630,0,0], thread: [68,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [1630,0,0], thread: [72,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [1630,0,0], thread: [76,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [1762,0,0], thread: [65,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [1762,0,0], thread: [69,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [1762,0,0], thread: [71,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [1762,0,0], thread: [73,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [1762,0,0], thread: [77,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [1762,0,0], thread: [81,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [1628,0,0], thread: [64,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [1628,0,0], thread: [67,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [1628,0,0], thread: [68,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [1628,0,0], thread: [71,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [1628,0,0], thread: [72,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [1761,0,0], thread: [29,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [1628,0,0], thread: [56,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [1628,0,0], thread: [59,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [1628,0,0], thread: [60,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [1312,0,0], thread: [50,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [1312,0,0], thread: [54,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [1312,0,0], thread: [58,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [1312,0,0], thread: [62,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [1630,0,0], thread: [52,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [1630,0,0], thread: [56,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [1630,0,0], thread: [60,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [1761,0,0], thread: [65,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [1761,0,0], thread: [69,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [1761,0,0], thread: [71,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [1761,0,0], thread: [73,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [1761,0,0], thread: [77,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [1895,0,0], thread: [64,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [1894,0,0], thread: [64,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [1894,0,0], thread: [68,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [1894,0,0], thread: [72,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [2018,0,0], thread: [65,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [2018,0,0], thread: [67,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [2018,0,0], thread: [69,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [2018,0,0], thread: [71,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [2018,0,0], thread: [73,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [2018,0,0], thread: [75,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [2018,0,0], thread: [77,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [2018,0,0], thread: [81,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [2018,0,0], thread: [85,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [1894,0,0], thread: [60,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [2021,0,0], thread: [49,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [2021,0,0], thread: [53,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [2021,0,0], thread: [57,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [2021,0,0], thread: [61,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [2017,0,0], thread: [45,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [2017,0,0], thread: [49,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [2017,0,0], thread: [53,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [2017,0,0], thread: [55,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [2017,0,0], thread: [57,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [2017,0,0], thread: [59,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [2017,0,0], thread: [61,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [2017,0,0], thread: [63,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
File <timed exec>:48

File /opt/conda/lib/python3.10/site-packages/monai/metrics/metric.py:344, in CumulativeIterationMetric.__call__(self, y_pred, y, **kwargs)
    324 def __call__(
    325     self, y_pred: TensorOrList, y: TensorOrList | None = None, **kwargs: Any
    326 ) -> torch.Tensor | Sequence[torch.Tensor | Sequence[torch.Tensor]]:
    327     """
    328     Execute basic computation for model prediction and ground truth.
    329     It can support  both `list of channel-first Tensor` and `batch-first Tensor`.
   (...)
    342         a `batch-first` tensor (BC[HWD]) or a list of `batch-first` tensors.
    343     """
--> 344     ret = super().__call__(y_pred=y_pred, y=y, **kwargs)
    345     if isinstance(ret, (tuple, list)):
    346         self.extend(*ret)

File /opt/conda/lib/python3.10/site-packages/monai/metrics/metric.py:73, in IterationMetric.__call__(self, y_pred, y, **kwargs)
     71 # handling a list of channel-first data
     72 if isinstance(y_pred, (list, tuple)) or isinstance(y, (list, tuple)):
---> 73     return self._compute_list(y_pred, y, **kwargs)
     74 # handling a single batch-first data
     75 if isinstance(y_pred, torch.Tensor):

File /opt/conda/lib/python3.10/site-packages/monai/metrics/metric.py:97, in IterationMetric._compute_list(self, y_pred, y, **kwargs)
     83 """
     84 Execute the metric computation for `y_pred` and `y` in a list of "channel-first" tensors.
     85 
   (...)
     94 Note: subclass may enhance the operation to have multi-thread support.
     95 """
     96 if y is not None:
---> 97     ret = [
     98         self._compute_tensor(p.detach().unsqueeze(0), y_.detach().unsqueeze(0), **kwargs)
     99         for p, y_ in zip(y_pred, y)
    100     ]
    101 else:
    102     ret = [self._compute_tensor(p_.detach().unsqueeze(0), None, **kwargs) for p_ in y_pred]

File /opt/conda/lib/python3.10/site-packages/monai/metrics/metric.py:98, in <listcomp>(.0)
     83 """
     84 Execute the metric computation for `y_pred` and `y` in a list of "channel-first" tensors.
     85 
   (...)
     94 Note: subclass may enhance the operation to have multi-thread support.
     95 """
     96 if y is not None:
     97     ret = [
---> 98         self._compute_tensor(p.detach().unsqueeze(0), y_.detach().unsqueeze(0), **kwargs)
     99         for p, y_ in zip(y_pred, y)
    100     ]
    101 else:
    102     ret = [self._compute_tensor(p_.detach().unsqueeze(0), None, **kwargs) for p_ in y_pred]

File /opt/conda/lib/python3.10/site-packages/monai/metrics/meandice.py:95, in DiceMetric._compute_tensor(self, y_pred, y)
     93     raise ValueError(f"y_pred should have at least 3 dimensions (batch, channel, spatial), got {dims}.")
     94 # compute dice (BxC) for each channel for each batch
---> 95 return self.dice_helper(y_pred=y_pred, y=y)

File /opt/conda/lib/python3.10/site-packages/monai/metrics/meandice.py:260, in DiceHelper.__call__(self, y_pred, y)
    258         x_pred = (y_pred[b, 0] == c) if (y_pred.shape[1] == 1) else y_pred[b, c].bool()
    259         x = (y[b, 0] == c) if (y.shape[1] == 1) else y[b, c]
--> 260         c_list.append(self.compute_channel(x_pred, x))
    261     data.append(torch.stack(c_list))
    262 data = torch.stack(data, dim=0).contiguous()  # type: ignore

File /opt/conda/lib/python3.10/site-packages/monai/metrics/meandice.py:219, in DiceHelper.compute_channel(self, y_pred, y)
    217 """"""
    218 y_o = torch.sum(y)
--> 219 if y_o > 0:
    220     return (2.0 * torch.sum(torch.masked_select(y, y_pred))) / (y_o + torch.sum(y_pred))
    221 if self.ignore_empty:

File /opt/conda/lib/python3.10/site-packages/monai/data/meta_tensor.py:282, in MetaTensor.__torch_function__(cls, func, types, args, kwargs)
    280 if kwargs is None:
    281     kwargs = {}
--> 282 ret = super().__torch_function__(func, types, args, kwargs)
    283 # if `out` has been used as argument, metadata is not copied, nothing to do.
    284 # if "out" in kwargs:
    285 #     return ret
    286 if _not_requiring_metadata(ret):

File /opt/conda/lib/python3.10/site-packages/torch/_tensor.py:1295, in Tensor.__torch_function__(cls, func, types, args, kwargs)
   1292     return NotImplemented
   1294 with _C.DisableTorchFunctionSubclass():
-> 1295     ret = func(*args, **kwargs)
   1296     if func in get_default_nowrap_functions():
   1297         return ret

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.

Alisoltan82 avatar Jan 23 '24 11:01 Alisoltan82