vision icon indicating copy to clipboard operation
vision copied to clipboard

Passing 2-, 3- and 4-channel `int64` and `int32` tensors and ndarrays to `ToPILImage()` with `None` as the `mode` argument raises errors

Open hyperkai opened this issue 8 months ago • 1 comment

🐛 Describe the bug

Passing 2-, 3- and 4-channel int64 and int32 tensors and ndarrays to ToPILImage() with None as the mode argument raises the errors shown below:

from torchvision.transforms.v2 import ToPILImage
import torch
import numpy as np

tp = ToPILImage()
tp = ToPILImage(mode=None)

tp((torch.tensor([[[0]], [[1]]]), 0)) # int64
tp((torch.tensor([[[0]], [[1]], [[2]]]), 0)) #int64
tp((torch.tensor([[[0]], [[1]], [[2]], [[3]]]), 0)) #int64
tp((np.array([[[0, 1]]], dtype=np.int64), 0))
tp((np.array([[[0, 1, 2]]], dtype=np.int64), 0))
tp((np.array([[[0, 1, 2, 3]]], dtype=np.int64), 0))
# Error

tp((torch.tensor([[[0]], [[1]]], dtype=torch.int32), 0))
tp((torch.tensor([[[0]], [[1]], [[2]]], dtype=torch.int32), 0))
tp((torch.tensor([[[0]], [[1]], [[2]], [[3]]], dtype=torch.int32), 0))
tp((np.array([[[0, 1]]]), 0)) #int32
tp((np.array([[[0, 1, 2]]]), 0)) #int32
tp((np.array([[[0, 1, 2, 3]]]), 0)) #int32
# Error

TypeError: Input type int64 is not supported

TypeError: Input type int32 is not supported

But passing 2-, 3- and 4-channel int64 and int32 tensors and ndarrays to ToPILImage() with 'LA', 'RGB', 'YCbCr', 'HSV', 'RGBA', 'CMYK' or 'RGBX' as the mode argument works, as shown below:

from torchvision.transforms.v2 import ToPILImage
import torch
import numpy as np

tp = ToPILImage(mode="LA")

tp((torch.tensor([[[0]], [[1]]]), 0)) # int64
tp((torch.tensor([[[0]], [[1]]], dtype=torch.int32), 0))
tp((np.array([[[0, 1]]], dtype=np.int64), 0))
tp((np.array([[[0, 1]]]), 0)) #int32
# (<PIL.Image.Image image mode=LA size=1x1>, 0)

tp = ToPILImage(mode="RGB")

tp((torch.tensor([[[0]], [[1]], [[2]]]), 0))
tp((torch.tensor([[[0]], [[1]], [[2]]], dtype=torch.int32), 0))
tp((np.array([[[0, 1, 2]]], dtype=np.int64), 0))
tp((np.array([[[0, 1, 2]]]), 0)) #int32
# (<PIL.Image.Image image mode=RGB size=1x1>, 0)

tp = ToPILImage(mode="YCbCr")

tp((torch.tensor([[[0]], [[1]], [[2]]]), 0))
tp((torch.tensor([[[0]], [[1]], [[2]]], dtype=torch.int32), 0))
tp((np.array([[[0, 1, 2]]], dtype=np.int64), 0))
tp((np.array([[[0, 1, 2]]]), 0)) #int32
# (<PIL.Image.Image image mode=YCbCr size=1x1>, 0)

tp = ToPILImage(mode="HSV")

tp((torch.tensor([[[0]], [[1]], [[2]]]), 0))
tp((torch.tensor([[[0]], [[1]], [[2]]], dtype=torch.int32), 0))
tp((np.array([[[0, 1, 2]]], dtype=np.int64), 0))
tp((np.array([[[0, 1, 2]]]), 0)) #int32
# (<PIL.Image.Image image mode=HSV size=1x1>, 0)

tp = ToPILImage(mode="RGBA")

tp((torch.tensor([[[0]], [[1]], [[2]], [[3]]]), 0))
tp((torch.tensor([[[0]], [[1]], [[2]], [[3]]], dtype=torch.int32), 0))
tp((np.array([[[0, 1, 2, 3]]], dtype=np.int64), 0))
tp((np.array([[[0, 1, 2, 3]]]), 0)) #int32
# (<PIL.Image.Image image mode=RGBA size=1x1>, 0)

tp = ToPILImage(mode="CMYK")

tp((torch.tensor([[[0]], [[1]], [[2]], [[3]]]), 0))
tp((torch.tensor([[[0]], [[1]], [[2]], [[3]]], dtype=torch.int32), 0))
tp((np.array([[[0, 1, 2, 3]]], dtype=np.int64), 0))
tp((np.array([[[0, 1, 2, 3]]]), 0)) #int32
# (<PIL.Image.Image image mode=CMYK size=1x1>, 0)

tp = ToPILImage(mode="RGBX")

tp((torch.tensor([[[0]], [[1]], [[2]], [[3]]]), 0))
tp((torch.tensor([[[0]], [[1]], [[2]], [[3]]], dtype=torch.int32), 0))
tp((np.array([[[0, 1, 2, 3]]], dtype=np.int64), 0))
tp((np.array([[[0, 1, 2, 3]]]), 0)) #int32
# (<PIL.Image.Image image mode=RGBX size=1x1>, 0)

In addition, passing a 1-channel int64 tensor or ndarray to ToPILImage() with None or 'I' as the mode argument raises the error shown below:

from torchvision.transforms.v2 import ToPILImage
import torch
import numpy as np

tp = ToPILImage()
tp = ToPILImage(mode=None)
tp = ToPILImage(mode="I")

tp((torch.tensor([[[0]]]), 0)) # int64
tp((np.array([[[0]]], dtype=np.int64), 0))
# Error

ValueError: Incorrect mode (I) supplied for input type <class 'numpy.dtype'>. Should be None

But passing a 1-channel int32 tensor or ndarray to ToPILImage() with None or 'I' as the mode argument works, as shown below:

from torchvision.transforms.v2 import ToPILImage
import torch
import numpy as np

tp = ToPILImage()
tp = ToPILImage(mode=None)
tp = ToPILImage(mode="I")

tp((torch.tensor([[[0]]], dtype=torch.int32), 0))
tp((np.array([[[0]]]), 0)) # int32
# (<PIL.Image.Image image mode=I size=1x1>, 0)

Versions

import torchvision

torchvision.__version__ # '0.20.1'

hyperkai avatar Apr 30 '25 03:04 hyperkai

Sorry, I'm struggling to really understand the core of the issue. Are all of the `, 0` parameters relevant here? Would appreciate a strictly minimal reproducing example, thanks

NicolasHug avatar May 29 '25 12:05 NicolasHug