icevision
icevision copied to clipboard
Cannot open tif images when using COCO parser
🐛 Bug
Describe the bug
The COCO parser uses PIL, which cannot open tif
images with 3 float32 channels.
Stack trace from a unit test that caught the bug:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/opt/conda/lib/python3.9/site-packages/pytorch_lightning/core/datamodule.py:474: in wrapped_fn
fn(*args, **kwargs)
../mdai_core/datasets/image/coco_data_module.py:158: in setup
self.test_dataset = self.create_dataset(
../mdai_core/datasets/image/coco_data_module.py:186: in create_dataset
records, *_ = parser.parse(data_splitter=data_splitter, autofix=True)
/opt/conda/lib/python3.9/site-packages/icevision/parsers/parser.py:119: in parse
records = self.parse_dicted(show_pbar=show_pbar)
/opt/conda/lib/python3.9/site-packages/icevision/parsers/parser.py:77: in parse_dicted
self.parse_fields(sample, record=record, is_new=record.is_new)
/opt/conda/lib/python3.9/site-packages/icevision/parsers/coco_parser.py:101: in parse_fields
super().parse_fields(o, record, is_new=is_new)
/opt/conda/lib/python3.9/site-packages/icevision/parsers/coco_parser.py:82: in parse_fields
record.set_img_size(self.img_size(o))
/opt/conda/lib/python3.9/site-packages/icevision/parsers/coco_parser.py:68: in img_size
return get_img_size(self.filepath(o))
/opt/conda/lib/python3.9/site-packages/icevision/utils/imageio.py:94: in get_img_size
with PIL.Image.open(str(filepath)) as image:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
fp = <_io.BufferedReader name='temp-9ba11f6f-a8e0-4102-9c07-d6eaafffef76/img-fc426b36-8bc4-4cd3-a15d-5b2c4504f661.tiff'>, mode = 'r'
formats = ['PNG', 'BMP', 'DIB', 'GIF', 'TIFF', 'JPEG', ...]
def open(fp, mode="r", formats=None):
"""
Opens and identifies the given image file.
This is a lazy operation; this function identifies the file, but
the file remains open and the actual image data is not read from
the file until you try to process the data (or call the
:py:meth:`~PIL.Image.Image.load` method). See
:py:func:`~PIL.Image.new`. See :ref:`file-handling`.
:param fp: A filename (string), pathlib.Path object or a file object.
The file object must implement ``file.read``,
``file.seek``, and ``file.tell`` methods,
and be opened in binary mode.
:param mode: The mode. If given, this argument must be "r".
:param formats: A list or tuple of formats to attempt to load the file in.
This can be used to restrict the set of formats checked.
Pass ``None`` to try all supported formats. You can print the set of
available formats by running ``python3 -m PIL`` or using
the :py:func:`PIL.features.pilinfo` function.
:returns: An :py:class:`~PIL.Image.Image` object.
:exception FileNotFoundError: If the file cannot be found.
:exception PIL.UnidentifiedImageError: If the image cannot be opened and
identified.
:exception ValueError: If the ``mode`` is not "r", or if a ``StringIO``
instance is used for ``fp``.
:exception TypeError: If ``formats`` is not ``None``, a list or a tuple.
"""
if mode != "r":
raise ValueError(f"bad mode {repr(mode)}")
elif isinstance(fp, io.StringIO):
raise ValueError(
"StringIO cannot be used to open an image. "
"Binary data must be used instead."
)
if formats is None:
formats = ID
elif not isinstance(formats, (list, tuple)):
raise TypeError("formats must be a list or tuple")
exclusive_fp = False
filename = ""
if isinstance(fp, Path):
filename = str(fp.resolve())
elif isPath(fp):
filename = fp
if filename:
fp = builtins.open(filename, "rb")
exclusive_fp = True
try:
fp.seek(0)
except (AttributeError, io.UnsupportedOperation):
fp = io.BytesIO(fp.read())
exclusive_fp = True
prefix = fp.read(16)
preinit()
accept_warnings = []
def _open_core(fp, filename, prefix, formats):
for i in formats:
i = i.upper()
if i not in OPEN:
init()
try:
factory, accept = OPEN[i]
result = not accept or accept(prefix)
if type(result) in [str, bytes]:
accept_warnings.append(result)
elif result:
fp.seek(0)
im = factory(fp, filename)
_decompression_bomb_check(im.size)
return im
except (SyntaxError, IndexError, TypeError, struct.error):
# Leave disabled by default, spams the logs with image
# opening failures that are entirely expected.
# logger.debug("", exc_info=True)
continue
except BaseException:
if exclusive_fp:
fp.close()
raise
return None
im = _open_core(fp, filename, prefix, formats)
if im is None:
if init():
im = _open_core(fp, filename, prefix, formats)
if im:
im._exclusive_fp = exclusive_fp
return im
if exclusive_fp:
fp.close()
for message in accept_warnings:
warnings.warn(message)
> raise UnidentifiedImageError(
"cannot identify image file %r" % (filename if filename else fp)
)
E PIL.UnidentifiedImageError: cannot identify image file 'temp-9ba11f6f-a8e0-4102-9c07-d6eaafffef76/img-fc426b36-8bc4-4cd3-a15d-5b2c4504f661.tiff'
To Reproduce
Steps to reproduce the behavior:
- Create a tif image with 3 bands of dtype float32 (e.g. using rasterio)
- Create a COCO dataset for this image
- Use the COCO parser to parse the data
Expected behavior: I expect to be able to parse a COCO dataset whose images are in tif format.
Desktop (please complete the following information):
- OS: ubuntu 18.04