phenobench-baselines icon indicating copy to clipboard operation
phenobench-baselines copied to clipboard

Utilize the GrowliFlower Dataset for Semantic Segmentation

Open chenfh21 opened this issue 1 year ago • 1 comments

https://github.com/PRBonn/phenobench-baselines/blob/78db625441e54c5b64efabeb0d886d020961665b/panoptic_segmentation/panopticdeeplab/src/tools/datasets.py#L325 Hi, I want to use the GrowliFlower dataset for comparing semantic segmentation tasks. I've rewritten the classes according to the semantic segmentation baseline, but there might be an error in the labeling. Is this an issue with the dataset, or is it because the label format of this dataset doesn't match the image data format of PhenoBench?

import os.path

import pytorch_lightning as pl
from pytorch_lightning.utilities.types import EVAL_DATALOADERS
from torch.utils.data import Dataset, DataLoader

# Wildcard imports presumably provide ImageNormalizer, GeometricDataAugmentation,
# get_img_fnames_in_dir, transforms, Image, np, torch, random, typing names, etc.
# NOTE(review): prefer explicit imports over `*` once the provided names are confirmed.
from pdc_datasets.augmentations_geometry import *
from pdc_datasets.augmentations_normalize import *
from pdc_datasets.augmentations_color import *

class GrowliFlower(Dataset):
    """PyTorch dataset for GrowliFlower images and their 'maskPlants' annotations.

    Assumed directory layout (TODO confirm against the actual dataset download):
      <root>/images/{Train,Val,Test}/<image files>
      <root>/labels/{Train,Val,Test}/maskPlants/<annotation files>
    """

    def __init__(self,
                 path_to_dataset: str,
                 mode: str,
                 img_normalizer: ImageNormalizer,
                 augmentations_geometric: List[GeometricDataAugmentation],
                 augmentations_color: List[Callable]):
        """ Get the path to all images and its corresponding annotations.

        Args:
            path_to_dataset (str): Path to dir that contains the images and annotations
            mode (str): train, val, test
            img_normalizer (ImageNormalizer): Specifies how to normalize the input images
            augmentations_geometric (List[GeometricDataAugmentation]): Geometric data augmentations applied to the image
                                                                       and its annotations
            augmentations_color (List[Callable]): Color data augmentations applied to the image
        """
        super().__init__()

        assert os.path.exists(path_to_dataset), f"The path to the dataset does not exist: {path_to_dataset}."
        assert mode in ['train', 'val', 'test']
        self.mode = mode

        self.img_normalizer = img_normalizer
        self.augmentations_geometric = augmentations_geometric
        self.augmentations_color = augmentations_color

        # NOTE(review): .replace("images", "labels") rewrites *every* occurrence of
        # "images" in the path, so it misbehaves if the dataset root itself contains
        # the substring "images" — confirm the root path is free of it.
        # --------------------------------Prepare Training------------------------------------#
        self.path_to_train_images = os.path.join(path_to_dataset, "images", "Train")
        self.path_to_train_annos = os.path.join(self.path_to_train_images.replace("images", "labels"), "maskPlants")
        self.filenames_train = get_img_fnames_in_dir(self.path_to_train_images)  # ['patch_2020_08_12_29604.jpg'...]
        self.filenames_t_anno = get_img_fnames_in_dir(self.path_to_train_annos)

        # --------------------------------Prepare Validation----------------------------------#
        self.path_to_val_images = os.path.join(path_to_dataset, "images", "Val")
        self.path_to_val_annos = os.path.join(self.path_to_val_images.replace("images", "labels"), "maskPlants")
        self.filenames_val = get_img_fnames_in_dir(self.path_to_val_images)  # ['patch_2020_08_12_29604.jpg'...]
        self.filenames_v_anno = get_img_fnames_in_dir(self.path_to_val_annos)

        # --------------------------------Prepare Testing-----------------------------------#
        self.path_to_test_images = os.path.join(path_to_dataset, "images", "Test")
        self.path_to_test_annos = os.path.join(self.path_to_test_images.replace("images", "labels"), "maskPlants")
        self.filenames_test = get_img_fnames_in_dir(self.path_to_test_images)  # ['patch_2020_08_12_29604.jpg'...]
        self.filenames_e_anno = get_img_fnames_in_dir(self.path_to_test_annos)

        # specify image transformations
        self.img_to_tensor = transforms.ToTensor()

def get_train_item(self, idx: int) -> Dict:
    """Load, augment, remap, and normalize the training sample at *idx*.

    Returns a dict with the un-normalized image, the normalized image, the
    remapped annotation ([H x W], int64), and the image filename.
    """
    fname = self.filenames_train[idx]
    image = self.img_to_tensor(Image.open(os.path.join(self.path_to_train_images, fname)))  # [C x H x W] in [0, 1]

    # Apply the color augmentation pipeline with 75% probability.
    if random.random() > 0.25:
        for color_fn in self.augmentations_color:
            image = color_fn(image)

    # Match the annotation file by filename stem (first hit wins).
    stem = os.path.splitext(fname)[0]
    anno_fname = [f for f in self.filenames_t_anno if stem in f][0]
    anno_np = np.array(Image.open(os.path.join(self.path_to_train_annos, anno_fname)))  # dtype: int32
    if anno_np.ndim > 2:
        anno_np = anno_np[:, :, 0]  # keep a single channel for multi-channel masks
    anno = torch.Tensor(anno_np.astype(np.int64)).type(torch.int64).unsqueeze(0)  # [1 x H x W]

    # Geometric augmentations transform image and annotation jointly.
    for geometric_fn in self.augmentations_geometric:
        image, anno = geometric_fn(image, anno)
    anno = anno.squeeze()  # [H x W]

    # Remap raw label ids 3 -> 1 and 4 -> 2 (writing 1s cannot create 4s,
    # so computing both masks up front is equivalent to sequential remapping).
    is_three = anno == 3
    is_four = anno == 4
    anno[is_three] = 1
    anno[is_four] = 2

    image_before_norm = image.clone()
    image = self.img_normalizer.normalize(image)

    return {'input_image_before_norm': image_before_norm,
            'input_image': image,
            'anno': anno,
            'fname': fname}

def get_val_item(self, idx: int) -> Dict:
    """Load, remap, and normalize the validation sample at *idx* (no augmentation).

    NOTE(review): self.augmentations_geometric is never applied here even though
    the data module passes 'val' geometric augmentations — confirm this is intended.
    """
    fname = self.filenames_val[idx]
    image = self.img_to_tensor(Image.open(os.path.join(self.path_to_val_images, fname)))  # [C x H x W] in [0, 1]

    # Match the annotation file by filename stem (first hit wins).
    stem = os.path.splitext(fname)[0]
    anno_fname = [f for f in self.filenames_v_anno if stem in f][0]
    anno_np = np.array(Image.open(os.path.join(self.path_to_val_annos, anno_fname)))  # dtype: int32
    if anno_np.ndim > 2:
        anno_np = anno_np[:, :, 0]  # keep a single channel for multi-channel masks
    anno = torch.Tensor(anno_np.astype(np.int64)).type(torch.int64)  # [H x W]

    image_before_norm = image.clone()
    image = self.img_normalizer.normalize(image)

    # Remap raw label ids 3 -> 1 and 4 -> 2.
    is_three = anno == 3
    is_four = anno == 4
    anno[is_three] = 1
    anno[is_four] = 2

    return {'input_image_before_norm': image_before_norm,
            'input_image': image,
            'anno': anno,
            'fname': fname}

def get_test_item(self, idx: int) -> Dict:
    """Load, remap, and normalize the test sample at *idx* (no augmentation).

    NOTE(review): self.augmentations_geometric is never applied here even though
    the data module passes 'test' geometric augmentations — confirm this is intended.
    """
    fname = self.filenames_test[idx]
    image = self.img_to_tensor(Image.open(os.path.join(self.path_to_test_images, fname)))  # [C x H x W] in [0, 1]

    # Match the annotation file by filename stem (first hit wins).
    stem = os.path.splitext(fname)[0]
    anno_fname = [f for f in self.filenames_e_anno if stem in f][0]
    anno_np = np.array(Image.open(os.path.join(self.path_to_test_annos, anno_fname)))  # dtype: int32
    if anno_np.ndim > 2:
        anno_np = anno_np[:, :, 0]  # keep a single channel for multi-channel masks
    anno = torch.Tensor(anno_np.astype(np.int64)).type(torch.int64)  # [H x W]

    image_before_norm = image.clone()
    image = self.img_normalizer.normalize(image)

    # Remap raw label ids 3 -> 1 and 4 -> 2.
    is_three = anno == 3
    is_four = anno == 4
    anno[is_three] = 1
    anno[is_four] = 2

    return {'input_image_before_norm': image_before_norm,
            'input_image': image,
            'anno': anno,
            'fname': fname}

def __getitem__(self, idx: int):
    """Dispatch to the per-split getter selected by ``self.mode``."""
    getter = {'train': self.get_train_item,
              'val': self.get_val_item,
              'test': self.get_test_item}.get(self.mode)
    if getter is not None:
        return getter(idx)
    # Unreachable for valid modes (mode is asserted in __init__); mirrors the
    # original implicit None return otherwise.

def __len__(self):
    """Number of samples in the split selected by ``self.mode``."""
    filename_lists = {'train': self.filenames_train,
                      'val': self.filenames_val,
                      'test': self.filenames_test}
    if self.mode in filename_lists:
        return len(filename_lists[self.mode])
    # Unreachable for valid modes; mirrors the original implicit None return.

class GrowliFlowerModule(pl.LightningDataModule):
    """ Encapsulates all the methods needed to process data from the GrowliFlower dataset. """

    def __init__(self, cfg: Dict):
        """Store the configuration used by :meth:`setup` and the dataloader methods.

        Args:
            cfg (Dict): Config with at least 'data' (path_to_dataset, num_workers)
                        and 'train'/'val'/'test' (batch_size, ...) sections.
        """
        super().__init__()

        self.cfg = cfg

def setup(self, stage: Optional[str] = None):
    """ Data operations we perform on every GPU.

    Here we define the how to split the dataset.

    Args:
        stage(Optional[str], optional): _description_. Defaults to None.
    """
    path_to_dataset = self.cfg['data']['path_to_dataset']
    image_normalizer = get_image_normalizer(self.cfg)

    if (stage == 'fit') or (stage == 'validate') or (stage is None):
        # ------------------------TRAIN-----------------------------
        train_augmentations_geometric = get_geometric_augmentations(self.cfg, 'train')
        train_augmentations_color = get_color_augmentations(self.cfg, 'train')
        self.train_ds = GrowliFlower(
            path_to_dataset,
            mode='train',
            img_normalizer=image_normalizer,
            augmentations_geometric=train_augmentations_geometric,
            augmentations_color=train_augmentations_color
        )

        # -----------------------VAL--------------------------------
        val_augmentations_geometric = get_geometric_augmentations(self.cfg, 'val')
        self.val_ds = GrowliFlower(
            path_to_dataset,
            mode='val',
            img_normalizer=image_normalizer,
            augmentations_geometric=val_augmentations_geometric,
            augmentations_color=[])

    if stage == 'test' or stage is None:
        # --------------------------TEST----------------------------
        test_augmentations_geometric = get_geometric_augmentations(self.cfg, 'test')
        self.test_ds = GrowliFlower(
            path_to_dataset,
            mode='test',
            img_normalizer=image_normalizer,
            augmentations_geometric=test_augmentations_geometric,
            augmentations_color=[])

def train_dataloader(self) -> DataLoader:
    """Build the training DataLoader from the 'train' and 'data' config sections."""
    return DataLoader(self.train_ds,
                      batch_size=self.cfg['train']['batch_size'],
                      shuffle=self.cfg['train']['shuffle'],
                      num_workers=self.cfg['data']['num_workers'],
                      drop_last=True,
                      pin_memory=True)

def val_dataloader(self) -> DataLoader:
    """Build the validation DataLoader from the 'val' and 'data' config sections.

    Fix: the original passed drop_last=True, which silently discards up to
    batch_size-1 validation samples and biases the reported metrics; validation
    must evaluate every sample, so the final partial batch is kept.
    """
    batch_size: int = self.cfg['val']['batch_size']
    n_workers: int = self.cfg['data']['num_workers']

    loader = DataLoader(self.val_ds, batch_size=batch_size, num_workers=n_workers, shuffle=False,
                        drop_last=False, pin_memory=True)

    return loader

def test_dataloader(self)-> DataLoader:
    batch_size: int = self.cfg['test']['batch_size']
    n_workers: int = self.cfg['data']['num_workers']

    loader = DataLoader(self.test_ds, batch_size=batch_size, num_workers=n_workers, shuffle=False, pin_memory=True)

    return loader

chenfh21 avatar Oct 15 '24 12:10 chenfh21

Not sure, but the label format from GrowliFlower is definitely different. I only know that GrowliFlower has no weeds labeled.

(I unfortunately have no time to check and debug your code; however, @theroggio, @JaWeyl, or @yuelinn might have some insights on the usage of GrowliFlower.)

jbehley avatar Oct 18 '24 18:10 jbehley