py-feat icon indicating copy to clipboard operation
py-feat copied to clipboard

New functionality -> detect_frame

Open pretbc opened this issue 2 years ago • 1 comments

Could you implement the method below?

    def detect_frame(
            self,
            frames,
            output_size=None,
            batch_size=1,
            num_workers=0,
            pin_memory=False,
            frame_counter=0,
            face_detection_threshold=0.5,
            **kwargs,
    ):
        """Run the detection waterfall on in-memory frames.

        Same functionality as ``detect_image``, but instead of file-path
        strings the user passes raw ``np.ndarray`` frames (a single frame
        or a list of frames).

        Args:
            frames: a single ``np.ndarray`` frame or a list of frames.
            output_size (int, optional): size to rescale frames to; required
                when ``batch_size > 1`` and frames have different dimensions.
            batch_size (int): number of frames processed per batch.
            num_workers (int): number of DataLoader worker processes.
            pin_memory (bool): passed through to the DataLoader.
            frame_counter (int): starting frame index used to label output rows.
            face_detection_threshold (float): minimum face-detection confidence.
            **kwargs: optional ``*_model_kwargs`` dicts forwarded to the
                underlying models.

        Returns:
            Concatenated detection results for all frames (a Fex dataframe).

        Raises:
            ValueError: when batched frames have mismatched dimensions and
                no ``output_size`` was given to rescale them.
        """

        # Keyword arguments that can be passed to the underlying models
        face_model_kwargs = kwargs.pop("face_model_kwargs", dict())
        landmark_model_kwargs = kwargs.pop("landmark_model_kwargs", dict())
        au_model_kwargs = kwargs.pop("au_model_kwargs", dict())
        emotion_model_kwargs = kwargs.pop("emotion_model_kwargs", dict())
        facepose_model_kwargs = kwargs.pop("facepose_model_kwargs", dict())

        data_loader = DataLoader(
            FrameDataset(
                frames,
                output_size=output_size,
                preserve_aspect_ratio=True,
                padding=True,
            ),
            num_workers=num_workers,
            batch_size=batch_size,
            pin_memory=pin_memory,
            shuffle=False,
        )

        if self.info["landmark_model"] == "mobilenet" and batch_size > 1:
            warnings.warn(
                "Currently using mobilenet for landmark detection with batch_size > 1 may lead to erroneous detections."
                " We recommend either setting batch_size=1 or using mobilefacenet as the landmark detection model."
                " You can follow this issue for more: https://github.com/cosanlab/py-feat/issues/151"
            )

        try:
            batch_output = []

            for batch_id, batch_data in enumerate(tqdm(data_loader)):
                faces, landmarks, poses, aus, emotions = self._run_detection_waterfall(
                    batch_data,
                    face_detection_threshold,
                    face_model_kwargs,
                    landmark_model_kwargs,
                    facepose_model_kwargs,
                    emotion_model_kwargs,
                    au_model_kwargs,
                )

                output = self._create_fex(
                    faces,
                    landmarks,
                    poses,
                    aus,
                    emotions,
                    batch_data["FileNames"],
                    frame_counter,
                )
                batch_output.append(output)
                # Advance by the *actual* number of frames in this batch so the
                # counter stays correct when the final batch is smaller than
                # batch_size.
                frame_counter += len(batch_data["FileNames"])

            batch_output = pd.concat(batch_output)
            batch_output.reset_index(drop=True, inplace=True)

            return batch_output
        except RuntimeError as e:
            # Chain the original torch error so the full context is preserved.
            raise ValueError(
                "when using a batch_size > 1 all images must have the same dimensions or output_size must not be None"
                f" so py-feat can rescale images to output_size. See pytorch error: \n{e}"
            ) from e

I'm missing such an implementation for raw frames based on np.ndarray,

as well as a FrameDataset class:

class FrameDataset(Dataset):
    """New implementation of `feat.data.ImageDataset` to handle `np.ndarray` frames.

    To name each frame UUID4 is used.
    """

    def __init__(
            self, frames: np.ndarray | list[np.ndarray], output_size=None, preserve_aspect_ratio=True, padding=False
    ):
        if not isinstance(frames, list):
            frames = [frames]
        self.frames = frames
        self.output_size = output_size
        self.preserve_aspect_ratio = preserve_aspect_ratio
        self.padding = padding

    def __len__(self):
        return len(self.frames)

    def __getitem__(self, idx):
        # Dimensions are [channels, height, width]
        temp_name = str(uuid.uuid4())
        frame = self.frames[idx]
        frame = transforms.ToPILImage()(frame)
        frame = transforms.PILToTensor()(frame)
        if frame.shape[0] == 4:
            frame = frame[:3, ...]
        if frame.shape[0] == 1:
            frame = torch.cat([frame, frame, frame], dim=0)
        if self.output_size is not None:
            transform = transforms.Compose(
                [
                    Rescale(
                        self.output_size,
                        preserve_aspect_ratio=self.preserve_aspect_ratio,
                        padding=self.padding,
                    )
                ]
            )
            transformed_img = transform(frame)
            return {
                "Image": transformed_img["Image"],
                "Scale": transformed_img["Scale"],
                "Padding": transformed_img["Padding"],
                "FileNames": temp_name,
            }
        else:
            return {
                "Image": frame,
                "Scale": 1.0,
                "Padding": {"Left": 0, "Top": 0, "Right": 0, "Bottom": 0},
                "FileNames": temp_name,
            }

pretbc avatar Nov 09 '23 09:11 pretbc

Thanks @pretbc for the suggestion. We will add something like this soon when we do our next code sprint. We have something similar implemented for our forthcoming live demo standalone app and will try to integrate your suggestion with what we've already created.

ljchang avatar Jan 03 '24 16:01 ljchang