py-feat
py-feat copied to clipboard
New functionality -> detect_frame
Can you implement the following?
def detect_frame(
    self,
    frames,
    output_size=None,
    batch_size=1,
    num_workers=0,
    pin_memory=False,
    frame_counter=0,
    face_detection_threshold=0.5,
    **kwargs,
):
    """Detect faces, landmarks, poses, AUs, and emotions in raw frames.

    Same functionality as ``detect_image``, but instead of ``str`` image
    paths the user can pass one or more ``np.ndarray`` frames.

    Args:
        frames (np.ndarray or list of np.ndarray): frame(s) to process.
        output_size (int or tuple, optional): rescale target. Required when
            batch_size > 1 and frames have differing dimensions.
        batch_size (int): number of frames processed per batch.
        num_workers (int): number of DataLoader worker processes.
        pin_memory (bool): forwarded to the DataLoader.
        frame_counter (int): starting frame index recorded in the output.
        face_detection_threshold (float): minimum confidence for keeping a
            detected face.
        **kwargs: per-model keyword arguments passed via the
            ``face_model_kwargs`` / ``landmark_model_kwargs`` /
            ``au_model_kwargs`` / ``emotion_model_kwargs`` /
            ``facepose_model_kwargs`` dicts.

    Returns:
        Concatenated detection results (Fex / pd.DataFrame) for all frames,
        with a fresh 0-based index.

    Raises:
        ValueError: when batched frames have mismatched dimensions and no
            ``output_size`` was given so they could be rescaled.
    """
    # Keyword arguments that can be passed to the underlying models
    face_model_kwargs = kwargs.pop("face_model_kwargs", dict())
    landmark_model_kwargs = kwargs.pop("landmark_model_kwargs", dict())
    au_model_kwargs = kwargs.pop("au_model_kwargs", dict())
    emotion_model_kwargs = kwargs.pop("emotion_model_kwargs", dict())
    facepose_model_kwargs = kwargs.pop("facepose_model_kwargs", dict())

    data_loader = DataLoader(
        FrameDataset(
            frames,
            output_size=output_size,
            preserve_aspect_ratio=True,
            padding=True,
        ),
        num_workers=num_workers,
        batch_size=batch_size,
        pin_memory=pin_memory,
        shuffle=False,
    )

    if self.info["landmark_model"] == "mobilenet" and batch_size > 1:
        warnings.warn(
            "Currently using mobilenet for landmark detection with batch_size > 1 may lead to erroneous detections."
            " We recommend either setting batch_size=1 or using mobilefacenet as the landmark detection model."
            " You can follow this issue for more: https://github.com/cosanlab/py-feat/issues/151"
        )

    try:
        batch_output = []
        for batch_id, batch_data in enumerate(tqdm(data_loader)):
            faces, landmarks, poses, aus, emotions = self._run_detection_waterfall(
                batch_data,
                face_detection_threshold,
                face_model_kwargs,
                landmark_model_kwargs,
                facepose_model_kwargs,
                emotion_model_kwargs,
                au_model_kwargs,
            )
            output = self._create_fex(
                faces,
                landmarks,
                poses,
                aus,
                emotions,
                batch_data["FileNames"],
                frame_counter,
            )
            batch_output.append(output)
            # Advance by the *actual* batch length so the counter stays
            # correct on the final (possibly smaller) batch. The default
            # collate turns the per-item "FileNames" strings into a list
            # whose length is the true batch size.
            frame_counter += len(batch_data["FileNames"])

        batch_output = pd.concat(batch_output)
        batch_output.reset_index(drop=True, inplace=True)
        return batch_output
    except RuntimeError as e:
        # Chain the original error so the torch stack trace is preserved.
        raise ValueError(
            f"when using a batch_size > 1 all images must have the same dimensions or output_size must not be None"
            f" so py-feat can rescale images to output_size. See pytorch error: \n{e}"
        ) from e
I'm lacking such an implementation for raw frames based on np.ndarray,
as well as the FrameDataset class below:
class FrameDataset(Dataset):
    """Dataset over in-memory ``np.ndarray`` frames.

    Counterpart of ``feat.data.ImageDataset`` for raw frames. Each frame is
    assigned a UUID4 pseudo "file name" so downstream code that keys results
    by file name keeps working.
    """

    def __init__(
        self,
        frames,
        output_size=None,
        preserve_aspect_ratio=True,
        padding=False,
    ):
        """
        Args:
            frames (np.ndarray or list of np.ndarray): a single H x W x C
                frame or a list of such frames.
            output_size (int or tuple, optional): target size for Rescale;
                None keeps the original size.
            preserve_aspect_ratio (bool): forwarded to Rescale.
            padding (bool): forwarded to Rescale.
        """
        if not isinstance(frames, list):
            frames = [frames]
        self.frames = frames
        # Generate the names ONCE so repeated __getitem__ calls (e.g. from
        # multiple DataLoader workers or across epochs) return a stable name
        # per frame. Generating a fresh uuid inside __getitem__ would give
        # the same frame a different "file name" on every access.
        self.names = [str(uuid.uuid4()) for _ in frames]
        self.output_size = output_size
        self.preserve_aspect_ratio = preserve_aspect_ratio
        self.padding = padding

    def __len__(self):
        return len(self.frames)

    def __getitem__(self, idx):
        # Resulting tensor dimensions are [channels, height, width]
        frame = transforms.ToPILImage()(self.frames[idx])
        frame = transforms.PILToTensor()(frame)
        if frame.shape[0] == 4:
            # Drop the alpha channel from RGBA input
            frame = frame[:3, ...]
        if frame.shape[0] == 1:
            # Replicate grayscale input to 3 channels
            frame = torch.cat([frame, frame, frame], dim=0)

        name = self.names[idx]
        if self.output_size is None:
            return {
                "Image": frame,
                "Scale": 1.0,
                "Padding": {"Left": 0, "Top": 0, "Right": 0, "Bottom": 0},
                "FileNames": name,
            }

        # Rescale is a single transform; no Compose wrapper needed.
        rescaled = Rescale(
            self.output_size,
            preserve_aspect_ratio=self.preserve_aspect_ratio,
            padding=self.padding,
        )(frame)
        return {
            "Image": rescaled["Image"],
            "Scale": rescaled["Scale"],
            "Padding": rescaled["Padding"],
            "FileNames": name,
        }
Thanks @pretbc for the suggestion. We will add something like this soon when we do our next code sprint. We have something similar implemented for our forthcoming live demo standalone app, and we will try to integrate your suggestion with what we've already created.