Pre-processing and model inputs
-
The gaze model takes 4 inputs:
gaze = model(left_eye_img, right_eye_img, face_img, rects) -
I am generating these inputs using face detection via the BlazeFace model:
def _clip_square(center_x, center_y, half_size, width, height):
    """Return ``(x0, y0, x1, y1)`` of a square around a point, clipped to the image.

    Every coordinate is forced into ``[0, width]`` / ``[0, height]`` so the
    result can be used directly as slice bounds: it never contains a negative
    index (which NumPy would interpret as "count from the end") and always
    satisfies ``x0 <= x1`` and ``y0 <= y1``.
    """
    x0 = max(0, min(center_x - half_size, width))
    y0 = max(0, min(center_y - half_size, height))
    x1 = max(0, min(center_x + half_size, width))
    y1 = max(0, min(center_y + half_size, height))
    return x0, y0, x1, y1


def preprocess(image, *, eye_size=40, show=True):
    """Detect a face and cut out the face and eye crops for the gaze model.

    The image is resized to 1000x1000, run through MediaPipe face detection,
    and the face box plus a square around each eye keypoint are cropped from
    the resized image.

    Args:
        image: BGR image as loaded by OpenCV.
        eye_size: Half side length, in pixels of the resized image, of the
            square cropped around each eye keypoint (crop is at most
            ``2 * eye_size`` per side).
        show: When true (the default), display the three crops with
            ``cv2.imshow`` and block until a key is pressed.

    Returns:
        ``(face_img, left_eye_img, right_eye_img, rects_)``. ``rects_`` holds
        three ``[width, height, x_min, y_min]`` lists (face, left eye, right
        eye), normalized by the resized image size. Returns four ``None``
        values when no face is detected or when any crop ends up empty.
        Note the order differs from the ``model(left, right, face, rects)``
        call shown alongside this function; the caller must reorder.

    Raises:
        ValueError: If ``image`` is ``None``.
    """
    if image is None:
        raise ValueError("Image is not loaded properly!")

    # Work on a fixed 1000x1000 canvas; MediaPipe expects RGB, OpenCV loads BGR.
    image_resized = cv2.resize(image, (1000, 1000))
    image_rgb = cv2.cvtColor(image_resized, cv2.COLOR_BGR2RGB)

    with mp_face_detection.FaceDetection(model_selection=1, min_detection_confidence=0.5) as face_detection:
        results = face_detection.process(image_rgb)

    if not results.detections:
        return None, None, None, None

    # The previous per-detection loop overwrote every output on each pass, so
    # only the final detection was ever returned; select it directly.
    detection = results.detections[-1]
    location = detection.location_data
    ih, iw, _ = image_resized.shape

    # The detector may report a box that extends past the image. Clip the
    # normalized coordinates so the crop indices are never negative and the
    # returned rect describes the pixels that were actually cropped.
    bbox = location.relative_bounding_box
    x_min = max(0.0, min(bbox.xmin, 1.0))
    y_min = max(0.0, min(bbox.ymin, 1.0))
    x_max = max(0.0, min(bbox.xmin + bbox.width, 1.0))
    y_max = max(0.0, min(bbox.ymin + bbox.height, 1.0))

    # Rect layout: [width, height, x_min, y_min], all normalized.
    face_rect = [x_max - x_min, y_max - y_min, x_min, y_min]
    face_img = image_resized[int(y_min * ih):int(y_max * ih), int(x_min * iw):int(x_max * iw)]

    # NOTE(review): MediaPipe's FaceKeyPoint enum names index 0 RIGHT_EYE and
    # index 1 LEFT_EYE (subject's perspective) - confirm which convention the
    # gaze model expects. The original index assignment is kept unchanged.
    keypoints = location.relative_keypoints
    left_eye = keypoints[0]
    right_eye = keypoints[1]

    eye_imgs = []
    eye_rects = []
    for eye in (left_eye, right_eye):
        x0, y0, x1, y1 = _clip_square(int(eye.x * iw), int(eye.y * ih), eye_size, iw, ih)
        eye_imgs.append(image_resized[y0:y1, x0:x1])
        eye_rects.append([(x1 - x0) / iw, (y1 - y0) / ih, x0 / iw, y0 / ih])
    left_eye_img, right_eye_img = eye_imgs

    rects_ = [face_rect, *eye_rects]

    # A face or eye lying entirely outside the frame leaves nothing to crop;
    # treat that the same as "no face detected".
    if face_img.size == 0 or left_eye_img.size == 0 or right_eye_img.size == 0:
        return None, None, None, None

    # NOTE(review): crops are returned at their native size (no resize to a
    # fixed input resolution happens here) - confirm what the model expects.
    if show:
        cv2.imshow("left eye", left_eye_img)
        cv2.imshow("right eye", right_eye_img)
        cv2.imshow("face", face_img)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    return face_img, left_eye_img, right_eye_img, rects_
- ~What am I doing wrong as my rects_ has a negative value for a given image~ (solved)
Corresponding rects:
[[0.5501710772514343, 0.5502035617828369, 0.22646528482437134, 0.21633373200893402], [0.08, 0.08, 0.354, 0.335], [0.08, 0.08, 0.568, 0.343]]
Verifying these points on a graph:
I have marked the coordinates of top-left corners for face and eye boxes.
- When I execute the model using these inputs I get:
RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x14400 and 1600x128)
I have simplified the code and made a script that runs on a single image. Please find it here: https://gist.github.com/ceyxasm/2c509d6368bc97d741176a5a00a7c717
The script needs to be run in the same folder as the root of the repo
TL;DR: it seems the input sizes of face_img, left_eye_img, and right_eye_img must be fixed, but what are they?