rtmlib
rtmlib copied to clipboard
rtmo preprocessing
Dear author,
Should the input image be placed in the center of the padded image? In the current code, the input image is placed in the top-left corner of the padded image.
In my opinion the code should be:
padded_img[int((self.model_input_size[0]-padded_shape[0])/2):int((self.model_input_size[0]-padded_shape[0])/2) + padded_shape[0], int((self.model_input_size[1]-padded_shape[1])/2):int((self.model_input_size[1]-padded_shape[1])/2) +padded_shape[1]] = resized_img
Best regards
def preprocess(self, img: np.ndarray):
    """Resize an image to fit the model input and pad it to full size.

    The image is scaled by a single factor so that it fits inside
    ``self.model_input_size`` with its aspect ratio preserved, then copied
    into the top-left corner of a canvas filled with the value 114. When
    ``self.mean`` is set, the canvas is normalized with ``self.mean`` and
    ``self.std``.

    Args:
        img (np.ndarray): Input image. A 3-dimensional image is padded
            onto a 3-channel canvas; any other input is padded onto a
            canvas of shape ``self.model_input_size``.

    Returns:
        tuple:
        - padded_img (np.ndarray): Padded (and, if ``self.mean`` is set,
          normalized) image.
        - ratio (float): Scale factor that was applied to the input
          image before padding.
    """
    input_h, input_w = self.model_input_size[0], self.model_input_size[1]

    if len(img.shape) == 3:
        padded_img = np.full((input_h, input_w, 3), 114, dtype=np.uint8)
    else:
        padded_img = np.full(self.model_input_size, 114, dtype=np.uint8)

    # One scale factor for both axes keeps the aspect ratio intact.
    ratio = min(input_h / img.shape[0], input_w / img.shape[1])

    # Truncation can yield 0 for very elongated images; clamp to 1 so that
    # cv2.resize never receives an empty target size.
    resized_h = max(1, int(img.shape[0] * ratio))
    resized_w = max(1, int(img.shape[1] * ratio))

    resized_img = cv2.resize(
        img,
        (resized_w, resized_h),  # cv2 expects (width, height)
        interpolation=cv2.INTER_LINEAR,
    ).astype(np.uint8)

    # The image is anchored at the top-left corner (not centered).
    # NOTE(review): callers that map predictions back to the original image
    # using only `ratio` presumably rely on this zero offset -- confirm
    # against the postprocessing code before changing the placement.
    padded_img[:resized_h, :resized_w] = resized_img

    # normalize image
    if self.mean is not None:
        # Use local arrays so the instance attributes are not rebound on
        # every call.
        mean = np.array(self.mean)
        std = np.array(self.std)
        padded_img = (padded_img - mean) / std

    return padded_img, ratio