
How to do the actual alignment?

Open Coderx7 opened this issue 5 years ago • 16 comments

Hi @1adrianb: thanks a lot for all of this, really appreciate it. I was wondering if it's possible to get the aligned images. Your FaceAlignment class, as the name suggests, should provide such functionality, but I can't seem to find any examples that do the actual alignment.
Would you kindly show us how to do this as well?

Coderx7 avatar Jan 12 '20 05:01 Coderx7

import numpy as np
import cv2
import face_alignment

def get_position(size, padding=0.25):
    # Mean-face template: 51 normalized (x, y) landmark positions, scaled to a size x size image.
    
    x = [0.000213256, 0.0752622, 0.18113, 0.29077, 0.393397, 0.586856, 0.689483, 0.799124,
                    0.904991, 0.98004, 0.490127, 0.490127, 0.490127, 0.490127, 0.36688, 0.426036,
                    0.490127, 0.554217, 0.613373, 0.121737, 0.187122, 0.265825, 0.334606, 0.260918,
                    0.182743, 0.645647, 0.714428, 0.793132, 0.858516, 0.79751, 0.719335, 0.254149,
                    0.340985, 0.428858, 0.490127, 0.551395, 0.639268, 0.726104, 0.642159, 0.556721,
                    0.490127, 0.423532, 0.338094, 0.290379, 0.428096, 0.490127, 0.552157, 0.689874,
                    0.553364, 0.490127, 0.42689]
    
    y = [0.106454, 0.038915, 0.0187482, 0.0344891, 0.0773906, 0.0773906, 0.0344891,
                    0.0187482, 0.038915, 0.106454, 0.203352, 0.307009, 0.409805, 0.515625, 0.587326,
                    0.609345, 0.628106, 0.609345, 0.587326, 0.216423, 0.178758, 0.179852, 0.231733,
                    0.245099, 0.244077, 0.231733, 0.179852, 0.178758, 0.216423, 0.244077, 0.245099,
                    0.780233, 0.745405, 0.727388, 0.742578, 0.727388, 0.745405, 0.780233, 0.864805,
                    0.902192, 0.909281, 0.902192, 0.864805, 0.784792, 0.778746, 0.785343, 0.778746,
                    0.784792, 0.824182, 0.831803, 0.824182]
    
    x, y = np.array(x), np.array(y)
    
    x = (x + padding) / (2 * padding + 1)
    y = (y + padding) / (2 * padding + 1)
    x = x * size
    y = y * size
    return np.array(list(zip(x, y)))

def transformation_from_points(points1, points2):
    # Procrustes-style fit: least-squares scale, rotation and translation that
    # map points1 onto points2, returned as a 3x3 homogeneous matrix.
    points1 = points1.astype(np.float64)
    points2 = points2.astype(np.float64)
 
    c1 = np.mean(points1, axis=0)
    c2 = np.mean(points2, axis=0)
    points1 -= c1
    points2 -= c2
    s1 = np.std(points1)
    s2 = np.std(points2)
    points1 /= s1
    points2 /= s2
 
    U, S, Vt = np.linalg.svd(points1.T * points2)
    R = (U * Vt).T
    return np.vstack([np.hstack(((s2 / s1) * R,
                                       c2.T - (s2 / s1) * R * c1.T)),
                         np.matrix([0., 0., 1.])])

I = cv2.imread('img.jpg')
fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, flip_input=False, device='cuda')
point = fa.get_landmarks(I)   # list of 68x2 landmark arrays, one per detected face (or None)

front256 = get_position(256)
video = []
if point is not None:
    shape = np.array(point[0])
    shape = shape[17:]                                # drop the 17 jaw points, keep the 51 inner landmarks
    M = transformation_from_points(np.matrix(shape), np.matrix(front256))

    img = cv2.warpAffine(scene, M[:2], (256, 256))    # warp the source image ('scene') into template space
    (x, y) = front256[-20:].mean(0).astype(np.int32)  # centre of the 20 mouth points in the template
    w = 160//2
    img = img[y-w//2:y+w//2, x-w:x+w, ...]            # crop the mouth region (LipNet-specific)
    img = cv2.resize(img, (128, 64))
    video.append(img)

This code worked for me. It uses an affine transformation to align the face. Source: https://github.com/Fengdalu/LipNet-PyTorch/blob/master/demo.py
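
As a quick sanity check (a sketch, assuming the variables from the snippet above are in scope), you can apply M to the detected landmarks and confirm they land close to the template:

# Apply the 3x3 transform M to the detected 51 landmarks (homogeneous coordinates)
# and compare with the front256 template it was fitted to.
pts_h = np.hstack([shape, np.ones((shape.shape[0], 1))])    # 51x3
mapped = np.asarray(pts_h @ np.asarray(M).T)[:, :2]         # 51x2, in template space
print('mean alignment error (px):', np.linalg.norm(mapped - front256, axis=1).mean())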

imadtoubal avatar Jul 29 '20 04:07 imadtoubal

Could you please write a tutorial on actual face alignment?

Because right now it is hard to guess why get_position consists of those numbers, why we take the slice shape[17:], and so on.
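
For context: the template in get_position has 51 points because the detector returns the standard 68-point layout and shape[17:] drops the 17 jaw-outline points, keeping the 51 inner landmarks. A rough index map (the usual 68-point convention, matching the pred_types dict further down this thread):

# Standard 68-point landmark groups; shape[17:] removes the jaw outline,
# leaving the 51 inner points that get_position's x/y lists describe.
LANDMARK_GROUPS = {
    'jaw':      slice(0, 17),   # dropped by shape[17:]
    'eyebrows': slice(17, 27),
    'nose':     slice(27, 36),
    'eyes':     slice(36, 48),
    'mouth':    slice(48, 68),
}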

Proger666 avatar Oct 11 '20 11:10 Proger666

For those of you who are looking for alignment and how it's done, you can have a look here. Basically there are many ways you can do it, but the implementation I have linked to is both easy to follow and performs very well.
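
If you just want a self-contained starting point, here is a minimal sketch (not the implementation linked above) that estimates the same kind of similarity transform with OpenCV instead of the hand-rolled Procrustes code; it assumes an input image plus detected landmarks and a 2D target template, e.g. the ones from the snippet earlier in this thread:

import cv2
import numpy as np

def align_with_opencv(image_bgr, src_landmarks, dst_template, out_size=256):
    # Estimate a similarity transform (rotation + uniform scale + translation)
    # mapping the detected landmarks onto the template, then warp the image.
    M, _ = cv2.estimateAffinePartial2D(
        np.asarray(src_landmarks, dtype=np.float32),
        np.asarray(dst_template, dtype=np.float32),
        method=cv2.LMEDS)
    return cv2.warpAffine(image_bgr, M, (out_size, out_size))

# e.g. aligned = align_with_opencv(I, shape, get_position(256))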

Coderx7 avatar Oct 11 '20 16:10 Coderx7

(quoting imadtoubal's alignment snippet above)

How do I get scene?

The scene variable is not assigned to anything, so this throws an error. Please check, @Coderx7 @imadtoubal @Proger666

amrahsmaytas avatar Nov 30 '20 02:11 amrahsmaytas

For anyone still looking, I'm using umeyama in 3D: https://github.com/iperov/DeepFaceLab/blob/master/core/mathlib/umeyama.py

If anyone has a better idea, I'd love to hear it.


import numpy as np
import face_alignment
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from skimage import io
import collections

from core.mathlib import umeyama
from facelib import LandmarksProcessor, FaceType
import cv2


face_detector = 'sfd'
# Run the 3D face alignment on a test image, without CUDA.
fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._3D, device='cuda', flip_input=True,
                                  face_detector=face_detector)


try:
    input_img = io.imread('IMG HERE')  # replace with your image path
except FileNotFoundError:
    input_img = io.imread('test/assets/aflw-test.jpg')

facepoints = fa.get_landmarks(input_img)[-1]


landmarks_68_3D = np.array([
    [-73.393523, -29.801432, 47.667532],  # 00
    [-72.775014, -10.949766, 45.909403],  # 01
    [-70.533638, 7.929818, 44.842580],  # 02
    [-66.850058, 26.074280, 43.141114],  # 03
    [-59.790187, 42.564390, 38.635298],  # 04
    [-48.368973, 56.481080, 30.750622],  # 05
    [-34.121101, 67.246992, 18.456453],  # 06
    [-17.875411, 75.056892, 3.609035],  # 07
    [0.098749, 77.061286, -0.881698],  # 08
    [17.477031, 74.758448, 5.181201],  # 09
    [32.648966, 66.929021, 19.176563],  # 10
    [46.372358, 56.311389, 30.770570],  # 11
    [57.343480, 42.419126, 37.628629],  # 12
    [64.388482, 25.455880, 40.886309],  # 13
    [68.212038, 6.990805, 42.281449],  # 14
    [70.486405, -11.666193, 44.142567],  # 15
    [71.375822, -30.365191, 47.140426],  # 16
    [-61.119406, -49.361602, 14.254422],  # 17
    [-51.287588, -58.769795, 7.268147],  # 18
    [-37.804800, -61.996155, 0.442051],  # 19
    [-24.022754, -61.033399, -6.606501],  # 20
    [-11.635713, -56.686759, -11.967398],  # 21
    [12.056636, -57.391033, -12.051204],  # 22
    [25.106256, -61.902186, -7.315098],  # 23
    [38.338588, -62.777713, -1.022953],  # 24
    [51.191007, -59.302347, 5.349435],  # 25
    [60.053851, -50.190255, 11.615746],  # 26
    [0.653940, -42.193790, -13.380835],  # 27
    [0.804809, -30.993721, -21.150853],  # 28
    [0.992204, -19.944596, -29.284036],  # 29
    [1.226783, -8.414541, -36.948060],  # 30
    [-14.772472, 2.598255, -20.132003],  # 31
    [-7.180239, 4.751589, -23.536684],  # 32
    [0.555920, 6.562900, -25.944448],  # 33
    [8.272499, 4.661005, -23.695741],  # 34
    [15.214351, 2.643046, -20.858157],  # 35
    [-46.047290, -37.471411, 7.037989],  # 36
    [-37.674688, -42.730510, 3.021217],  # 37
    [-27.883856, -42.711517, 1.353629],  # 38
    [-19.648268, -36.754742, -0.111088],  # 39
    [-28.272965, -35.134493, -0.147273],  # 40
    [-38.082418, -34.919043, 1.476612],  # 41
    [19.265868, -37.032306, -0.665746],  # 42
    [27.894191, -43.342445, 0.247660],  # 43
    [37.437529, -43.110822, 1.696435],  # 44
    [45.170805, -38.086515, 4.894163],  # 45
    [38.196454, -35.532024, 0.282961],  # 46
    [28.764989, -35.484289, -1.172675],  # 47
    [-28.916267, 28.612716, -2.240310],  # 48
    [-17.533194, 22.172187, -15.934335],  # 49
    [-6.684590, 19.029051, -22.611355],  # 50
    [0.381001, 20.721118, -23.748437],  # 51
    [8.375443, 19.035460, -22.721995],  # 52
    [18.876618, 22.394109, -15.610679],  # 53
    [28.794412, 28.079924, -3.217393],  # 54
    [19.057574, 36.298248, -14.987997],  # 55
    [8.956375, 39.634575, -22.554245],  # 56
    [0.381549, 40.395647, -23.591626],  # 57
    [-7.428895, 39.836405, -22.406106],  # 58
    [-18.160634, 36.677899, -15.121907],  # 59
    [-24.377490, 28.677771, -4.785684],  # 60
    [-6.897633, 25.475976, -20.893742],  # 61
    [0.340663, 26.014269, -22.220479],  # 62
    [8.444722, 25.326198, -21.025520],  # 63
    [24.474473, 28.323008, -5.712776],  # 64
    [8.449166, 30.596216, -20.671489],  # 65
    [0.205322, 31.408738, -21.903670],  # 66
    [-7.198266, 30.844876, -20.328022]  # 67
], dtype=np.float32)




# normalize everything to image range
pts2 = np.float32(((0, 0), (160, 0), (160, 160)))   # note: defined but not used below
centered_face = facepoints - facepoints.mean(axis=0)
landmarks_68_3D = landmarks_68_3D  # - landmarks_68_3D.mean(axis=0)



def transform_points(points, mat, invert=False):
    # Apply a 4x4 homogeneous transform to an Nx3 point set.
    # Note: the homogeneous coordinate is filled with zeros, so the translation
    # part of `mat` is not applied (the face points here are already centered).
    if invert:
        mat = np.linalg.inv(mat)
    ones = np.zeros(points.shape[0])
    points = np.column_stack((points, ones))
    points = (mat @ points.T).T
    points = points[:, :3]
    return points


pred_type = collections.namedtuple('prediction_type', ['slice', 'color'])
pred_types = {'face': pred_type(slice(0, 17), (0.682, 0.780, 0.909, 0.5)),
              'eyebrow1': pred_type(slice(17, 22), (1.0, 0.498, 0.055, 0.4)),
              'eyebrow2': pred_type(slice(22, 27), (1.0, 0.498, 0.055, 0.4)),
              'nose': pred_type(slice(27, 31), (0.345, 0.239, 0.443, 0.4)),
              'nostril': pred_type(slice(31, 36), (0.345, 0.239, 0.443, 0.4)),
              'eye1': pred_type(slice(36, 42), (0.596, 0.875, 0.541, 0.3)),
              'eye2': pred_type(slice(42, 48), (0.596, 0.875, 0.541, 0.3)),
              'lips': pred_type(slice(48, 60), (0.596, 0.875, 0.541, 0.3)),
              'teeth': pred_type(slice(60, 68), (0.596, 0.875, 0.541, 0.4))
              }

fig = plt.figure(figsize=plt.figaspect(.5))
ax = fig.add_subplot(2, 3, 1, projection='3d')

# plot 1
surf = ax.scatter(centered_face[:, 0],
                  centered_face[:, 1],
                  centered_face[:, 2],
                  c='cyan',
                  alpha=1.0,
                  edgecolor='b')

for pred_type in pred_types.values():
    ax.plot3D(centered_face[pred_type.slice, 0],
              centered_face[pred_type.slice, 1],
              centered_face[pred_type.slice, 2], color='blue')

ax.view_init(elev=90., azim=90.)
ax.set_xlim(ax.get_xlim()[::-1])
ax.title.set_text("input_face")


ax = fig.add_subplot(2, 3, 2, projection='3d')
surf = ax.scatter(landmarks_68_3D[:, 0],
                  landmarks_68_3D[:, 1],
                  landmarks_68_3D[:, 2],
                  c='cyan',
                  alpha=1.0,
                  edgecolor='b')

for pred_type in pred_types.values():
    ax.plot3D(landmarks_68_3D[pred_type.slice, 0],
              landmarks_68_3D[pred_type.slice, 1],
              landmarks_68_3D[pred_type.slice, 2], color='blue')

ax.view_init(elev=90., azim=90.)
ax.set_xlim(ax.get_xlim()[::-1])
ax.title.set_text("input landmarks")

# transformed plot
mat = umeyama(centered_face, landmarks_68_3D, True)    # similarity transform (with scale) onto the 3D template
transformed_face = transform_points(centered_face, mat, False)

ax = fig.add_subplot(2, 3, 3, projection='3d')
surf = ax.scatter(transformed_face[:, 0],
                  transformed_face[:, 1],
                  transformed_face[:, 2],
                  c='cyan',
                  alpha=1.0,
                  edgecolor='b')

for pred_type in pred_types.values():
    ax.plot3D(transformed_face[pred_type.slice, 0],
              transformed_face[pred_type.slice, 1],
              transformed_face[pred_type.slice, 2], color='blue')

ax.view_init(elev=90., azim=90.)
ax.set_xlim(ax.get_xlim()[::-1])
ax.title.set_text("transformed input")

plt.show()
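
For reference, a small sketch of how to pull the pieces out of mat (assuming the skimage-style umeyama that the DeepFaceLab file above is based on, which returns a 4x4 homogeneous similarity matrix):

# Decompose the 4x4 similarity returned by umeyama: [[s*R, t], [0, 0, 0, 1]].
scale = np.cbrt(np.linalg.det(mat[:3, :3]))    # uniform scale s
rotation = mat[:3, :3] / scale                 # 3x3 rotation R
translation = mat[:3, 3]                       # translation t (not applied by transform_points above)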

(screenshot: 3D plots of the input face, input landmarks, and transformed input)

josephbiko avatar Feb 02 '21 10:02 josephbiko

Hi, I want to know how I can get landmarks_68_3D?

wywywy01 avatar Mar 24 '21 08:03 wywywy01

Either use these, or check out the Basel Face Model.

josephbiko avatar Mar 24 '21 11:03 josephbiko

(quoting imadtoubal's alignment snippet and the question about scene above)

scene is the image that you want to align, as an np.array.
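
Put differently, a minimal end-to-end sketch using the functions quoted above (where scene is simply the loaded image; 'img.jpg' is a placeholder path):

import cv2
import numpy as np
import face_alignment

scene = cv2.imread('img.jpg')   # the image you want to align ("scene")
fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, flip_input=False, device='cuda')
landmarks = fa.get_landmarks(scene)

if landmarks is not None:
    shape = np.array(landmarks[0])[17:]      # keep the 51 inner landmarks
    front256 = get_position(256)             # template from the snippet above
    M = transformation_from_points(np.matrix(shape), np.matrix(front256))
    aligned = cv2.warpAffine(scene, M[:2], (256, 256))   # aligned 256x256 face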

mea-lab avatar Jul 03 '21 04:07 mea-lab

Which library is this "facelib"? Can you please provide a link to its source code?

I found the following libraries with the same name:

  1. https://github.com/sajjjadayobi/FaceLib
  2. https://github.com/kutayyildiz/facelib

ravikiranrao avatar Dec 01 '21 14:12 ravikiranrao

I know, a bit odd. But I stole it from: https://github.com/iperov/DeepFaceLab/tree/master/facelib

good luck!

josephbiko avatar Dec 01 '21 15:12 josephbiko

Thanks @josephbiko, though you are not using those in the current code. Apologies.

ravikiranrao avatar Dec 01 '21 15:12 ravikiranrao

(quoting josephbiko's umeyama-based 3D alignment snippet above)

works for me

lvZic avatar Apr 07 '22 07:04 lvZic

(quoting imadtoubal's alignment snippet above)

I tried this as well by assigning the "scene" variable to the image itself, and it seems to work, but the output is just the mouth (aligned, but just the mouth), probably because LipNet focuses on lips only. Since the math operations are confusing to me, I couldn't figure out where exactly the cropping is done. Could you please help me extract the aligned face?

fosforoglu avatar Sep 16 '22 07:09 fosforoglu

(quoting the alignment snippet and fosforoglu's question above)

You should look at these lines:

    w = 160//2
    img = img[y-w//2:y+w//2,x-w:x+w,...]
    img = cv2.resize(img, (128, 64))

If you want to obtain the full face, just remove this crop.
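
For example, a minimal sketch of the full-face variant (same pipeline, warp only, no mouth crop; the output path is just illustrative):

M = transformation_from_points(np.matrix(shape), np.matrix(front256))
aligned_face = cv2.warpAffine(scene, M[:2], (256, 256))   # full aligned face, 256x256
cv2.imwrite('aligned_face.png', aligned_face)             # illustrative output path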

pepealessio avatar Nov 24 '22 14:11 pepealessio

@josephbiko Hi, I was wondering: after aligning the landmarks, how did you project/warpAffine the original image onto the aligned landmarks? Thanks

f-izzat avatar May 09 '23 00:05 f-izzat

mark

cucdengjunli avatar Nov 15 '23 09:11 cucdengjunli