
How to do the actual alignment?

Open Coderx7 opened this issue 5 years ago • 16 comments

Hi @1adrianb: thanks a lot for all of this, really appreciate it. I was wondering if it's possible to get the aligned images. Your FaceAlignment class, as the name suggests, should provide such functionality, but I can't seem to find any examples that do the actual alignment.
Would you kindly show us how to do this as well?

Coderx7 avatar Jan 12 '20 05:01 Coderx7

import numpy as np
import cv2
import face_alignment

def get_position(size, padding=0.25):
    # Mean-face template: 51 normalized (x, y) landmark positions, scaled to a size x size image.
    
    x = [0.000213256, 0.0752622, 0.18113, 0.29077, 0.393397, 0.586856, 0.689483, 0.799124,
                    0.904991, 0.98004, 0.490127, 0.490127, 0.490127, 0.490127, 0.36688, 0.426036,
                    0.490127, 0.554217, 0.613373, 0.121737, 0.187122, 0.265825, 0.334606, 0.260918,
                    0.182743, 0.645647, 0.714428, 0.793132, 0.858516, 0.79751, 0.719335, 0.254149,
                    0.340985, 0.428858, 0.490127, 0.551395, 0.639268, 0.726104, 0.642159, 0.556721,
                    0.490127, 0.423532, 0.338094, 0.290379, 0.428096, 0.490127, 0.552157, 0.689874,
                    0.553364, 0.490127, 0.42689]
    
    y = [0.106454, 0.038915, 0.0187482, 0.0344891, 0.0773906, 0.0773906, 0.0344891,
                    0.0187482, 0.038915, 0.106454, 0.203352, 0.307009, 0.409805, 0.515625, 0.587326,
                    0.609345, 0.628106, 0.609345, 0.587326, 0.216423, 0.178758, 0.179852, 0.231733,
                    0.245099, 0.244077, 0.231733, 0.179852, 0.178758, 0.216423, 0.244077, 0.245099,
                    0.780233, 0.745405, 0.727388, 0.742578, 0.727388, 0.745405, 0.780233, 0.864805,
                    0.902192, 0.909281, 0.902192, 0.864805, 0.784792, 0.778746, 0.785343, 0.778746,
                    0.784792, 0.824182, 0.831803, 0.824182]
    
    x, y = np.array(x), np.array(y)
    
    x = (x + padding) / (2 * padding + 1)
    y = (y + padding) / (2 * padding + 1)
    x = x * size
    y = y * size
    return np.array(list(zip(x, y)))

def transformation_from_points(points1, points2):
    # Procrustes-style fit: least-squares scale, rotation and translation that
    # map points1 onto points2, returned as a 3x3 homogeneous matrix.
    points1 = points1.astype(np.float64)
    points2 = points2.astype(np.float64)
 
    c1 = np.mean(points1, axis=0)
    c2 = np.mean(points2, axis=0)
    points1 -= c1
    points2 -= c2
    s1 = np.std(points1)
    s2 = np.std(points2)
    points1 /= s1
    points2 /= s2
 
    U, S, Vt = np.linalg.svd(points1.T * points2)
    R = (U * Vt).T
    return np.vstack([np.hstack(((s2 / s1) * R,
                                       c2.T - (s2 / s1) * R * c1.T)),
                         np.matrix([0., 0., 1.])])

I = cv2.imread('img.jpg')
fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, flip_input=False, device='cuda')
point = fa.get_landmarks(I)   # list of 68x2 landmark arrays, one per detected face (or None)

front256 = get_position(256)
video = []
if point is not None:
    shape = np.array(point[0])
    shape = shape[17:]                                # drop the 17 jaw points, keep the 51 inner landmarks
    M = transformation_from_points(np.matrix(shape), np.matrix(front256))

    img = cv2.warpAffine(scene, M[:2], (256, 256))    # warp the source image ('scene') into template space
    (x, y) = front256[-20:].mean(0).astype(np.int32)  # centre of the 20 mouth points in the template
    w = 160//2
    img = img[y-w//2:y+w//2, x-w:x+w, ...]            # crop the mouth region (LipNet-specific)
    img = cv2.resize(img, (128, 64))
    video.append(img)

This code worked for me. It uses an affine transformation to align the face. Source: https://github.com/Fengdalu/LipNet-PyTorch/blob/master/demo.py
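
As a quick sanity check (a sketch, assuming the variables from the snippet above are in scope), you can apply M to the detected landmarks and confirm they land close to the template:

# Apply the 3x3 transform M to the detected 51 landmarks (homogeneous coordinates)
# and compare with the front256 template it was fitted to.
pts_h = np.hstack([shape, np.ones((shape.shape[0], 1))])    # 51x3
mapped = np.asarray(pts_h @ np.asarray(M).T)[:, :2]         # 51x2, in template space
print('mean alignment error (px):', np.linalg.norm(mapped - front256, axis=1).mean())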

imadtoubal avatar Jul 29 '20 04:07 imadtoubal

Could you please write a tutorial on actual face alignment?

Because right now it is hard to guess why get_position consists of those numbers, why we take the slice shape[17:], and so on.
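
For context: the template in get_position has 51 points because the detector returns the standard 68-point layout and shape[17:] drops the 17 jaw-outline points, keeping the 51 inner landmarks. A rough index map (the usual 68-point convention, matching the pred_types dict further down this thread):

# Standard 68-point landmark groups; shape[17:] removes the jaw outline,
# leaving the 51 inner points that get_position's x/y lists describe.
LANDMARK_GROUPS = {
    'jaw':      slice(0, 17),   # dropped by shape[17:]
    'eyebrows': slice(17, 27),
    'nose':     slice(27, 36),
    'eyes':     slice(36, 48),
    'mouth':    slice(48, 68),
}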

Proger666 avatar Oct 11 '20 11:10 Proger666

For those of you who are looking for alignment and how it's done, you can have a look here. Basically there are many ways you can do it, but the implementation I have linked to is both easy to follow and performs very well.
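
If you just want a self-contained starting point, here is a minimal sketch (not the implementation linked above) that estimates the same kind of similarity transform with OpenCV instead of the hand-rolled Procrustes code; it assumes an input image plus detected landmarks and a 2D target template, e.g. the ones from the snippet earlier in this thread:

import cv2
import numpy as np

def align_with_opencv(image_bgr, src_landmarks, dst_template, out_size=256):
    # Estimate a similarity transform (rotation + uniform scale + translation)
    # mapping the detected landmarks onto the template, then warp the image.
    M, _ = cv2.estimateAffinePartial2D(
        np.asarray(src_landmarks, dtype=np.float32),
        np.asarray(dst_template, dtype=np.float32),
        method=cv2.LMEDS)
    return cv2.warpAffine(image_bgr, M, (out_size, out_size))

# e.g. aligned = align_with_opencv(I, shape, get_position(256))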

Coderx7 avatar Oct 11 '20 16:10 Coderx7

(quoting imadtoubal's alignment snippet above)

How do I get scene?

The scene variable is not assigned to anything, so this throws an error. Please check, @Coderx7 @imadtoubal @Proger666

amrahsmaytas avatar Nov 30 '20 02:11 amrahsmaytas

For anyone still looking, I'm using umeyama in 3D: https://github.com/iperov/DeepFaceLab/blob/master/core/mathlib/umeyama.py

If anyone has a better idea, I'd love to hear it.


import numpy as np
import face_alignment
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from skimage import io
import collections

from core.mathlib import umeyama
from facelib import LandmarksProcessor, FaceType
import cv2


face_detector = 'sfd'
# Run the 3D face alignment on a test image, without CUDA.
fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._3D, device='cuda', flip_input=True,
                                  face_detector=face_detector)


try:
    input_img = io.imread('IMG HERE')  # replace with your image path
except FileNotFoundError:
    input_img = io.imread('test/assets/aflw-test.jpg')

facepoints = fa.get_landmarks(input_img)[-1]


landmarks_68_3D = np.array([
    [-73.393523, -29.801432, 47.667532],  # 00
    [-72.775014, -10.949766, 45.909403],  # 01
    [-70.533638, 7.929818, 44.842580],  # 02
    [-66.850058, 26.074280, 43.141114],  # 03
    [-59.790187, 42.564390, 38.635298],  # 04
    [-48.368973, 56.481080, 30.750622],  # 05
    [-34.121101, 67.246992, 18.456453],  # 06
    [-17.875411, 75.056892, 3.609035],  # 07
    [0.098749, 77.061286, -0.881698],  # 08
    [17.477031, 74.758448, 5.181201],  # 09
    [32.648966, 66.929021, 19.176563],  # 10
    [46.372358, 56.311389, 30.770570],  # 11
    [57.343480, 42.419126, 37.628629],  # 12
    [64.388482, 25.455880, 40.886309],  # 13
    [68.212038, 6.990805, 42.281449],  # 14
    [70.486405, -11.666193, 44.142567],  # 15
    [71.375822, -30.365191, 47.140426],  # 16
    [-61.119406, -49.361602, 14.254422],  # 17
    [-51.287588, -58.769795, 7.268147],  # 18
    [-37.804800, -61.996155, 0.442051],  # 19
    [-24.022754, -61.033399, -6.606501],  # 20
    [-11.635713, -56.686759, -11.967398],  # 21
    [12.056636, -57.391033, -12.051204],  # 22
    [25.106256, -61.902186, -7.315098],  # 23
    [38.338588, -62.777713, -1.022953],  # 24
    [51.191007, -59.302347, 5.349435],  # 25
    [60.053851, -50.190255, 11.615746],  # 26
    [0.653940, -42.193790, -13.380835],  # 27
    [0.804809, -30.993721, -21.150853],  # 28
    [0.992204, -19.944596, -29.284036],  # 29
    [1.226783, -8.414541, -36.948060],  # 30
    [-14.772472, 2.598255, -20.132003],  # 31
    [-7.180239, 4.751589, -23.536684],  # 32
    [0.555920, 6.562900, -25.944448],  # 33
    [8.272499, 4.661005, -23.695741],  # 34
    [15.214351, 2.643046, -20.858157],  # 35
    [-46.047290, -37.471411, 7.037989],  # 36
    [-37.674688, -42.730510, 3.021217],  # 37
    [-27.883856, -42.711517, 1.353629],  # 38
    [-19.648268, -36.754742, -0.111088],  # 39
    [-28.272965, -35.134493, -0.147273],  # 40
    [-38.082418, -34.919043, 1.476612],  # 41
    [19.265868, -37.032306, -0.665746],  # 42
    [27.894191, -43.342445, 0.247660],  # 43
    [37.437529, -43.110822, 1.696435],  # 44
    [45.170805, -38.086515, 4.894163],  # 45
    [38.196454, -35.532024, 0.282961],  # 46
    [28.764989, -35.484289, -1.172675],  # 47
    [-28.916267, 28.612716, -2.240310],  # 48
    [-17.533194, 22.172187, -15.934335],  # 49
    [-6.684590, 19.029051, -22.611355],  # 50
    [0.381001, 20.721118, -23.748437],  # 51
    [8.375443, 19.035460, -22.721995],  # 52
    [18.876618, 22.394109, -15.610679],  # 53
    [28.794412, 28.079924, -3.217393],  # 54
    [19.057574, 36.298248, -14.987997],  # 55
    [8.956375, 39.634575, -22.554245],  # 56
    [0.381549, 40.395647, -23.591626],  # 57
    [-7.428895, 39.836405, -22.406106],  # 58
    [-18.160634, 36.677899, -15.121907],  # 59
    [-24.377490, 28.677771, -4.785684],  # 60
    [-6.897633, 25.475976, -20.893742],  # 61
    [0.340663, 26.014269, -22.220479],  # 62
    [8.444722, 25.326198, -21.025520],  # 63
    [24.474473, 28.323008, -5.712776],  # 64
    [8.449166, 30.596216, -20.671489],  # 65
    [0.205322, 31.408738, -21.903670],  # 66
    [-7.198266, 30.844876, -20.328022]  # 67
], dtype=np.float32)




# normalize everything to image range
pts2 = np.float32(((0, 0), (160, 0), (160, 160)))   # note: defined but not used below
centered_face = facepoints - facepoints.mean(axis=0)
landmarks_68_3D = landmarks_68_3D  # - landmarks_68_3D.mean(axis=0)



def transform_points(points, mat, invert=False):
    # Apply a 4x4 homogeneous transform to an Nx3 point set.
    # Note: the homogeneous coordinate is filled with zeros, so the translation
    # part of `mat` is not applied (the face points here are already centered).
    if invert:
        mat = np.linalg.inv(mat)
    ones = np.zeros(points.shape[0])
    points = np.column_stack((points, ones))
    points = (mat @ points.T).T
    points = points[:, :3]
    return points


pred_type = collections.namedtuple('prediction_type', ['slice', 'color'])
pred_types = {'face': pred_type(slice(0, 17), (0.682, 0.780, 0.909, 0.5)),
              'eyebrow1': pred_type(slice(17, 22), (1.0, 0.498, 0.055, 0.4)),
              'eyebrow2': pred_type(slice(22, 27), (1.0, 0.498, 0.055, 0.4)),
              'nose': pred_type(slice(27, 31), (0.345, 0.239, 0.443, 0.4)),
              'nostril': pred_type(slice(31, 36), (0.345, 0.239, 0.443, 0.4)),
              'eye1': pred_type(slice(36, 42), (0.596, 0.875, 0.541, 0.3)),
              'eye2': pred_type(slice(42, 48), (0.596, 0.875, 0.541, 0.3)),
              'lips': pred_type(slice(48, 60), (0.596, 0.875, 0.541, 0.3)),
              'teeth': pred_type(slice(60, 68), (0.596, 0.875, 0.541, 0.4))
              }

fig = plt.figure(figsize=plt.figaspect(.5))
ax = fig.add_subplot(2, 3, 1, projection='3d')

# plot 1
surf = ax.scatter(centered_face[:, 0],
                  centered_face[:, 1],
                  centered_face[:, 2],
                  c='cyan',
                  alpha=1.0,
                  edgecolor='b')

for pred_type in pred_types.values():
    ax.plot3D(centered_face[pred_type.slice, 0],
              centered_face[pred_type.slice, 1],
              centered_face[pred_type.slice, 2], color='blue')

ax.view_init(elev=90., azim=90.)
ax.set_xlim(ax.get_xlim()[::-1])
ax.title.set_text("input_face")


ax = fig.add_subplot(2, 3, 2, projection='3d')
surf = ax.scatter(landmarks_68_3D[:, 0],
                  landmarks_68_3D[:, 1],
                  landmarks_68_3D[:, 2],
                  c='cyan',
                  alpha=1.0,
                  edgecolor='b')

for pred_type in pred_types.values():
    ax.plot3D(landmarks_68_3D[pred_type.slice, 0],
              landmarks_68_3D[pred_type.slice, 1],
              landmarks_68_3D[pred_type.slice, 2], color='blue')

ax.view_init(elev=90., azim=90.)
ax.set_xlim(ax.get_xlim()[::-1])
ax.title.set_text("input landmarks")

# transformed plot
mat = umeyama(centered_face, landmarks_68_3D, True)    # similarity transform (with scale) onto the 3D template
transformed_face = transform_points(centered_face, mat, False)

ax = fig.add_subplot(2, 3, 3, projection='3d')
surf = ax.scatter(transformed_face[:, 0],
                  transformed_face[:, 1],
                  transformed_face[:, 2],
                  c='cyan',
                  alpha=1.0,
                  edgecolor='b')

for pred_type in pred_types.values():
    ax.plot3D(transformed_face[pred_type.slice, 0],
              transformed_face[pred_type.slice, 1],
              transformed_face[pred_type.slice, 2], color='blue')

ax.view_init(elev=90., azim=90.)
ax.set_xlim(ax.get_xlim()[::-1])
ax.title.set_text("transformed input")

plt.show()
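
For reference, a small sketch of how to pull the pieces out of mat (assuming the skimage-style umeyama that the DeepFaceLab file above is based on, which returns a 4x4 homogeneous similarity matrix):

# Decompose the 4x4 similarity returned by umeyama: [[s*R, t], [0, 0, 0, 1]].
scale = np.cbrt(np.linalg.det(mat[:3, :3]))    # uniform scale s
rotation = mat[:3, :3] / scale                 # 3x3 rotation R
translation = mat[:3, 3]                       # translation t (not applied by transform_points above)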

(screenshot: 3D plots of the input face, input landmarks, and transformed input)

josephbiko avatar Feb 02 '21 10:02 josephbiko

Hi, I want to know how I can get landmarks_68_3D?

wywywy01 avatar Mar 24 '21 08:03 wywywy01

Either use these, or check out the Basel Face Model.

josephbiko avatar Mar 24 '21 11:03 josephbiko

(quoting imadtoubal's alignment snippet and the question about scene above)

scene is the image that you want to align, as an np.array.
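
Put differently, a minimal end-to-end sketch using the functions quoted above (where scene is simply the loaded image; 'img.jpg' is a placeholder path):

import cv2
import numpy as np
import face_alignment

scene = cv2.imread('img.jpg')   # the image you want to align ("scene")
fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, flip_input=False, device='cuda')
landmarks = fa.get_landmarks(scene)

if landmarks is not None:
    shape = np.array(landmarks[0])[17:]      # keep the 51 inner landmarks
    front256 = get_position(256)             # template from the snippet above
    M = transformation_from_points(np.matrix(shape), np.matrix(front256))
    aligned = cv2.warpAffine(scene, M[:2], (256, 256))   # aligned 256x256 face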

mea-lab avatar Jul 03 '21 04:07 mea-lab

Which library is this "facelib"? Can you please provide a link to its source code?

I found the following libraries with the same name:

  1. https://github.com/sajjjadayobi/FaceLib
  2. https://github.com/kutayyildiz/facelib

ravikiranrao avatar Dec 01 '21 14:12 ravikiranrao

I know, a bit odd. But I stole it from: https://github.com/iperov/DeepFaceLab/tree/master/facelib

good luck!

josephbiko avatar Dec 01 '21 15:12 josephbiko

Thanks @josephbiko, though you are not using those in the current code. Apologies.

ravikiranrao avatar Dec 01 '21 15:12 ravikiranrao

(quoting josephbiko's umeyama-based 3D alignment snippet above)

works for me

lvZic avatar Apr 07 '22 07:04 lvZic

(quoting imadtoubal's alignment snippet above)

I tried this as well by assigning the "scene" variable to the image itself, and it seems to work, but the output is just the mouth (aligned, but just the mouth), probably because LipNet focuses on lips only. Since the math operations are confusing to me, I couldn't figure out where exactly the cropping is done. Could you please help me extract the aligned face?

fosforoglu avatar Sep 16 '22 07:09 fosforoglu

(quoting the alignment snippet and fosforoglu's question above)

You should look at these lines:

    w = 160//2
    img = img[y-w//2:y+w//2,x-w:x+w,...]
    img = cv2.resize(img, (128, 64))

If you want to obtain the full face, just remove this crop.
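
For example, a minimal sketch of the full-face variant (same pipeline, warp only, no mouth crop; the output path is just illustrative):

M = transformation_from_points(np.matrix(shape), np.matrix(front256))
aligned_face = cv2.warpAffine(scene, M[:2], (256, 256))   # full aligned face, 256x256
cv2.imwrite('aligned_face.png', aligned_face)             # illustrative output path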

pepealessio avatar Nov 24 '22 14:11 pepealessio

@josephbiko Hi, I was wondering: after aligning the landmarks, how did you project/warpAffine the original image onto the aligned landmarks? Thanks

f-izzat avatar May 09 '23 00:05 f-izzat

mark

cucdengjunli avatar Nov 15 '23 09:11 cucdengjunli