
Aligning faces with a Python OpenCV-dlib combo

Using OpenCV and dlib to extract and align faces in images, building a dataset for learning.

This is my first attempt at using a Jupyter notebook to write a post; I hope it makes sense.
I’ve recently taught a class on generative models: http://hi.cs.stonybrook.edu/teaching/cdt450
In class we’ve manipulated face images with neural networks.
One important thing I found that helped is to align the images so the facial features overlap.
It helps the nets learn the variance in faces better, rather than waste their “representation power” on the shift between faces.
The following is some code to align face images using the excellent dlib (Python bindings): http://dlib.net. First I run a standard face detector, and then I use the landmarks from the facial features extractor to compute a complete alignment of the face.
After the alignment – I’m just having fun with the aligned dataset 🙂

First we include some necessary packages:

import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm,colors,rc
import random
import warnings
import cv2
from IPython import display
from IPython.display import HTML
import os
import shutil
import time
import urllib
import zipfile
import tarfile
import keras.utils as utils
import progressbar
import imageio
%matplotlib inline
rc('figure',figsize=(15,5))
warnings.filterwarnings('ignore')

A utility to download and unzip/untar a file

def download_dataset(temp_dl_filename,data_directory,url=None,squash_dir=False,remove_tmp_file=True):
    if not os.path.exists(data_directory):
        filename = temp_dl_filename
        if url is not None:
            filename = utils.get_file(temp_dl_filename, url)
        print("Unzipping...")
        if filename.endswith('.zip'):
            zf = zipfile.ZipFile(filename)
            print("Calculate total size...")
            uncompress_size = sum((zfile.file_size for zfile in zf.infolist()))
            extracted_size = 0
            pb = progressbar.ProgressBar(max_value=uncompress_size)
            for i,zfile in enumerate(zf.infolist()):
                extracted_size += zfile.file_size
                pb.update(extracted_size)
                if zfile.filename[-1] == '/':
                    continue
                if squash_dir:
                    zfile.filename = os.path.basename(zfile.filename)
                zf.extract(zfile, data_directory)
        elif filename.endswith('.tar.gz'):
            zf = tarfile.open(filename,'r:gz')
            print("Calculate total size...")
            uncompress_size = sum((tzfile.size if tzfile.isfile() else 0 for tzfile in zf.getmembers()))
            extracted_size = 0
            pb = progressbar.ProgressBar(max_value=uncompress_size)
            for i,zfile in enumerate(zf.getmembers()):
                if zfile.isfile():
                    extracted_size += zfile.size
                    pb.update(extracted_size)
                    if squash_dir:
                        zfile.name = os.path.basename(zfile.name)
                    zf.extract(zfile, data_directory)
        if remove_tmp_file:
            os.remove(filename)
        print("Done")
    else:
        print ("Data already exists.")

Aligning face images

This function will crop, scale, and rotate the faces in each image using a facial landmark detector, to match the alignment of the faces in the CelebA dataset (http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html).
To use it, download http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2 first.
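
If you don't already have that file on disk, here is a minimal sketch of fetching and decompressing it with just the standard library (the URL is the one above; the rest is plumbing):

import bz2
import os
import urllib.request

predictor_path = 'shape_predictor_68_face_landmarks.dat'
if not os.path.exists(predictor_path):
    # download the bz2-compressed landmark model and unpack it next to the notebook
    urllib.request.urlretrieve(
        'http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2',
        predictor_path + '.bz2')
    with bz2.open(predictor_path + '.bz2') as f_in, open(predictor_path, 'wb') as f_out:
        f_out.write(f_in.read())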

import dlib
from sklearn.ensemble import IsolationForest
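# The function below calls a remove_outliers() helper (used when
# remove_outliers_=True) whose definition isn't shown in the post. A minimal
# sketch of what it might look like, running the IsolationForest imported
# above on downscaled pixels; this is an assumption on my part, not
# necessarily the original implementation:
def remove_outliers(faces):
    thumbs = np.asarray([cv2.resize(f, (32, 32)).ravel() for f in faces])
    inliers = IsolationForest(contamination=0.05).fit_predict(thumbs) == 1
    return faces[inliers], int(np.sum(~inliers))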
def face_data_normalizer(images_directory_input,
                         images_directory_output,
                         output_size=256,
                         align_faces_=True,
                         remove_outliers_=False,
                         limit_num_faces_=None,
                         limit_num_files_=None):
    def write_faces_to_disk(directory, faces):
        print("writing faces to disk...")
        if os.path.exists(directory):
            shutil.rmtree(directory)
        print('creating output directory: %s'%(directory))
        os.makedirs(directory) # makedirs also creates any missing parent directories
        for i in range(faces.shape[0]):
            cv2.imwrite(''.join([directory,"%03d.jpg"%i]),faces[i,:,:,::-1])
        print("wrote %d faces"%(faces.shape[0]))
    if images_directory_input[-1] != '/':
        images_directory_input += '/'
    if images_directory_output[-1] != '/':
        images_directory_output += '/'
    faces = []
    if os.path.exists(images_directory_output):
        print('data already preprocessed? loading preprocessed files...')
        for img_idx,img_file in enumerate(os.listdir(images_directory_output)):
            # load the input image, resize it, and convert it to grayscale
            image = cv2.imread(''.join([images_directory_output,img_file]))
            if image is None: continue
            image = image[:,:,::-1] # BGR to RGB
            faces.append(image)
        faces = np.asarray(faces)
        print('loaded %d preprocessed images'%(faces.shape[0]))
        if remove_outliers_:
            faces,num_outliers = remove_outliers(faces)
        write_faces_to_disk(images_directory_output,faces)
        return faces
    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')
    max_val = len(os.listdir(images_directory_input)) if limit_num_files_ is None else limit_num_files_
    pb = display.ProgressBar(max_val)
    pb.display()
    face_counter = 0
    for img_idx,img_file in enumerate(os.listdir(images_directory_input)):
        # load the input image, resize it, and convert it to grayscale
        image = cv2.imread(''.join([images_directory_input,img_file]))
        if image is None:
            continue
        image = image[:,:,::-1] #BGR to RGB
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) # image is RGB after the flip above
        # detect faces in the grayscale image
        rects = detector(gray, 1)
        if len(rects) > 0:
            # loop over the face detections
            for (i, rect) in enumerate(rects):
                if align_faces_:
                    ######### Align with facial features detector #########
                    shape = predictor(gray, rect) # get facial features
                    shape = np.array([(shape.part(j).x, shape.part(j).y) for j in range(shape.num_parts)])
                    # center and scale face around mid point between eyes
                    center_eyes = shape[27].astype(int) # np.int is deprecated in recent NumPy
                    eyes_d = np.linalg.norm(shape[36]-shape[45])
                    face_size_x = int(eyes_d * 2.)
                    if face_size_x < 50: continue
                    # rotate to normalized angle
                    d = (shape[45] - shape[36]) / eyes_d # normalized eyes-difference vector (direction)
                    a = np.rad2deg(np.arctan2(d[1],d[0])) # angle
                    scale_factor = float(output_size) / float(face_size_x * 2.) # scale to fit in output_size
                    # rotation (around center_eyes) + scale transform
                    M = np.append(cv2.getRotationMatrix2D((center_eyes[0], center_eyes[1]),a,scale_factor),[[0,0,1]], axis=0)
                    # apply shift from center_eyes to middle of output_size
                    M1 = np.array([[1.,0.,-center_eyes[0]+output_size/2.],
                                   [0.,1.,-center_eyes[1]+output_size/2.],
                                   [0,0,1.]])
                    # concatenate transforms (rotation-scale + translation)
                    M = M1.dot(M)[:2]
                    # warp
                    try:
                        face = cv2.warpAffine(image, M, (output_size, output_size), borderMode=cv2.BORDER_REPLICATE)
                    except cv2.error:
                        continue
                    face_counter += 1
                    face = cv2.resize(face,(output_size,output_size))
                    faces.append(face)
                else:
                    ######### "No align" with just the detector #########
                    if rect.width() < 50: continue
                    # find scale factor
                    scale_factor = float(output_size) / float(rect.width() * 2.) # scale to fit in output_size
                    # scale around the center of the face (shift a bit for the approximate y-position of the eyes)
                    M = np.append(cv2.getRotationMatrix2D((rect.center().x,rect.center().y-rect.height()/6.),0,scale_factor),[[0,0,1]], axis=0)
                    # apply shift from center_eyes to middle of output_size
                    M1 = np.array([[1.,0.,-rect.center().x+output_size/2.],
                                   [0.,1.,-rect.center().y+output_size/2.+rect.height()/6.],
                                   [0,0,1.]])
                    # concatenate transforms (rotation-scale + translation)
                    M = M1.dot(M)[:2]
                    try:
                        face = cv2.warpAffine(image, M, (output_size, output_size), borderMode=cv2.BORDER_REPLICATE)
                    except cv2.error:
                        continue
                    face_counter += 1
                    faces.append(face)
        pb.progress = img_idx+1
        if limit_num_faces_ is not None and len(faces) > limit_num_faces_: # faces is still a Python list here
            break
        if limit_num_files_ is not None and img_idx >= limit_num_files_:
            break
    faces = np.asarray(faces)
    write_faces_to_disk(images_directory_output,faces)
    return faces

Download the faces dataset

We will use the FDDB: http://vis-www.cs.umass.edu/fddb/

people_zip_filename = 'originalPics.tar.gz'
photos_url = 'http://tamaraberg.com/faceDataset/originalPics.tar.gz'
# download, unzip and squash to a single directory with all the files
download_dataset(people_zip_filename, 'data/faces/raw', url=photos_url, remove_tmp_file=True, squash_dir=True)
    Downloading data from http://tamaraberg.com/faceDataset/originalPics.tar.gz
    579067904/579061091 [==============================] - 44s 0us/step
    Unzipping...
    Calculate total size...
     98% (587275530 of 594789979) |########## | Elapsed Time: 0:00:06 ETA:  0:00:00
    Done

Align the faces

Use the face alignment tool

if os.path.isdir('data/faces/noalign'): shutil.rmtree('data/faces/noalign')
faces_noalign = face_data_normalizer('data/faces/raw',
                                     'data/faces/noalign',
                                     align_faces_=False,
                                     remove_outliers_=False,
                                     limit_num_files_=100,
                                     output_size=256);
    writing faces to disk...
    creating output directory: data/faces/noalign/
    wrote 141 faces
if os.path.isdir('data/faces/align'): shutil.rmtree('data/faces/align')
faces_align = face_data_normalizer('data/faces/raw',
                                   'data/faces/align',
                                   align_faces_=True,
                                   remove_outliers_=False,
                                   limit_num_files_=None,
                                   output_size=256);
    writing faces to disk...
    creating output directory: data/faces/align/
    wrote 4805 faces

Visualize the faces dataset

Aligned

plt.figure(figsize=(8,4))
for i in range(np.min([faces_align.shape[0],28])):
    plt.subplot(4,7,i+1),plt.xticks(()),plt.yticks(()),plt.imshow(faces_align[i])


Not aligned

plt.figure(figsize=(8,4))
for i in range(np.min([faces_noalign.shape[0],28])):
    plt.subplot(4,7,i+1),plt.xticks(()),plt.yticks(()),plt.imshow(faces_noalign[i])

A tool to produce a GIF from lists of images, also able to apply an average sliding window between them

def show_gif(images_, images_average=10):
    faces_len = np.min([100,images_.shape[0]]) - images_average
    pb = display.ProgressBar(faces_len)
    pb.display()
    if os.path.exists('movie.gif'):
        os.remove('movie.gif')
    with imageio.get_writer('movie.gif', mode='I') as writer:
        for i in range(faces_len):
            pb.progress = i+1
            writer.append_data((np.mean(images_[i:i + images_average]/255.,axis=0) * 255.).astype(np.uint8))
    display.clear_output(wait=True)
    return HTML('<img src="movie.gif?%s">'%(random.randint(0,1000)))

Face Averages

plt.figure(figsize=(4,8))
plt.subplot(121),plt.xticks(()),plt.yticks(()),plt.imshow(np.mean(faces_noalign / 255.,axis=0)),plt.title('Not aligned')
plt.subplot(122),plt.xticks(()),plt.yticks(()),plt.imshow(np.mean(faces_align / 255.,axis=0)),plt.title('Aligned');


Animations:
Aligned

show_gif(faces_align, images_average=15)


Not aligned

show_gif(faces_noalign, images_average=15)


Naturally, the aligned faces have more facial features in common.

Variational Autoencoder
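
We'll train a convolutional variational autoencoder (VAE) on the aligned faces. The encoder maps an image to the mean and log-variance of a 100-dimensional latent Gaussian, a latent vector z is sampled from it via the reparameterization trick, and the decoder reconstructs the image from z. The loss combines a pixel-wise reconstruction term with a KL-divergence term that pulls the latent distribution toward a standard normal; both terms appear explicitly at the end of the code below.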

from keras.layers import Input, Dense, Conv2D, MaxPooling2D, UpSampling2D, Flatten, Reshape, Lambda, Conv2DTranspose, BatchNormalization
from keras.models import Model
from keras import metrics
import keras.backend as K
'''
    Inspiration: https://github.com/keras-team/keras/blob/master/examples/variational_autoencoder_deconv.py
'''
def ConvolutionalVAE(img_rows, img_cols, img_chns):
    filters = 16 # number of convolutional filters to use
    kernel_size = 3 # convolution kernel size
    original_img_size = (img_rows, img_cols, img_chns)
    latent_dim = 100
    inputs = Input(shape=original_img_size, name='encoder_input')
    x = inputs
    for i in range(2):
        filters *= 2
        x = Conv2D(filters=filters,
                   kernel_size=kernel_size,
                   activation='relu',
                   strides=2,
                   padding='same')(x)
    # shape info needed to build decoder model
    shape = K.int_shape(x)
    # generate latent vector Q(z|X)
    x = Flatten()(x)
#     x = Dense(16, activation='relu')(x)
    z_mean = Dense(latent_dim, name='z_mean')(x)
    z_log_var = Dense(latent_dim, name='z_log_var')(x)
    def sampling(args):
        z_mean_, z_log_var_ = args
        batch = K.shape(z_mean_)[0]
        dim = K.int_shape(z_mean_)[1]
        # by default, random_normal has mean=0 and std=1.0
        epsilon = K.random_normal(shape=(batch, dim))
        return z_mean_ + K.exp(0.5 * z_log_var_) * epsilon
    # use reparameterization trick to push the sampling out as input
    # note that "output_shape" isn't necessary with the TensorFlow backend
    z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])
    # instantiate encoder model
    encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')
    encoder.summary()
    # build decoder model
    latent_inputs = Input(shape=(latent_dim,), name='z_sampling')
    x = Dense(shape[1] * shape[2] * shape[3], activation='relu')(latent_inputs)
    x = Reshape((shape[1], shape[2], shape[3]))(x)
    for i in range(2):
        x = Conv2DTranspose(filters=filters,
                            kernel_size=kernel_size,
                            activation='relu',
                            strides=2,
                            padding='same')(x)
        filters //= 2
    outputs = Conv2DTranspose(filters=img_chns,
                              kernel_size=kernel_size,
                              activation='sigmoid',
                              padding='same',
                              name='decoder_output')(x)
    # instantiate decoder model
    decoder = Model(latent_inputs, outputs, name='decoder')
    decoder.summary()
    # instantiate VAE model
    outputs = decoder(encoder(inputs)[2])
    vae = Model(inputs, outputs, name='vae')
    reconstruction_loss = img_cols * img_rows * metrics.mse(K.flatten(inputs), K.flatten(outputs))
    kl_loss = K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1) * -0.5
    vae_loss = K.mean(reconstruction_loss + kl_loss)
    vae.add_loss(vae_loss)
    vae.compile(optimizer='rmsprop')
    return vae,encoder,decoder

A callback to render the losses as a graph

from keras.callbacks import Callback
class PlotLosses(Callback):
    def __init__(self, losses_names):
        self.losses = dict()
        self.losses_names = losses_names
        for ln in self.losses_names:
            self.losses[ln] = []
    def on_train_begin(self, logs={}):
        pass
    def on_epoch_end(self, epoch, logs={}):
        display.clear_output(wait=False)
        plt.figure(figsize=(10,5))
        for ln in self.losses_names:
            if logs.get(ln) is None: continue # skip metrics missing from this epoch's logs
            self.losses[ln].append(logs.get(ln))
            plt.plot(np.array(self.losses[ln]),label=ln)
        plt.ylabel('loss'),plt.xlabel('epochs')
        plt.legend()
        plt.grid(True)
        plt.tick_params(labelright=True)
        plt.show()
K.clear_session()
vae,encoder,generator = ConvolutionalVAE(256, 256, 3)

Train the VAE

vae.fit(faces_align / 256.,
        epochs=50,
        batch_size=32,
        validation_split=0.1,
        callbacks=[PlotLosses(['loss','val_loss'])]);

Apply VAE to find faces in inanimate objects

Objects were downloaded from a Google Images search – they may be copyrighted! (Oops)
Inspired by this Medium post: https://medium.com/starts-with-a-bang/averaging-inanimate-objects-can-produce-human-faces-1a80cd1448d8

plt.figure(figsize=(10,2))
count = 0
for img_idx,img_file in enumerate(os.listdir("downloads/objects that look like faces/")):
    # load the input image, resize it, and convert it to grayscale
    image = cv2.imread(''.join(["downloads/objects that look like faces/",img_file]))
    if image is None:
        continue
    if img_file[:2] not in ['60','83','3.','4.','50','51','43']:
        continue
    image = image[:,:,::-1] #BGR to RGB
    plt.subplot(2,10,count+1),plt.xticks(()),plt.yticks(())
    object_face = cv2.resize(image, (256,256)) / 256.
    plt.imshow(object_face)
    plt.subplot(2,10,count+11),plt.xticks(()),plt.yticks(())
    plt.imshow(np.squeeze(vae.predict(object_face[np.newaxis])));
    count += 1
    if count >= 9: break
plt.show()

Morph between faces with the VAE
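
The morph is simple linear interpolation in latent space: encode two faces, take their latent means z1 and z2, blend them as z = (1-a)·z1 + a·z2 with a stepping from 0 to 1, and decode each blended vector back into an image.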

idx = np.random.randint(faces_align.shape[0], size=(2,))
plt.figure(figsize=(8,40))
plt.subplot(1,7,1),plt.xticks(()),plt.yticks(()),plt.imshow(faces_align[idx[0]])
plt.subplot(1,7,7),plt.xticks(()),plt.yticks(()),plt.imshow(faces_align[idx[1]])
A = encoder.predict(faces_align[idx]/256.)
for i,a in enumerate(np.linspace(0,1,5)):
    blend = A[0][np.newaxis,0] * (1. - a) + A[0][np.newaxis,1] * a
    plt.subplot(1,7,2+i),plt.xticks(()),plt.yticks(())
    plt.imshow(np.squeeze(generator.predict(blend)));

def vae_morph(images_):
    faces_len = np.min([20,images_.shape[0]])
    pb = display.ProgressBar(faces_len)
    pb.display()
    if os.path.exists('movie.gif'):
        os.remove('movie.gif')
    with imageio.get_writer('movie.gif', mode='I') as writer:
        for f in range(faces_len - 1):
            A = encoder.predict(images_[[f,f+1]]/256.)
            for i,a in enumerate(np.linspace(0,1,7)):
                blend_latent = A[0][np.newaxis,0] * (1. - a) + A[0][np.newaxis,1] * a
                blended = np.squeeze(generator.predict(blend_latent))
                writer.append_data((blended * 255.).astype(np.uint8))
            pb.progress = f+1
    display.clear_output(wait=True)
    return HTML('<img src="movie.gif?%s">'%(random.randint(0,1000)))

And an animation morph GIF:

vae_morph(faces_align)


Hope you enjoyed!
(All sources are license-free, use them at will! But don’t blame me if something breaks. Also please don’t use my name to endorse your project)
