# code/classifier.py

#!/usr/bin/env python2

import time

start = time.time()

import argparse
import cv2
import os
import pickle

from operator import itemgetter

import numpy as np
np.set_printoptions(precision=2)
import pandas as pd

import openface

from sklearn.pipeline import Pipeline
from sklearn.lda import LDA
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.grid_search import GridSearchCV
from sklearn.mixture import GMM
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB

fileDir = os.path.dirname(os.path.realpath(__file__))
modelDir = os.path.join(fileDir, '..', 'openface/models')
dlibModelDir = os.path.join(modelDir, 'dlib')
openfaceModelDir = os.path.join(modelDir, 'openface')


def getRep(imgPath):
	"""Compute one network representation per face found in an image.

	Uses the module-level ``args``, ``align`` and ``net`` objects that the
	``__main__`` section of this file creates.

	:param imgPath: path of the image file, loaded with ``cv2.imread``.
	:returns: a list holding the ``net.forward`` result of every detected
		face, in the order the detector returned the bounding boxes (empty
		when no box was returned), or ``None`` if the detector returned
		``None``.
	:raises Exception: if the image cannot be loaded, or if a detected face
		cannot be aligned.
	"""
	start = time.time()
	bgrImg = cv2.imread(imgPath)
	if bgrImg is None:
		raise Exception("Unable to load image/frame")

	# OpenCV loads images as BGR; the aligner and network are fed RGB.
	rgbImg = cv2.cvtColor(bgrImg, cv2.COLOR_BGR2RGB)

	if args.verbose:
		print("  + Original size: {}".format(rgbImg.shape))
		print("Loading the image took {} seconds.".format(time.time() - start))

	start = time.time()

	# Get all bounding boxes (not only the largest face).
	bb = align.getAllFaceBoundingBoxes(rgbImg)

	if bb is None:
		return None
	if args.verbose:
		print("Face detection took {} seconds.".format(time.time() - start))

	start = time.time()

	alignedFaces = []
	for box in bb:
		alignedFace = align.align(
			args.imgDim,
			rgbImg,
			box,
			landmarkIndices=openface.AlignDlib.OUTER_EYES_AND_NOSE)
		# Check every face individually: the list itself is never None, so
		# a check on the list could not detect a failed alignment.
		if alignedFace is None:
			raise Exception("Unable to align the frame")
		alignedFaces.append(alignedFace)

	if args.verbose:
		print("Alignment took {} seconds.".format(time.time() - start))

	start = time.time()

	reps = [net.forward(alignedFace) for alignedFace in alignedFaces]

	if args.verbose:
		print("Neural network forward pass took {} seconds.".format(
			time.time() - start))

	return reps


def train(args):
	"""Fit a classifier on precomputed embeddings and pickle it.

	Reads ``labels.csv`` (column 1 holds image paths; the name of each
	path's parent directory is used as the class label) and ``reps.csv``
	(one embedding per row) from ``args.workDir``, fits the classifier
	selected by ``args.classifier`` -- wrapped in an LDA pipeline when
	``args.ldaDim > 0`` -- and writes the ``(LabelEncoder, classifier)``
	tuple to ``<workDir>/classifier.pkl``.

	:param args: parsed command-line namespace providing ``workDir``,
		``classifier`` and ``ldaDim``.
	:raises ValueError: if ``args.classifier`` is not a known classifier.
	"""
	print("Loading embeddings.")
	fname = "{}/labels.csv".format(args.workDir)
	# ``.values`` replaces the deprecated ``DataFrame.as_matrix()``.
	imgPaths = pd.read_csv(fname, header=None).values[:, 1]
	# The label of an image is the name of the directory that contains it.
	labels = [os.path.basename(os.path.dirname(p)) for p in imgPaths]
	fname = "{}/reps.csv".format(args.workDir)
	embeddings = pd.read_csv(fname, header=None).values
	le = LabelEncoder().fit(labels)
	labelsNum = le.transform(labels)
	nClasses = len(le.classes_)
	print("Training for {} classes.".format(nClasses))

	if args.classifier == 'LinearSvm':
		clf = SVC(C=1, kernel='linear', probability=True)
	elif args.classifier == 'GridSearchSvm':
		print("""
		Warning: In our experiences, using a grid search over SVM hyper-parameters only
		gives marginally better performance than a linear SVM with C=1 and
		is not worth the extra computations of performing a grid search.
		""")
		param_grid = [
			{'C': [1, 10, 100, 1000],
			 'kernel': ['linear']},
			{'C': [1, 10, 100, 1000],
			 'gamma': [0.001, 0.0001],
			 'kernel': ['rbf']}
		]
		clf = GridSearchCV(SVC(C=1, probability=True), param_grid, cv=5)
	elif args.classifier == 'GMM':  # Doesn't work best
		clf = GMM(n_components=nClasses)

	# ref:
	# http://scikit-learn.org/stable/auto_examples/classification/plot_classifier_comparison.html#example-classification-plot-classifier-comparison-py
	elif args.classifier == 'RadialSvm':  # Radial Basis Function kernel
		# works better with C = 1 and gamma = 2
		clf = SVC(C=1, kernel='rbf', probability=True, gamma=2)
	elif args.classifier == 'DecisionTree':  # Doesn't work best
		clf = DecisionTreeClassifier(max_depth=20)
	elif args.classifier == 'GaussianNB':
		clf = GaussianNB()

	# ref: https://jessesw.com/Deep-Learning/
	elif args.classifier == 'DBN':
		from nolearn.dbn import DBN
		# Layer sizes: input nodes, hidden nodes, output nodes. The output
		# layer needs one node per class; the encoded label of the last
		# CSV row is not a reliable class count.
		clf = DBN([embeddings.shape[1], 500, nClasses],
				  learn_rates=0.3,
				  # Smaller steps mean a possibly more accurate result, but the
				  # training will take longer
				  learn_rate_decays=0.9,
				  # a factor the initial learning rate will be multiplied by
				  # after each iteration of the training
				  epochs=300,  # number of iterations
				  # dropouts = 0.25, # Express the percentage of nodes that
				  # will be randomly dropped as a decimal.
				  verbose=1)
	else:
		raise ValueError(
			"Unknown classifier: {}".format(args.classifier))

	if args.ldaDim > 0:
		clf_final = clf
		clf = Pipeline([('lda', LDA(n_components=args.ldaDim)),
						('clf', clf_final)])

	clf.fit(embeddings, labelsNum)

	fName = "{}/classifier.pkl".format(args.workDir)
	print("Saving classifier to '{}'".format(fName))
	# Pickles are byte streams, so the file is opened in binary mode.
	with open(fName, 'wb') as f:
		pickle.dump((le, clf), f)


# def infer(args):
# 	with open(args.classifierModel, 'r') as f:
# 		(le, clf) = pickle.load(f)

# 	for img in args.imgs:
# 		print("\n=== {} ===".format(img))
# 		rep = getRep(img).reshape(1, -1)
# 		predictions = clf.predict_proba(rep).ravel()
# 		maxI = np.argmax(predictions)
# 		person = le.inverse_transform(maxI)
# 		confidence = predictions[maxI]
# 		print("Predict {} with {:.2f} confidence.".format(person, confidence))
# 		if isinstance(clf, GMM):
# 			dist = np.linalg.norm(rep - clf.means_[maxI])
# 			print("  + Distance from the mean: {}".format(dist))


def infer(args):
	"""Predict who appears in each input image and save an annotated copy.

	Loads the pickled ``(LabelEncoder, classifier)`` tuple from
	``args.classifierModel``. For every path in ``args.imgs`` it classifies
	each face representation returned by ``getRep``, draws the first
	prediction and its confidence on the image, writes the image to the
	input path with ``'test'`` replaced by ``'output'``, and prints the
	predicted names followed by the image path when any face was
	classified.

	A face that yields no representation is reported with
	"No Face detected" and skipped; the remaining faces and images are
	still processed.

	:param args: parsed command-line namespace providing
		``classifierModel``, ``imgs`` and ``verbose``.
	"""
	# NOTE(review): pickle.load can execute arbitrary code contained in the
	# file -- only load classifier models from a trusted source.
	with open(args.classifierModel, 'rb') as f:
		(le, clf) = pickle.load(f)  # le - label encoder, clf - classifier

	for img in args.imgs:
		reps = getRep(img)
		if reps is None:
			# getRep returns None when the detector yields no result;
			# handle it like an image without faces.
			print("No Face detected")
			reps = []

		persons = []
		confidences = []
		for rep in reps:
			if rep is None:
				print("No Face detected")
				continue
			rep = rep.reshape(1, -1)
			start = time.time()
			predictions = clf.predict_proba(rep).ravel()
			maxI = np.argmax(predictions)
			# Pass a one-element sequence: recent scikit-learn versions
			# reject a scalar argument to inverse_transform.
			persons.append(le.inverse_transform([maxI])[0])
			confidences.append(predictions[maxI])
			if args.verbose:
				print("Prediction took {} seconds.".format(time.time() - start))
			if isinstance(clf, GMM):
				dist = np.linalg.norm(rep - clf.means_[maxI])
				print("  + Distance from the mean: {}".format(dist))

		frame = cv2.imread(img)
		if persons:
			# Only the first prediction is drawn on the image.
			cv2.putText(
				frame,
				"{} ({}%)".format(persons[0], round(confidences[0]*100, 1)),
				(5, 20),
				cv2.FONT_HERSHEY_SIMPLEX,
				0.5,
				(0, 100, 300),
				2)
		# NOTE(review): when the path does not contain 'test', the output
		# path equals the input path and the input image is overwritten.
		cv2.imwrite(img.replace('test', 'output'), frame)

		if persons:
			print("{} {}".format(persons, img))


if __name__ == '__main__':
	# Script entry point: parse the command line, create the module-level
	# ``align`` and ``net`` objects that getRep() uses, then run the
	# selected mode.

	defaultPredictor = os.path.join(
		dlibModelDir, "shape_predictor_68_face_landmarks.dat")
	defaultNetwork = os.path.join(openfaceModelDir, 'nn4.small2.v1.t7')

	parser = argparse.ArgumentParser()

	# Options shared by both modes.
	parser.add_argument('--dlibFacePredictor', type=str,
						default=defaultPredictor,
						help="Path to dlib's face predictor.")
	parser.add_argument('--networkModel', type=str,
						default=defaultNetwork,
						help="Path to Torch network model.")
	parser.add_argument('--imgDim', type=int, default=96,
						help="Default image dimension.")
	parser.add_argument('--cuda', action='store_true')
	parser.add_argument('--verbose', action='store_true')

	subparsers = parser.add_subparsers(dest='mode', help="Mode")

	# 'train' mode.
	trainParser = subparsers.add_parser(
		'train', help="Train a new classifier.")
	trainParser.add_argument('--ldaDim', type=int, default=-1)
	classifierChoices = [
		'LinearSvm',
		'GridSearchSvm',
		'GMM',
		'RadialSvm',
		'DecisionTree',
		'GaussianNB',
		'DBN',
	]
	trainParser.add_argument('--classifier', type=str,
							 choices=classifierChoices,
							 help='The type of classifier to use.',
							 default='LinearSvm')
	trainParser.add_argument(
		'workDir', type=str,
		help="The input work directory containing 'reps.csv' and 'labels.csv'. Obtained from aligning a directory with 'align-dlib' and getting the representations with 'batch-represent'.")

	# 'infer' mode.
	inferParser = subparsers.add_parser(
		'infer',
		help='Predict who an image contains from a trained classifier.')
	inferParser.add_argument(
		'classifierModel', type=str,
		help='The Python pickle representing the classifier. This is NOT the Torch network model, which can be set with --networkModel.')
	inferParser.add_argument('imgs', type=str, nargs='+', help="Input image.")

	args = parser.parse_args()
	if args.verbose:
		# ``start`` was set at the very top of the module, before imports.
		elapsed = time.time() - start
		print("Argument parsing and import libraries took {} seconds.".format(
			elapsed))

	# Catch the common mistake of passing the Torch network as classifier.
	if args.mode == 'infer':
		if args.classifierModel.endswith(".t7"):
			raise Exception("""
Torch network model passed as the classification model,
which should be a Python pickle (.pkl)

See the documentation for the distinction between the Torch
network and classification models:

		http://cmusatyalab.github.io/openface/demo-3-classifier/
		http://cmusatyalab.github.io/openface/training-new-models/

Use `--networkModel` to set a non-standard Torch network model.""")

	start = time.time()

	align = openface.AlignDlib(args.dlibFacePredictor)
	net = openface.TorchNeuralNet(
		args.networkModel, imgDim=args.imgDim, cuda=args.cuda)

	if args.verbose:
		elapsed = time.time() - start
		print("Loading the dlib and OpenFace models took {} seconds.".format(
			elapsed))
		start = time.time()

	if args.mode == 'train':
		train(args)
	elif args.mode == 'infer':
		infer(args)