转自:https://blog.csdn.net/qq_29573053/article/details/78517167
一般的人脸识别应用通常都包括四个过程:
1 人脸detect,这一步主要是定位人脸在图像中的位置,输出人脸的位置矩形框
2 人脸shape predictor,这一步主要是找出眼睛眉毛鼻子嘴巴的68个定位点
3 人脸对齐alignment,这一步主要是通过投影几何变换出一张标准脸
4 人脸识别,这一步主要在对齐的人脸图像上提取128维的特征向量,根据特征向量间的距离来进行判断识别。
本篇文章主要想解读一下dlib中关于人脸对齐的源码。人脸对齐主要是一个affine transform,即仿射变换:我们在detect到人脸后会得到一个矩形位置框,需要将这个矩形里面的人脸变换到150*150大小的标准人脸,英文叫做chip。首先看一下一个结构体chip_details,它包含变换的一些基本信息:
-
-
-
chip_details() : angle(0), rows(0), cols(0) {}
-
chip_details(const rectangle& rect_) : rect(rect_),angle(0), rows(rect_.height()), cols(rect_.width()) {}
-
chip_details(const drectangle& rect_) : rect(rect_),angle(0),
-
rows((unsigned long)(rect_.height()+0.5)), cols((unsigned long)(rect_.width()+0.5)) {}
-
chip_details(const drectangle& rect_, unsigned long size) : rect(rect_),angle(0)
-
{ compute_dims_from_size(size); }
-
chip_details(const drectangle& rect_, unsigned long size, double angle_) : rect(rect_),angle(angle_)
-
{ compute_dims_from_size(size); }
-
-
chip_details(const drectangle& rect_, const chip_dims& dims) :
-
rect(rect_),angle(0),rows(dims.rows), cols(dims.cols) {}
-
chip_details(const drectangle& rect_, const chip_dims& dims, double angle_) :
-
rect(rect_),angle(angle_),rows(dims.rows), cols(dims.cols) {}
-
-
-
-
const std::
vectorvector2> >& chip_points,
-
const std::
vectorvector2> >& img_points,
-
-
-
rows(dims.rows), cols(dims.cols)
-
-
DLIB_CASSERT( chip_points.size() == img_points.size() && chip_points.size() >= 2,
-
"\t chip_details::chip_details(chip_points,img_points,dims)"
-
<< "\n\t Invalid inputs were given to this function."
-
<< "\n\t chip_points.size(): " << chip_points.size()
-
<< "\n\t img_points.size(): " << img_points.size()
-
-
-
const point_transform_affine tform = find_similarity_transform(chip_points,img_points);
-
dlib::vector<double,2> p(1,0);
-
-
-
-
-
-
angle = std::atan2(p.y(),p.x());
-
-
-
const double scale = length(p);
-
rect = centered_drect(tform(point(dims.cols,dims.rows)/2.0),
-
-
-
-
-
-
-
-
-
-
-
inline unsigned long size() const
-
-
-
-
-
-
void compute_dims_from_size (
-
-
-
-
const double relative_size = std::sqrt(size/(double)rect.area());
-
rows = static_cast<unsigned long>(rect.height()*relative_size + 0.5);
-
cols = static_cast<unsigned long>(size/(double)rows + 0.5);
-
rows = std::max(1ul,rows);
-
cols = std::max(1ul,cols);
-
-
这个结构体中最关键的构造函数(即下面这个接收chip_points和img_points的构造函数),主要传入chip上的特征点和实际人脸的特征点,找到一个相似变换,然后提取角度、平移、缩放三个方面的信息:角度放在angle中,平移和缩放放在rect中。
-
-
-
const std::
vectorvector2> >& chip_points,
-
const std::
vectorvector2> >& img_points,
-
-
需要注意的是,这里的变换是从chip → img的(img指原图),意思是给定chip上的一个点,可以找到img上面的对应点。
接下来就是获取这个人脸的chip_details的方法了:
-
inline chip_details get_face_chip_details (
-
const full_object_detection& det,
-
const unsigned long size = 200,
-
const double padding = 0.2
-
-
-
DLIB_CASSERT(det.num_parts() == 68,
-
"\t chip_details get_face_chip_details()"
-
<< "\n\t You must give a detection with exactly 68 parts in it."
-
<< "\n\t det.num_parts(): " << det.num_parts()
-
-
DLIB_CASSERT(padding >= 0 && size > 0,
-
"\t chip_details get_face_chip_details()"
-
<< "\n\t Invalid inputs were given to this function."
-
<< "\n\t padding: " << padding
-
-
-
-
-
const double mean_face_shape_x[] = {
-
0.000213256, 0.0752622, 0.18113, 0.29077, 0.393397, 0.586856, 0.689483, 0.799124,
-
0.904991, 0.98004, 0.490127, 0.490127, 0.490127, 0.490127, 0.36688, 0.426036,
-
0.490127, 0.554217, 0.613373, 0.121737, 0.187122, 0.265825, 0.334606, 0.260918,
-
0.182743, 0.645647, 0.714428, 0.793132, 0.858516, 0.79751, 0.719335, 0.254149,
-
0.340985, 0.428858, 0.490127, 0.551395, 0.639268, 0.726104, 0.642159, 0.556721,
-
0.490127, 0.423532, 0.338094, 0.290379, 0.428096, 0.490127, 0.552157, 0.689874,
-
0.553364, 0.490127, 0.42689
-
-
const double mean_face_shape_y[] = {
-
0.106454, 0.038915, 0.0187482, 0.0344891, 0.0773906, 0.0773906, 0.0344891,
-
0.0187482, 0.038915, 0.106454, 0.203352, 0.307009, 0.409805, 0.515625, 0.587326,
-
0.609345, 0.628106, 0.609345, 0.587326, 0.216423, 0.178758, 0.179852, 0.231733,
-
0.245099, 0.244077, 0.231733, 0.179852, 0.178758, 0.216423, 0.244077, 0.245099,
-
0.780233, 0.745405, 0.727388, 0.742578, 0.727388, 0.745405, 0.780233, 0.864805,
-
0.902192, 0.909281, 0.902192, 0.864805, 0.784792, 0.778746, 0.785343, 0.778746,
-
0.784792, 0.824182, 0.831803, 0.824182
-
-
-
COMPILE_TIME_ASSERT(sizeof(mean_face_shape_x)/sizeof(double) == 68-17);
-
-
std::
vectorvector<double,2> > from_points, to_points;
-
for (unsigned long i = 17; i < det.num_parts(); ++i)
-
-
-
if ((55 <= i && i <= 59) || (65 <= i && i <= 67))
-
-
-
-
-
-
dlib::vector<double,2> p;
-
p.x() = (padding+mean_face_shape_x[i-17])/(2*padding+1);
-
p.y() = (padding+mean_face_shape_y[i-17])/(2*padding+1);
-
from_points.push_back(p*size);
-
to_points.push_back(det.part(i));
-
-
-
return chip_details(from_points, to_points, chip_dims(size,size));
-
这个函数首先定义标准脸特征点的位置,再据此解析出chip_details结构体。
再接下来就是提取人脸图片了:
-
-
-
-
-
void extract_image_chips (
-
-
const std::vector& chip_locations,
-
-
-
-
-
-
for (unsigned long i = 0; i < chip_locations.size(); ++i)
-
-
DLIB_CASSERT(chip_locations[i].size() != 0 &&
-
chip_locations[i].rect.is_empty() == false,
-
"\t void extract_image_chips()"
-
<< "\n\t Invalid inputs were given to this function."
-
<< "\n\t chip_locations["<"].size(): " << chip_locations[i].size()
-
<< "\n\t chip_locations["<"].rect.is_empty(): " << chip_locations[i].rect.is_empty()
-
-
-
-
-
-
-
-
-
-
-
-
-
-
for (unsigned long i = 0; i < chip_locations.size(); ++i)
-
-
-
-
drectangle rect = pyr.rect_down(chip_locations[i].rect);
-
while (rect.area() > chip_locations[i].size())
-
-
rect = pyr.rect_down(rect);
-
-
-
-
-
-
-
const vector<double,2> cent = center(chip_locations[i].rect);
-
rot_rect += rotate_point<double>(cent,chip_locations[i].rect.tl_corner(),chip_locations[i].angle);
-
rot_rect += rotate_point<double>(cent,chip_locations[i].rect.tr_corner(),chip_locations[i].angle);
-
rot_rect += rotate_point<double>(cent,chip_locations[i].rect.bl_corner(),chip_locations[i].angle);
-
rot_rect += rotate_point<double>(cent,chip_locations[i].rect.br_corner(),chip_locations[i].angle);
-
bounding_box += grow_rect(rot_rect, grow).intersect(get_rect(img));
-
max_depth = std::max(depth,max_depth);
-
-
-
-
-
-
dlib::
arraytypename image_traits::pixel_type> > levels(max_depth);
-
-
pyr(sub_image(img,bounding_box),levels[0]);
-
for (unsigned long i = 1; i < levels.size(); ++i)
-
pyr(levels[i-1],levels[i]);
-
-
std::
vectorvector<double,2> > from, to;
-
-
-
chips.resize(chip_locations.size());
-
for (unsigned long i = 0; i < chips.size(); ++i)
-
-
-
-
if (chip_locations[i].angle == 0 &&
-
chip_locations[i].rows == chip_locations[i].rect.height() &&
-
chip_locations[i].cols == chip_locations[i].rect.width())
-
-
impl::basic_extract_image_chip(img, chip_locations[i].rect, chips[i]);
-
-
-
-
set_image_size(chips[i], chip_locations[i].rows, chip_locations[i].cols);
-
-
-
-
drectangle rect = translate_rect(chip_locations[i].rect, -bounding_box.tl_corner());
-
while (pyr.rect_down(rect).area() > chip_locations[i].size())
-
-
-
rect = pyr.rect_down(rect);
-
-
-
-
-
-
-
from.push_back(get_rect(chips[i]).tl_corner()); to.push_back(rotate_point<double>(center(rect),rect.tl_corner(),chip_locations[i].angle));
-
from.push_back(get_rect(chips[i]).tr_corner()); to.push_back(rotate_point<double>(center(rect),rect.tr_corner(),chip_locations[i].angle));
-
from.push_back(get_rect(chips[i]).bl_corner()); to.push_back(rotate_point<double>(center(rect),rect.bl_corner(),chip_locations[i].angle));
-
point_transform_affine trns = find_affine_transform(from,to);
-
-
-
-
transform_image(sub_image(img,bounding_box),chips[i],interpolate_bilinear(),trns);
-
-
transform_image(levels[level],chips[i],interpolate_bilinear(),trns);
-
-
-
上面函数中,首先需要构造一个图像金字塔用于图像缩放。因为如果我们的chip是150*150的,而它映射到原图上的区域是1000*1000,就需要有个缩放的过程;如果直接从1000*1000缩放到150*150,得到的图像质量不好,因此采用一级级下采样来缩放。函数中首先寻找对应的目标下采样深度,进行图像下采样,接着通过find_affine_transform计算出仿射变换矩阵,得到矩阵后直接调用transform_image就好了。transform_image里面就不分析了,基本就是逐个像素做位置变换并填充。