dlib人脸检测源码解析-mingfei10-ChinaUnix博客

mingfei10的ChinaUnix博客

首页　| 　博文目录　| 　关于我

mingfei10

博客访问： 1540840
博文数量： 230
博客积分： 474
博客等级：下士
技术积分： 1955
用户组：普通用户
注册时间： 2010-03-19 18:40

文章分类

全部博文（230）

python（21）

人工智能（10）
java（1）
SDN&NFV（2）
linux Manag（8）
linux Dev（8）
cloud（10）
Storage（10）
未分配的博文（170）

文章存档

2020年（3）

2019年（3）

2018年（12）

2017年（13）

2016年（11）

2015年（55）

2014年（74）

2013年（39）

2012年（2）

2011年（18）

我的朋友

cj83226

dlib人脸检测源码解析

dlib 是基于 C++ 的跨平台通用框架，内容非常丰富，涵盖机器学习、图像处理、数据压缩等。原作者链接。dlib 在 Image Processing 模块中定义了基于方向梯度直方图（Histogram of Oriented Gradient，HOG）特征和图像金字塔的多尺度目标检测方法。在非深度学习的方法中，该目标检测算法准确率较高，同时能保证良好的实时性。
本文将以人脸检测为例，详细解析 dlib 这部分源码。

源码及解析

frontal_face_detector.h

获取已训练的目标检测模型

namespace dlib
{ //miexp : 6 控制缩放步长 （6-1）/6 typedef object_detector6> > > frontal_face_detector; // 注：dlib将模型文件编码成字符串放在frontal_face_detector.h中，该函数用于解码 //    字符串获取模型文件内容 inline const std::string get_serialized_frontal_faces(); inline frontal_face_detector get_frontal_face_detector()；
}

	
	
		1
	

	
		2
	

	
		3
	

	
		4
	

	
		5
	

	
		6
	

	
		7
	

	
		8

object_detector.h

目标检测主函数：依次用 5 种角度的目标检测器调用 scanner.detect() 函数，找出当前图像中包含人脸的位置

void object_detector:: operator() (// miexp: 目标检测接口 const image_type& img, std::vector& final_dets, double adjust_threshold ）
{
    scanner.load(img);//载入当前帧或者图像，并提取hog特征用于检测 std::vector<std::pair<double, rectangle> > dets;//被挑选中的位置和得分 std::vector dets_accum; for (unsigned long i = 0; i < w.size(); ++i)//分别检测5种角度 { const double thresh = w[i].w(scanner.get_num_dimensions());
        scanner.detect(w[i].get_detect_argument(), dets, thresh + adjust_threshold);// 得到候选位置 for (unsigned long j = 0; j < dets.size(); ++j)
        {
            rect_detection temp;
            temp.detection_confidence = dets[j].first-thresh;//计算候选位置的detection_confidence temp.weight_index = i;
            temp.rect = dets[j].second;
            dets_accum.push_back(temp);
        }
    } //  非极大值抑制，用于筛选最准确的窗口 final_dets.clear(); if (w.size() > 1) std::sort(dets_accum.rbegin(), dets_accum.rend());//按detection_confidence排序 for (unsigned long i = 0; i < dets_accum.size(); ++i)
    { // 候选窗口若覆盖，则放弃当前窗口（复杂度n2） if (overlaps_any_box(final_dets, dets_accum[i].rect)) continue；
         final_dets.push_back(dets_accum[i]);
    }
}

	
	
		1
	

	
		2
	

	
		3
	

	
		4
	

	
		5
	

	
		6
	

	
		7
	

	
		8
	

	
		9
	

	
		10
	

	
		11
	

	
		12
	

	
		13
	

	
		14
	

	
		15
	

	
		16
	

	
		17
	

	
		18
	

	
		19
	

	
		20
	

	
		21
	

	
		22
	

	
		23
	

	
		24
	

	
		25
	

	
		26
	

	
		27
	

	
		28
	

	
		29
	

	
		30
	

	
		31
	

	
		32
	

	
		33
	

	
		34

scan_fhog_pyramid.h

load 载入图像并以金字塔形式提取hog特征

void scan_fhog_pyramid::load (constimage_type& img )
{ // miexp: load 载入图像并提取hog特征 unsigned long width, height;
     compute_fhog_window_size(width,height);//金字塔顶端最小图像，与hog窗口大小相关 10 10 //计算图像金字塔，并提取各层HOG特征存入feats中 impl::create_fhog_pyramid(img, fe, feats, cell_size, height,
            width, min_pyramid_layer_width, min_pyramid_layer_height,
            max_pyramid_levels);
 } //--------------------------------------------------------------- // mcreate_fhog_pyramid 构建图像HOG特征金字塔  void create_fhog_pyramid ( const image_type& img, const feature_extractor_type& fe, array<arrayfloat> > >& feats, int cell_size, int filter_rows_padding, int filter_cols_padding, unsigned long min_pyramid_layer_width, unsigned long min_pyramid_layer_height, unsigned long max_pyramid_levels
        )
 { unsigned long levels = 0;
    rectangle rect = get_rect(img);
    pyramid_type pyr;
    {//根据图像大小计算金字塔层数 do {
       rect = pyr.rect_down(rect);
       ++levels;
       } while (rect.width() >= min_pyramid_layer_width && 
                rect.height() >= min_pyramid_layer_height &&
                levels < max_pyramid_levels);
    } //根据金字塔层数分配内存 if (feats.max_size() < levels)
       feats.set_max_size(levels);
    feats.set_size(levels); // 构建特征金字塔 // 提取第0层hog特征 ，通过（）运算符重载，会调用中的 impl_extract_fhog_features函数 fe(img, feats[0], cell_size,filter_rows_padding,filter_cols_padding)； if (feats.size() > 1)
    { typedef typename image_traits::pixel_type pixel_type;
       array2d temp1, temp2;
       pyr(img, temp1);// 下采样 //提取第1层HOG特征 fe(temp1, feats[1], cell_size,filter_rows_padding,filter_cols_padding);
       swap(temp1,temp2); for (unsigned long i = 2; i < feats.size(); ++i)
       {
          pyr(temp2, temp1);//下采样 //提取第i层特征 fe(temp1, feats[i], cell_size,filter_rows_padding,filter_cols_padding);
          swap(temp1,temp2);
       }
     }
 } //--------------------------------------------------------------- // 调用的extract_fhog_features函数  void impl_extract_fhog_features( const image_type& img_, 
                                 out_type& hog, int cell_size,//本例中为8，即每个cell中有8x8个像素点 int filter_rows_padding, int filter_cols_padding ) 
{
    const_image_view img(img_);//定义第一象限和第二象限的9个方向的单位向量 matrix<float,2,1> directions[9];
    directions[0] = 1.0000, 0.0000; 
    directions[1] = 0.9397, 0.3420;
    directions[2] = 0.7660, 0.6428;
    directions[3] = 0.500, 0.8660;
    directions[4] = 0.1736, 0.9848;
    directions[5] = -0.1736, 0.9848;
    directions[6] = -0.5000, 0.8660;
    directions[7] = -0.7660, 0.6428;
    directions[8] = -0.9397, 0.3420; //计算所需内存用于缓存梯度直方图及各cell梯度直方图向量的norm const int cells_nr = (int)((float)img.nr()/(float)cell_size + 0.5); const int cells_nc = (int)((float)img.nc()/(float)cell_size + 0.5); //三维矩阵 cells_nr+2  cells_nc+2  18  array2dfloat,18,1> > hist(cells_nr+2, cells_nc+2); for (long r = 0; r < hist.nr(); ++r) for (long c = 0; c < hist.nc(); ++c)
           hist[r][c] = 0;//初始化为0 array2d<float> norm(cells_nr, cells_nc);// 各cell梯度直方图的norm值矩阵 assign_all_pixels(norm, 0);//初始化为0 // memory for HOG features const int hog_nr = std::max(cells_nr-2, 0); const int hog_nc = std::max(cells_nc-2, 0); const int padding_rows_offset = (filter_rows_padding-1)/2; const int padding_cols_offset = (filter_cols_padding-1)/2;
    init_hog(hog, hog_nr, hog_nc, filter_rows_padding, filter_cols_padding); const int visible_nr = std::min((long)cells_nr*cell_size,img.nr())-1; const int visible_nc = std::min((long)cells_nc*cell_size,img.nc())-1; //首先计算方向梯度直方图 for (int y = 1; y < visible_nr; y++) //遍历图像纵坐标 { const float yp = ((float)y+0.5)/(float)cell_size - 0.5; const int iyp = (int)std::floor(yp); const float vy0 = yp - iyp; const float vy1 = 1.0 - vy0; int x; for (x = 1; x < visible_nc; x++) 
         {
            matrix<float, 2, 1> grad; float v;
            get_gradient(y,x,img,grad,v);//做水平垂直差分，计算当前像素点的梯度（dy,dx） // 即找出最大的内积绝对值 和 方向 float best_dot = 0; int best_o = 0; for (int o = 0; o < 9; o++) 
            { const float dot = dlib::dot(directions[o], grad);//点乘计算内积 if (dot > best_dot) //在一、二象限内积为正 {
                  best_dot = dot;
                  best_o = o;
               } else if (-dot > best_dot) //在三、四象限内积为负 {
                  best_dot = -dot;
                  best_o = o+9;
               }
            }
            v = std::sqrt(v); // add to 4 histograms around pixel using bilinear interpolation const float xp = ((double)x + 0.5) / (double)cell_size - 0.5; const int ixp = (int)std::floor(xp); const float vx0 = xp - ixp; const float vx1 = 1.0 - vx0; // 插值，抑制突变 hist[iyp+1][ixp+1](best_o) += vy1*vx1*v;
             hist[iyp+1+1][ixp+1](best_o) += vy0*vx1*v;
             hist[iyp+1][ixp+1+1](best_o) += vy1*vx0*v;
             hist[iyp+1+1][ixp+1+1](best_o) += vy0*vx0*v;
         }
    } //计算每个cell的energy for (int r = 0; r < cells_nr; ++r)
    { for (int c = 0; c < cells_nc; ++c)
       { for (int o = 0; o < 9; o++) 
          {// 平方和求模 norm[r][c] += (hist[r+1][c+1](o) + hist[r+1][c+1](o+9)) *
                          (hist[r+1][c+1](o) + hist[r+1][c+1](o+9));
          }
       }
    } //然后。。。。太多了不抄代码了，大致如下： for (int y = 0; y < hog_nr; y++) 
    { for (int x = 0; x < hog_nc; x++) 
       { for（int o = 0; o < 31; o++）
            {
               set_hog(hog,o,x,y，18维的单位向量[某种映射关系]);
            }
       }
    } //得到该图的hog特征，hog  (h/8 ,w/8 ,31),即图像中每个8x8的块，对应一个31维的向量  }

	
	
		1
	

	
		2
	

	
		3
	

	
		4
	

	
		5
	

	
		6
	

	
		7
	

	
		8
	

	
		9
	

	
		10
	

	
		11
	

	
		12
	

	
		13
	

	
		14
	

	
		15
	

	
		16
	

	
		17
	

	
		18
	

	
		19
	

	
		20
	

	
		21
	

	
		22
	

	
		23
	

	
		24
	

	
		25
	

	
		26
	

	
		27
	

	
		28
	

	
		29
	

	
		30
	

	
		31
	

	
		32
	

	
		33
	

	
		34
	

	
		35
	

	
		36
	

	
		37
	

	
		38
	

	
		39
	

	
		40
	

	
		41
	

	
		42
	

	
		43
	

	
		44
	

	
		45
	

	
		46
	

	
		47
	

	
		48
	

	
		49
	

	
		50
	

	
		51
	

	
		52
	

	
		53
	

	
		54
	

	
		55
	

	
		56
	

	
		57
	

	
		58
	

	
		59
	

	
		60
	

	
		61
	

	
		62
	

	
		63
	

	
		64
	

	
		65
	

	
		66
	

	
		67
	

	
		68
	

	
		69
	

	
		70
	

	
		71
	

	
		72
	

	
		73
	

	
		74
	

	
		75
	

	
		76
	

	
		77
	

	
		78
	

	
		79
	

	
		80
	

	
		81
	

	
		82
	

	
		83
	

	
		84
	

	
		85
	

	
		86
	

	
		87
	

	
		88
	

	
		89
	

	
		90
	

	
		91
	

	
		92
	

	
		93
	

	
		94
	

	
		95
	

	
		96
	

	
		97
	

	
		98
	

	
		99
	

	
		100
	

	
		101
	

	
		102
	

	
		103
	

	
		104
	

	
		105
	

	
		106
	

	
		107
	

	
		108
	

	
		109
	

	
		110
	

	
		111
	

	
		112
	

	
		113
	

	
		114
	

	
		115
	

	
		116
	

	
		117
	

	
		118
	

	
		119
	

	
		120
	

	
		121
	

	
		122
	

	
		123
	

	
		124
	

	
		125
	

	
		126
	

	
		127
	

	
		128
	

	
		129
	

	
		130
	

	
		131
	

	
		132
	

	
		133
	

	
		134
	

	
		135
	

	
		136
	

	
		137
	

	
		138
	

	
		139
	

	
		140
	

	
		141
	

	
		142
	

	
		143
	

	
		144
	

	
		145
	

	
		146
	

	
		147
	

	
		148
	

	
		149
	

	
		150
	

	
		151
	

	
		152
	

	
		153
	

	
		154
	

	
		155
	

	
		156
	

	
		157
	

	
		158
	

	
		159
	

	
		160
	

	
		161
	

	
		162
	

	
		163
	

	
		164
	

	
		165
	

	
		166
	

	
		167

前面提取到了金字塔每一层的hog特征，一并存入feats中，然后开始检测

void detect_from_fhog_pyramid ( const array<arrayfloat> > >& feats, const feature_extractor_type& fe, const fhog_filterbank& w, const double thresh, const unsigned long det_box_height, const unsigned long det_box_width, const int cell_size, const int filter_rows_padding, const int filter_cols_padding, std::vector<std::pair<double, rectangle> >& dets ) 
{
   dets.clear();//初始化 array2d<float> saliency_image;//显著性系数图，系数越大，则对应位置包含目标可能性越高 pyramid_type pyr; //遍历金字塔所有层 for (unsigned long l = 0; l < feats.size(); ++l)
   { // 检测每一层，得到每一层的显著性系数图，及目标区域 const rectangle area = apply_filters_to_fhog(w, feats[l], saliency_image); // 遍历该区域 for (long r = area.top(); r <= area.bottom(); ++r)
      { for (long c = area.left(); c <= area.right(); ++c)
         { // if we found a detection if (saliency_image[r][c] >= thresh)//若该处显著性系数大于阈值 { // 取出该处的矩阵位置 rectangle temp = centered_rect(point(c,r),det_box_width,det_box_height)；
               rectangle rect = fe.feats_to_image(temp,cell_size, 
                                         filter_rows_padding, filter_cols_padding);
               rect = pyr.rect_up(rect, l);//放大到正常大小 dets.push_back(std::make_pair(saliency_image[r][c], rect));
             }
          }
      }
   } std::sort(dets.rbegin(), dets.rend(), compare_pair_rect);//根据显著性系数排序 }

	
	
		1
	

	
		2
	

	
		3
	

	
		4
	

	
		5
	

	
		6
	

	
		7
	

	
		8
	

	
		9
	

	
		10
	

	
		11
	

	
		12
	

	
		13
	

	
		14
	

	
		15
	

	
		16
	

	
		17
	

	
		18
	

	
		19
	

	
		20
	

	
		21
	

	
		22
	

	
		23
	

	
		24
	

	
		25
	

	
		26
	

	
		27
	

	
		28
	

	
		29
	

	
		30
	

	
		31
	

	
		32
	

	
		33
	

	
		34
	

	
		35
	

	
		36
	

	
		37
	

	
		38
	

	
		39

检测每一层

// Applies a detector's separable filters to one pyramid level and sums the
// responses into a saliency map.
//
//   w               filter bank loaded from the detection model
//   feats           HOG feature planes of the current pyramid level
//   saliency_image  output; summed filter responses (all zeros, sized like
//                   feats[0], when no filter was applied)
//
// Returns the rectangle reported by the last filtering pass, or a
// default-constructed rectangle when no filter was applied.
template <typename fhog_filterbank>
rectangle apply_filters_to_fhog (
    const fhog_filterbank& w,
    const array<array2d<float> >& feats,
    array2d<float>& saliency_image
)
{
    rectangle area;
    saliency_image.clear();
    array2d<float> scratch;

    // One group of filters per feature plane (31 planes in this model per the
    // article); the responses of all planes are summed, i.e. they "vote".
    for (unsigned long i = 0; i < w.row_filters.size(); ++i)
    {
        // Each plane has a few separable filter pairs (2 or 3 here per the
        // article).  row_filters[i][j] and col_filters[i][j] are length-10
        // vectors in this model, which the article reads as an 80x80 pixel
        // detection window.
        for (unsigned long j = 0; j < w.row_filters[i].size(); ++j)
        {
            // The very first pass must overwrite: saliency_image was just
            // cleared, so there is nothing valid to accumulate onto.  Every
            // later pass adds to the running sum.
            const bool accumulate = saliency_image.size() != 0;

            // Row convolution followed by column convolution: two 1-D passes
            // are faster than one 2-D convolution.
            area = float_spatially_filter_image_separable(feats[i], saliency_image,
                       w.row_filters[i][j], w.col_filters[i][j], scratch, accumulate);
        }
    }

    // No filter ran at all: return an all-zero map of the right size.
    if (saliency_image.size() == 0)
    {
        saliency_image.set_size(feats[0].nr(), feats[0].nc());
        assign_all_pixels(saliency_image, 0);
    }

    return area;
}

	
	
		1
	

	
		2
	

	
		3
	

	
		4
	

	
		5
	

	
		6
	

	
		7
	

	
		8
	

	
		9
	

	
		10
	

	
		11
	

	
		12
	

	
		13
	

	
		14
	

	
		15
	

	
		16
	

	
		17
	

	
		18
	

	
		19
	

	
		20
	

	
		21
	

	
		22
	

	
		23
	

	
		24
	

	
		25

spatial_filtering.h

两次一维卷积

// Filters an image with a separable kernel as two 1-D convolutions: rows
// first (into a scratch buffer), then columns.
//
//   in_img_      input image
//   out_img_     output image; resized to match the input
//   _row_filter  1-D kernel applied along each row (float[10] in this example)
//   _col_filter  1-D kernel applied along each column (float[10] in this example)
//   scratch_     caller-supplied buffer for the intermediate row-filtered image
//   add_to       when true the result is added to out_img_; when false it
//                overwrites out_img_
//
// Returns the rectangle of pixels that were actually written, i.e. the part of
// the image where the whole kernel fits.  Pixels outside it are not touched.
template <typename in_image_type, typename out_image_type, typename EXP1, typename EXP2>
rectangle float_spatially_filter_image_separable (
    const in_image_type& in_img_,
    out_image_type& out_img_,
    const matrix_exp<EXP1>& _row_filter,
    const matrix_exp<EXP2>& _col_filter,
    out_image_type& scratch_,
    bool add_to = false
)
{
    const_temp_matrix<EXP1> row_filter(_row_filter);
    const_temp_matrix<EXP2> col_filter(_col_filter);

    const_image_view<in_image_type> in_img(in_img_);
    image_view<out_image_type> out_img(out_img_);
    image_view<out_image_type> scratch(scratch_);

    // The convolution result goes to out_img; scratch holds the row pass.
    out_img.set_size(in_img.nr(), in_img.nc());
    scratch.set_size(in_img.nr(), in_img.nc());

    // First and one-past-last positions at which the kernel fits entirely
    // inside the image.
    const long first_row = col_filter.size()/2;
    const long first_col = row_filter.size()/2;
    const long last_row = in_img.nr() - ((col_filter.size()-1)/2);
    const long last_col = in_img.nc() - ((row_filter.size()-1)/2);

    const rectangle non_border(first_col, first_row, last_col-1, last_row-1);

    // Row filter: every row is processed, because the column pass below reads
    // rows above and below its output row.
    for (long r = 0; r < in_img.nr(); ++r)
    {
        for (long c = first_col; c < last_col; ++c)
        {
            float temp = 0;
            for (long n = 0; n < row_filter.size(); ++n)
            {
                temp += in_img[r][c-first_col+n]*row_filter(n);
            }
            scratch[r][c] = temp;
        }
    }

    // Column filter over the row-filtered data.
    for (long r = first_row; r < last_row; ++r)
    {
        for (long c = first_col; c < last_col; ++c)
        {
            float temp = 0;
            for (long m = 0; m < col_filter.size(); ++m)
            {
                temp += scratch[r-first_row+m][c]*col_filter(m);
            }

            if (add_to)
                out_img[r][c] += temp;
            else
                out_img[r][c] = temp;
        }
    }

    return non_border;
}

阅读(5416) | 评论(0) | 转发(0) |

上一篇：Python学习之旅—Mysql数据库之约束(外键定义表和表之间的关系)

下一篇：dlib人脸对齐源码详解

给主人留下些什么吧！~~

感谢所有关心和支持过ChinaUnix的朋友们

16024965号-6