Chinaunix首页 | 论坛 | 博客
  • 博客访问: 1515552
  • 博文数量: 230
  • 博客积分: 474
  • 博客等级: 下士
  • 技术积分: 1955
  • 用 户 组: 普通用户
  • 注册时间: 2010-03-19 18:40
文章分类

全部博文(230)

文章存档

2020年(3)

2019年(3)

2018年(12)

2017年(13)

2016年(11)

2015年(55)

2014年(74)

2013年(39)

2012年(2)

2011年(18)

我的朋友

分类: C/C++

2018-08-30 23:08:12

转自:https://blog.csdn.net/weixin_35793375/article/details/78028545?locationNum=3&fps=1

dlib人脸检测源码解析

dlib是基于C++的跨平台通用框架,内容非常丰富,涵盖机器学习、图像处理、数据压缩等(原作者链接)。dlib在Image Processing模块中定义了基于方向梯度直方图(Histogram of Oriented Gradient,HOG)特征和图像金字塔的多尺度目标检测方法。在非深度学习的方法中,其目标检测算法准确率较高,同时能保证良好的实时性。
本文将以人脸检测为例,详细解析dlib本部分源码。

源码及解析



  • frontal_face_detector.h 


 获取已训练的目标检测模型
namespace dlib
{ //miexp : 6 控制缩放步长 (6-1)/6 typedef object_detector6> > > frontal_face_detector; // 注:dlib将模型文件编码成字符串放在frontal_face_detector.h中,该函数用于解码 //    字符串获取模型文件内容 inline const std::string get_serialized_frontal_faces(); inline frontal_face_detector get_frontal_face_detector();
}
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8



  • object_detector.h 


目标检测主函数:依次用5种角度的目标检测器调用 scanner.detect() 函数,找出当前图像中包含人脸的位置。

void object_detector:: operator() (// miexp: 目标检测接口 const image_type& img, std::vector& final_dets, double adjust_threshold )
{
    scanner.load(img);//载入当前帧或者图像,并提取hog特征用于检测 std::vector<std::pair<double, rectangle> > dets;//被挑选中的位置和得分 std::vector dets_accum; for (unsigned long i = 0; i < w.size(); ++i)//分别检测5种角度 { const double thresh = w[i].w(scanner.get_num_dimensions());
        scanner.detect(w[i].get_detect_argument(), dets, thresh + adjust_threshold);// 得到候选位置 for (unsigned long j = 0; j < dets.size(); ++j)
        {
            rect_detection temp;
            temp.detection_confidence = dets[j].first-thresh;//计算候选位置的detection_confidence temp.weight_index = i;
            temp.rect = dets[j].second;
            dets_accum.push_back(temp);
        }
    } //  非极大值抑制,用于筛选最准确的窗口 final_dets.clear(); if (w.size() > 1) std::sort(dets_accum.rbegin(), dets_accum.rend());//按detection_confidence排序 for (unsigned long i = 0; i < dets_accum.size(); ++i)
    { // 候选窗口若覆盖,则放弃当前窗口(复杂度n2) if (overlaps_any_box(final_dets, dets_accum[i].rect)) continue;
         final_dets.push_back(dets_accum[i]);
    }
}
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34



  • scan_fhog_pyramid.h 


load 载入图像并以金字塔形式提取hog特征

void scan_fhog_pyramid::load (constimage_type& img )
{ // miexp: load 载入图像并提取hog特征 unsigned long width, height;
     compute_fhog_window_size(width,height);//金字塔顶端最小图像,与hog窗口大小相关 10 10 //计算图像金字塔,并提取各层HOG特征存入feats中 impl::create_fhog_pyramid(img, fe, feats, cell_size, height,
            width, min_pyramid_layer_width, min_pyramid_layer_height,
            max_pyramid_levels);
 } //--------------------------------------------------------------- // mcreate_fhog_pyramid 构建图像HOG特征金字塔  void create_fhog_pyramid ( const image_type& img, const feature_extractor_type& fe, array<arrayfloat> > >& feats, int cell_size, int filter_rows_padding, int filter_cols_padding, unsigned long min_pyramid_layer_width, unsigned long min_pyramid_layer_height, unsigned long max_pyramid_levels
        )
 { unsigned long levels = 0;
    rectangle rect = get_rect(img);
    pyramid_type pyr;
    {//根据图像大小计算金字塔层数 do {
       rect = pyr.rect_down(rect);
       ++levels;
       } while (rect.width() >= min_pyramid_layer_width && 
                rect.height() >= min_pyramid_layer_height &&
                levels < max_pyramid_levels);
    } //根据金字塔层数分配内存 if (feats.max_size() < levels)
       feats.set_max_size(levels);
    feats.set_size(levels); // 构建特征金字塔 // 提取第0层hog特征 ,通过()运算符重载,会调用中的 impl_extract_fhog_features函数 fe(img, feats[0], cell_size,filter_rows_padding,filter_cols_padding); if (feats.size() > 1)
    { typedef typename image_traits::pixel_type pixel_type;
       array2d temp1, temp2;
       pyr(img, temp1);// 下采样 //提取第1层HOG特征 fe(temp1, feats[1], cell_size,filter_rows_padding,filter_cols_padding);
       swap(temp1,temp2); for (unsigned long i = 2; i < feats.size(); ++i)
       {
          pyr(temp2, temp1);//下采样 //提取第i层特征 fe(temp1, feats[i], cell_size,filter_rows_padding,filter_cols_padding);
          swap(temp1,temp2);
       }
     }
 } //--------------------------------------------------------------- // 调用的extract_fhog_features函数  void impl_extract_fhog_features( const image_type& img_, 
                                 out_type& hog, int cell_size,//本例中为8,即每个cell中有8x8个像素点 int filter_rows_padding, int filter_cols_padding ) 
{
    const_image_view img(img_);//定义第一象限和第二象限的9个方向的单位向量 matrix<float,2,1> directions[9];
    directions[0] = 1.0000, 0.0000; 
    directions[1] = 0.9397, 0.3420;
    directions[2] = 0.7660, 0.6428;
    directions[3] = 0.500, 0.8660;
    directions[4] = 0.1736, 0.9848;
    directions[5] = -0.1736, 0.9848;
    directions[6] = -0.5000, 0.8660;
    directions[7] = -0.7660, 0.6428;
    directions[8] = -0.9397, 0.3420; //计算所需内存用于缓存梯度直方图及各cell梯度直方图向量的norm const int cells_nr = (int)((float)img.nr()/(float)cell_size + 0.5); const int cells_nc = (int)((float)img.nc()/(float)cell_size + 0.5); //三维矩阵 cells_nr+2  cells_nc+2  18  array2dfloat,18,1> > hist(cells_nr+2, cells_nc+2); for (long r = 0; r < hist.nr(); ++r) for (long c = 0; c < hist.nc(); ++c)
           hist[r][c] = 0;//初始化为0 array2d<float> norm(cells_nr, cells_nc);// 各cell梯度直方图的norm值矩阵 assign_all_pixels(norm, 0);//初始化为0 // memory for HOG features const int hog_nr = std::max(cells_nr-2, 0); const int hog_nc = std::max(cells_nc-2, 0); const int padding_rows_offset = (filter_rows_padding-1)/2; const int padding_cols_offset = (filter_cols_padding-1)/2;
    init_hog(hog, hog_nr, hog_nc, filter_rows_padding, filter_cols_padding); const int visible_nr = std::min((long)cells_nr*cell_size,img.nr())-1; const int visible_nc = std::min((long)cells_nc*cell_size,img.nc())-1; //首先计算方向梯度直方图 for (int y = 1; y < visible_nr; y++) //遍历图像纵坐标 { const float yp = ((float)y+0.5)/(float)cell_size - 0.5; const int iyp = (int)std::floor(yp); const float vy0 = yp - iyp; const float vy1 = 1.0 - vy0; int x; for (x = 1; x < visible_nc; x++) 
         {
            matrix<float, 2, 1> grad; float v;
            get_gradient(y,x,img,grad,v);//做水平垂直差分,计算当前像素点的梯度(dy,dx) // 即找出最大的内积绝对值 和 方向 float best_dot = 0; int best_o = 0; for (int o = 0; o < 9; o++) 
            { const float dot = dlib::dot(directions[o], grad);//点乘计算内积 if (dot > best_dot) //在一、二象限内积为正 {
                  best_dot = dot;
                  best_o = o;
               } else if (-dot > best_dot) //在三、四象限内积为负 {
                  best_dot = -dot;
                  best_o = o+9;
               }
            }
            v = std::sqrt(v); // add to 4 histograms around pixel using bilinear interpolation const float xp = ((double)x + 0.5) / (double)cell_size - 0.5; const int ixp = (int)std::floor(xp); const float vx0 = xp - ixp; const float vx1 = 1.0 - vx0; // 插值,抑制突变 hist[iyp+1][ixp+1](best_o) += vy1*vx1*v;
             hist[iyp+1+1][ixp+1](best_o) += vy0*vx1*v;
             hist[iyp+1][ixp+1+1](best_o) += vy1*vx0*v;
             hist[iyp+1+1][ixp+1+1](best_o) += vy0*vx0*v;
         }
    } //计算每个cell的energy for (int r = 0; r < cells_nr; ++r)
    { for (int c = 0; c < cells_nc; ++c)
       { for (int o = 0; o < 9; o++) 
          {// 平方和求模 norm[r][c] += (hist[r+1][c+1](o) + hist[r+1][c+1](o+9)) *
                          (hist[r+1][c+1](o) + hist[r+1][c+1](o+9));
          }
       }
    } //然后。。。。太多了不抄代码了,大致如下: for (int y = 0; y < hog_nr; y++) 
    { for (int x = 0; x < hog_nc; x++) 
       { forint o = 0; o < 31; o++)
            {
               set_hog(hog,o,x,y,18维的单位向量[某种映射关系]);
            }
       }
    } //得到该图的hog特征,hog  (h/8 ,w/8 ,31),即图像中每个8x8的块,对应一个31维的向量  }
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60
  • 61
  • 62
  • 63
  • 64
  • 65
  • 66
  • 67
  • 68
  • 69
  • 70
  • 71
  • 72
  • 73
  • 74
  • 75
  • 76
  • 77
  • 78
  • 79
  • 80
  • 81
  • 82
  • 83
  • 84
  • 85
  • 86
  • 87
  • 88
  • 89
  • 90
  • 91
  • 92
  • 93
  • 94
  • 95
  • 96
  • 97
  • 98
  • 99
  • 100
  • 101
  • 102
  • 103
  • 104
  • 105
  • 106
  • 107
  • 108
  • 109
  • 110
  • 111
  • 112
  • 113
  • 114
  • 115
  • 116
  • 117
  • 118
  • 119
  • 120
  • 121
  • 122
  • 123
  • 124
  • 125
  • 126
  • 127
  • 128
  • 129
  • 130
  • 131
  • 132
  • 133
  • 134
  • 135
  • 136
  • 137
  • 138
  • 139
  • 140
  • 141
  • 142
  • 143
  • 144
  • 145
  • 146
  • 147
  • 148
  • 149
  • 150
  • 151
  • 152
  • 153
  • 154
  • 155
  • 156
  • 157
  • 158
  • 159
  • 160
  • 161
  • 162
  • 163
  • 164
  • 165
  • 166
  • 167

前面提取到了金字塔每一层的hog特征,一并存入feats中,然后开始检测

void detect_from_fhog_pyramid ( const array<arrayfloat> > >& feats, const feature_extractor_type& fe, const fhog_filterbank& w, const double thresh, const unsigned long det_box_height, const unsigned long det_box_width, const int cell_size, const int filter_rows_padding, const int filter_cols_padding, std::vector<std::pair<double, rectangle> >& dets ) 
{
   dets.clear();//初始化 array2d<float> saliency_image;//显著性系数图,系数越大,则对应位置包含目标可能性越高 pyramid_type pyr; //遍历金字塔所有层 for (unsigned long l = 0; l < feats.size(); ++l)
   { // 检测每一层,得到每一层的显著性系数图,及目标区域 const rectangle area = apply_filters_to_fhog(w, feats[l], saliency_image); // 遍历该区域 for (long r = area.top(); r <= area.bottom(); ++r)
      { for (long c = area.left(); c <= area.right(); ++c)
         { // if we found a detection if (saliency_image[r][c] >= thresh)//若该处显著性系数大于阈值 { // 取出该处的矩阵位置 rectangle temp = centered_rect(point(c,r),det_box_width,det_box_height);
               rectangle rect = fe.feats_to_image(temp,cell_size, 
                                         filter_rows_padding, filter_cols_padding);
               rect = pyr.rect_up(rect, l);//放大到正常大小 dets.push_back(std::make_pair(saliency_image[r][c], rect));
             }
          }
      }
   } std::sort(dets.rbegin(), dets.rend(), compare_pair_rect);//根据显著性系数排序 }
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39

检测每一层

// Applies every separable filter of the bank `w` to the HOG features of one
// pyramid level and sums the responses into `saliency_image`.
//
//   w              - filter bank (the detection model read from disk).
//   feats          - HOG feature planes of the current level; feats[i] is
//                    filtered with w.row_filters[i] / w.col_filters[i].
//   saliency_image - output; cleared first, then holds the summed responses.
//
// Returns the rectangle reported by the last filter call, or a
// default-constructed rectangle if no filter was applied.
//
// NOTE(review): the template parameter list was missing from this excerpt and
// is restored from the identifiers the code uses — confirm against dlib.
template <typename fhog_filterbank>
rectangle apply_filters_to_fhog(
    const fhog_filterbank& w,
    const array<array2d<float> >& feats,
    array2d<float>& saliency_image
)
{
    rectangle area;
    saliency_image.clear();
    array2d<float> scratch;

    // One group of filters per feature plane (31 planes per the article); all
    // of them vote by being summed into the same saliency image.
    for (unsigned long i = 0; i < w.row_filters.size(); ++i)
    {
        // Per the article there are 2 or 3 separable filters per plane, each
        // a pair of length-10 vectors.
        for (unsigned long j = 0; j < w.row_filters[i].size(); ++j)
        {
            // The first filter must overwrite: saliency_image was just
            // cleared, so there is nothing valid to add to yet. Every later
            // filter accumulates onto the existing responses.
            const bool accumulate = saliency_image.size() != 0;

            // Row pass followed by column pass: two 1-D convolutions are
            // faster than one 2-D convolution.
            area = float_spatially_filter_image_separable(feats[i], saliency_image,
                           w.row_filters[i][j], w.col_filters[i][j], scratch, accumulate);
        }
    }

    // No filter ran: return an all-zero saliency image of the feature size.
    if (saliency_image.size() == 0)
    {
        saliency_image.set_size(feats[0].nr(), feats[0].nc());
        assign_all_pixels(saliency_image, 0);
    }
    return area;
}
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25



  • spatial_filtering.h 


两次一维卷积

// Separable 2-D filtering done as two 1-D passes: a row filter written to
// `scratch_`, then a column filter written to `out_img_`.
//
//   in_img_     - input image.
//   out_img_    - output image; resized to the input size.
//   _row_filter - 1-D filter applied along each row (length 10 in the
//                 article's example).
//   _col_filter - 1-D filter applied along each column (length 10 in the
//                 article's example).
//   scratch_    - work buffer for the row-pass result; resized to the input size.
//   add_to      - if true the result is added to the existing contents of
//                 out_img_, otherwise it overwrites them.
//
// Returns the rectangle of pixels that were filtered. Pixels outside it are
// not written by this function.
//
// NOTE(review): the template parameter list was missing from this excerpt and
// is restored from the identifiers the code uses — confirm against dlib.
template <typename in_image_type, typename out_image_type, typename EXP1, typename EXP2>
rectangle float_spatially_filter_image_separable(
    const in_image_type& in_img_,
    out_image_type& out_img_,
    const matrix_exp<EXP1>& _row_filter,
    const matrix_exp<EXP2>& _col_filter,
    out_image_type& scratch_,
    bool add_to = false
)
{
    const_temp_matrix<EXP1> row_filter(_row_filter);
    const_temp_matrix<EXP2> col_filter(_col_filter);

    const_image_view<in_image_type> in_img(in_img_);
    image_view<out_image_type> out_img(out_img_);
    out_img.set_size(in_img.nr(), in_img.nc());

    // View over the scratch buffer that receives the row-filtered image.
    image_view<out_image_type> scratch(scratch_);
    scratch.set_size(in_img.nr(), in_img.nc());

    // Range over which the filters fit entirely inside the image.
    const long first_row = col_filter.size()/2;
    const long first_col = row_filter.size()/2;
    const long last_row = in_img.nr() - ((col_filter.size()-1)/2);
    const long last_col = in_img.nc() - ((row_filter.size()-1)/2);

    // The filtered region, with inclusive bounds: columns
    // [first_col, last_col-1], rows [first_row, last_row-1].
    const rectangle non_border(first_col, first_row, last_col-1, last_row-1);

    // Apply the row filter. Every row is processed because the column pass
    // below reads rows above and below its own output range.
    for (long r = 0; r < in_img.nr(); ++r)
    {
        for (long c = first_col; c < last_col; ++c)
        {
            float temp = 0;
            for (long n = 0; n < row_filter.size(); ++n)
            {
                temp += in_img[r][c-first_col+n]*row_filter(n);
            }
            scratch[r][c] = temp;
        }
    }

    // Apply the column filter to the row-filtered image.
    for (long r = first_row; r < last_row; ++r)
    {
        for (long c = first_col; c < last_col; ++c)
        {
            float temp = 0;
            for (long m = 0; m < col_filter.size(); ++m)
            {
                temp += scratch[r-first_row+m][c]*col_filter(m);
            }

            // Accumulate only when asked to; otherwise overwrite.
            if (add_to)
                out_img[r][c] += temp;
            else
                out_img[r][c] = temp;
        }
    }

    return non_border;
}
阅读(5311) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~