转自:https://blog.csdn.net/weixin_35793375/article/details/78028545?locationNum=3&fps=1
dlib人脸检测源码解析
dlib 是基于 C++ 的跨平台通用框架,内容非常丰富,涵盖机器学习、图像处理、数据压缩等(原文此处附有原作者链接)。dlib 在 Image Processing 模块中定义了基于方向梯度直方图(Histogram of Oriented Gradients,HOG)特征和图像金字塔的多尺度目标检测方法。在非深度学习的目标检测方法中,该算法准确率较高,同时能保证良好的实时性。
本文将以人脸检测为例,详细解析dlib本部分源码。
源码及解析
获取已训练的目标检测模型
namespace dlib
{ typedef object_detector6> > > frontal_face_detector; inline const std::string get_serialized_frontal_faces(); inline frontal_face_detector get_frontal_face_detector();
}
目标检测主函数:依次用 5 个不同角度的人脸检测器调用 scanner.detect(),找出当前图像中人脸所在的位置,最后去除与已保留检测框重叠的结果。
void object_detector:: operator() ( const image_type& img, std::vector& final_dets, double adjust_threshold )
{
scanner.load(img); std::vector<std::pair<double, rectangle> > dets; std::vector dets_accum; for (unsigned long i = 0; i < w.size(); ++i) { const double thresh = w[i].w(scanner.get_num_dimensions());
scanner.detect(w[i].get_detect_argument(), dets, thresh + adjust_threshold); for (unsigned long j = 0; j < dets.size(); ++j)
{
rect_detection temp;
temp.detection_confidence = dets[j].first-thresh; temp.weight_index = i;
temp.rect = dets[j].second;
dets_accum.push_back(temp);
}
} final_dets.clear(); if (w.size() > 1) std::sort(dets_accum.rbegin(), dets_accum.rend()); for (unsigned long i = 0; i < dets_accum.size(); ++i)
{ if (overlaps_any_box(final_dets, dets_accum[i].rect)) continue;
final_dets.push_back(dets_accum[i]);
}
}
load:载入图像,并以图像金字塔的形式逐层提取 HOG 特征
void scan_fhog_pyramid::load (constimage_type& img )
{ unsigned long width, height;
compute_fhog_window_size(width,height); impl::create_fhog_pyramid(img, fe, feats, cell_size, height,
width, min_pyramid_layer_width, min_pyramid_layer_height,
max_pyramid_levels);
} void create_fhog_pyramid ( const image_type& img, const feature_extractor_type& fe, array<arrayfloat> > >& feats, int cell_size, int filter_rows_padding, int filter_cols_padding, unsigned long min_pyramid_layer_width, unsigned long min_pyramid_layer_height, unsigned long max_pyramid_levels
)
{ unsigned long levels = 0;
rectangle rect = get_rect(img);
pyramid_type pyr;
{ do {
rect = pyr.rect_down(rect);
++levels;
} while (rect.width() >= min_pyramid_layer_width &&
rect.height() >= min_pyramid_layer_height &&
levels < max_pyramid_levels);
} if (feats.max_size() < levels)
feats.set_max_size(levels);
feats.set_size(levels); fe(img, feats[0], cell_size,filter_rows_padding,filter_cols_padding); if (feats.size() > 1)
{ typedef typename image_traits::pixel_type pixel_type;
array2d temp1, temp2;
pyr(img, temp1); fe(temp1, feats[1], cell_size,filter_rows_padding,filter_cols_padding);
swap(temp1,temp2); for (unsigned long i = 2; i < feats.size(); ++i)
{
pyr(temp2, temp1); fe(temp1, feats[i], cell_size,filter_rows_padding,filter_cols_padding);
swap(temp1,temp2);
}
}
} void impl_extract_fhog_features( const image_type& img_,
out_type& hog, int cell_size, int filter_rows_padding, int filter_cols_padding )
{
const_image_view img(img_); matrix<float,2,1> directions[9];
directions[0] = 1.0000, 0.0000;
directions[1] = 0.9397, 0.3420;
directions[2] = 0.7660, 0.6428;
directions[3] = 0.500, 0.8660;
directions[4] = 0.1736, 0.9848;
directions[5] = -0.1736, 0.9848;
directions[6] = -0.5000, 0.8660;
directions[7] = -0.7660, 0.6428;
directions[8] = -0.9397, 0.3420; const int cells_nr = (int)((float)img.nr()/(float)cell_size + 0.5); const int cells_nc = (int)((float)img.nc()/(float)cell_size + 0.5); array2dfloat,18,1> > hist(cells_nr+2, cells_nc+2); for (long r = 0; r < hist.nr(); ++r) for (long c = 0; c < hist.nc(); ++c)
hist[r][c] = 0; array2d<float> norm(cells_nr, cells_nc); assign_all_pixels(norm, 0); const int hog_nr = std::max(cells_nr-2, 0); const int hog_nc = std::max(cells_nc-2, 0); const int padding_rows_offset = (filter_rows_padding-1)/2; const int padding_cols_offset = (filter_cols_padding-1)/2;
init_hog(hog, hog_nr, hog_nc, filter_rows_padding, filter_cols_padding); const int visible_nr = std::min((long)cells_nr*cell_size,img.nr())-1; const int visible_nc = std::min((long)cells_nc*cell_size,img.nc())-1; for (int y = 1; y < visible_nr; y++) { const float yp = ((float)y+0.5)/(float)cell_size - 0.5; const int iyp = (int)std::floor(yp); const float vy0 = yp - iyp; const float vy1 = 1.0 - vy0; int x; for (x = 1; x < visible_nc; x++)
{
matrix<float, 2, 1> grad; float v;
get_gradient(y,x,img,grad,v); float best_dot = 0; int best_o = 0; for (int o = 0; o < 9; o++)
{ const float dot = dlib::dot(directions[o], grad); if (dot > best_dot) {
best_dot = dot;
best_o = o;
} else if (-dot > best_dot) {
best_dot = -dot;
best_o = o+9;
}
}
v = std::sqrt(v); const float xp = ((double)x + 0.5) / (double)cell_size - 0.5; const int ixp = (int)std::floor(xp); const float vx0 = xp - ixp; const float vx1 = 1.0 - vx0; hist[iyp+1][ixp+1](best_o) += vy1*vx1*v;
hist[iyp+1+1][ixp+1](best_o) += vy0*vx1*v;
hist[iyp+1][ixp+1+1](best_o) += vy1*vx0*v;
hist[iyp+1+1][ixp+1+1](best_o) += vy0*vx0*v;
}
} for (int r = 0; r < cells_nr; ++r)
{ for (int c = 0; c < cells_nc; ++c)
{ for (int o = 0; o < 9; o++)
{ norm[r][c] += (hist[r+1][c+1](o) + hist[r+1][c+1](o+9)) *
(hist[r+1][c+1](o) + hist[r+1][c+1](o+9));
}
}
} for (int y = 0; y < hog_nr; y++)
{ for (int x = 0; x < hog_nc; x++)
{ for(int o = 0; o < 31; o++)
{
set_hog(hog,o,x,y,18维的单位向量[某种映射关系]);
}
}
} }
前面已经提取了金字塔每一层的 HOG 特征,并一并存入 feats 中,接下来开始检测
// Scans every pyramid level with the filter bank `w` and collects all window
// positions whose score reaches `thresh`.
//
//   feats                - per-level features; feats[l] belongs to level l.
//   fe                   - feature extractor; only feats_to_image() is used,
//                          to map feature-space rectangles to pixel space.
//   w                    - filter bank applied by apply_filters_to_fhog().
//   thresh               - minimum score for a detection.
//   det_box_height,
//   det_box_width        - detection-window size in feature-space units.
//   cell_size,
//   filter_rows_padding,
//   filter_cols_padding  - forwarded to fe.feats_to_image().
//   dets                 - output; cleared first, then filled with
//                          (score, rectangle) pairs and sorted at the end.
template <
    typename pyramid_type,
    typename feature_extractor_type,
    typename fhog_filterbank
    >
void detect_from_fhog_pyramid (
    const array<array<array2d<float> > >& feats,
    const feature_extractor_type& fe,
    const fhog_filterbank& w,
    const double thresh,
    const unsigned long det_box_height,
    const unsigned long det_box_width,
    const int cell_size,
    const int filter_rows_padding,
    const int filter_cols_padding,
    std::vector<std::pair<double, rectangle> >& dets
)
{
    dets.clear();

    array2d<float> saliency_image;
    pyramid_type pyr;

    for (unsigned long l = 0; l < feats.size(); ++l)
    {
        // `area` is the part of saliency_image that holds valid responses.
        const rectangle area = apply_filters_to_fhog(w, feats[l], saliency_image);

        for (long r = area.top(); r <= area.bottom(); ++r)
        {
            for (long c = area.left(); c <= area.right(); ++c)
            {
                if (saliency_image[r][c] < thresh)
                    continue;

                // Window centred on this response, in feature space ...
                rectangle temp = centered_rect(point(c, r), det_box_width, det_box_height);
                // ... converted to pixel coordinates of this pyramid level ...
                rectangle rect = fe.feats_to_image(temp, cell_size,
                    filter_rows_padding, filter_cols_padding);
                // ... and scaled back up l levels to the original image.
                rect = pyr.rect_up(rect, l);

                dets.push_back(std::make_pair(saliency_image[r][c], rect));
            }
        }
    }

    // Reverse iterators + compare_pair_rect give descending order.
    std::sort(dets.rbegin(), dets.rend(), compare_pair_rect);
}
对金字塔的每一层进行检测
// Applies every separable filter in `w` to the matching feature plane and
// sums the responses into `saliency_image`.
//
//   w               - filter bank; w.row_filters[i][j] / w.col_filters[i][j]
//                     form the j-th separable filter for feature plane i.
//   feats           - feature planes of one pyramid level.
//   saliency_image  - output; cleared on entry, then holds the summed
//                     responses. If no filter is applied it is sized like
//                     feats[0] and zero-filled.
//
// Returns the rectangle reported by the last filtering call (a
// default-constructed rectangle if no filter was applied).
template <
    typename fhog_filterbank
    >
rectangle apply_filters_to_fhog (
    const fhog_filterbank& w,
    const array<array2d<float> >& feats,
    array2d<float>& saliency_image
)
{
    rectangle area;
    saliency_image.clear();
    array2d<float> scratch;

    for (unsigned long i = 0; i < w.row_filters.size(); ++i)
    {
        for (unsigned long j = 0; j < w.row_filters[i].size(); ++j)
        {
            // The very first filter must overwrite: saliency_image was just
            // cleared, so accumulating would add to a freshly sized buffer
            // that has never been written. Later filters accumulate.
            const bool accumulate = (saliency_image.size() != 0);

            area = float_spatially_filter_image_separable(feats[i], saliency_image,
                w.row_filters[i][j], w.col_filters[i][j], scratch, accumulate);
        }
    }

    // No filter ran at all: still hand back a well-defined, all-zero image.
    if (saliency_image.size() == 0)
    {
        saliency_image.set_size(feats[0].nr(), feats[0].nc());
        assign_all_pixels(saliency_image, 0);
    }

    return area;
}
两次一维卷积(可分离滤波:先按行方向滤波,再按列方向滤波)
// Filters `in_img_` with a separable 2-D filter, done as two 1-D passes:
// first along rows with `_row_filter`, then along columns with `_col_filter`.
//
//   in_img_      - input image.
//   out_img_     - output image; resized to the input's dimensions.
//   _row_filter  - 1-D filter applied horizontally.
//   _col_filter  - 1-D filter applied vertically.
//   scratch_     - caller-provided buffer for the intermediate row-filtered
//                  image, so repeated calls can reuse the allocation.
//   add_to       - if true the result is added to what out_img_ already
//                  holds; if false (default) it overwrites it.
//
// Returns the rectangle of pixels that were actually computed, i.e. those
// where both filters fit entirely inside the image.
//
// NOTE(review): pixels outside that rectangle are left untouched by this
// abridged version. Upstream dlib is believed to zero them when add_to is
// false -- confirm against dlib/image_transforms/spatial_filtering.h.
template <
    typename in_image_type,
    typename out_image_type,
    typename EXP1,
    typename EXP2
    >
rectangle float_spatially_filter_image_separable (
    const in_image_type& in_img_,
    out_image_type& out_img_,
    const matrix_exp<EXP1>& _row_filter,
    const matrix_exp<EXP2>& _col_filter,
    out_image_type& scratch_,
    bool add_to = false
)
{
    const_temp_matrix<EXP1> row_filter(_row_filter);
    const_temp_matrix<EXP2> col_filter(_col_filter);

    const_image_view<in_image_type> in_img(in_img_);
    image_view<out_image_type> out_img(out_img_);
    out_img.set_size(in_img.nr(), in_img.nc());

    // Range of pixels for which the filters do not run off the image edge.
    const long first_row = col_filter.size()/2;
    const long first_col = row_filter.size()/2;
    const long last_row  = in_img.nr() - ((col_filter.size()-1)/2);
    const long last_col  = in_img.nc() - ((row_filter.size()-1)/2);

    // last_row / last_col are exclusive bounds, rectangle edges are inclusive.
    const rectangle non_border = rectangle(first_col, first_row, last_col-1, last_row-1);

    image_view<out_image_type> scratch(scratch_);
    scratch.set_size(in_img.nr(), in_img.nc());

    // Pass 1: horizontal filtering of every row into `scratch`. All rows are
    // processed because the vertical pass reads rows above and below.
    for (long r = 0; r < in_img.nr(); ++r)
    {
        for (long c = first_col; c < last_col; ++c)
        {
            float temp = 0;
            for (long n = 0; n < row_filter.size(); ++n)
                temp += in_img[r][c-first_col+n]*row_filter(n);

            scratch[r][c] = temp;
        }
    }

    // Pass 2: vertical filtering of `scratch` into the output.
    for (long r = first_row; r < last_row; ++r)
    {
        for (long c = first_col; c < last_col; ++c)
        {
            float temp = 0;
            for (long m = 0; m < col_filter.size(); ++m)
                temp += scratch[r-first_row+m][c]*col_filter(m);

            if (add_to)
                out_img[r][c] += temp;
            else
                out_img[r][c] = temp;
        }
    }

    return non_border;
}