241 lines
8.4 KiB
C++
241 lines
8.4 KiB
C++
/*************************************************************************
|
|
*
|
|
* deepCam Shenzhen CONFIDENTIAL
|
|
* FILE: <tag>
|
|
*
|
|
* [2016] - [2019] DeepCam Shenzhen
|
|
* All Rights Reserved.
|
|
|
|
NOTICE:
|
|
* All information contained herein is, and remains the property of DeepCam Shenzhen.
|
|
* The intellectual and technical concepts contained herein are proprietary to DeepCam
|
|
* Shenzhen and may be covered by China and Foreign Patents,patents in process, and
|
|
* are protected by trade secret or copyright law.
|
|
* Dissemination of this information or reproduction of this material
|
|
* is strictly forbidden unless prior written permission is obtained
|
|
* DeepCam Shenzhen.
|
|
*
|
|
*
|
|
* Written: Jing.Yi 2021-01-6
|
|
* Updated:
|
|
**************************************************************************/
|
|
|
|
#include "yoloV5-face.h"
|
|
|
|
// Logistic function: evaluates 1 / (1 + exp(-n)) and returns the result converted to T.
//
// n : input value.
// Returns the logistic value of n as a T.
template <typename T>
T sigmoid(const T& n) {
    // Build the denominator first; the arithmetic is the same as the one-line form.
    const auto denominator = 1 + exp(-n);
    return 1 / denominator;
}
|
|
|
|
// Letterboxes `src` into `dst`: the image is scaled by a single factor so that it fits
// inside `out_size`, then padded with a constant (114, 114, 114) border up to `out_size`.
//
// src      : input image.
// dst      : receives the resized and padded image.
// out_size : target width/height of `dst`.
//
// Returns { left padding, top padding, scale } so that coordinates in `dst` can be mapped
// back to `src` with (value - padding) / scale.
std::vector<float> yoloV5_face_ncnn::LetterboxImage(const cv::Mat& src, cv::Mat& dst, const cv::Size& out_size)
{
    const auto in_h = static_cast<float>(src.rows);
    const auto in_w = static_cast<float>(src.cols);
    const auto out_h = static_cast<float>(out_size.height);
    const auto out_w = static_cast<float>(out_size.width);

    // One scale factor for both axes; the tighter axis decides.
    const float scale = std::min(out_w / in_w, out_h / in_h);

    const int mid_h = static_cast<int>(in_h * scale);
    const int mid_w = static_cast<int>(in_w * scale);

    // fx/fy are left at 0 so that the explicit target size is used. (The previous
    // "(0, 0)" arguments were comma expressions that also evaluated to 0.)
    cv::resize(src, dst, cv::Size(mid_w, mid_h), 0.0, 0.0, cv::INTER_NEAREST);

    // Split the remaining space between both sides; the odd pixel, if any, goes to
    // the bottom/right side.
    const int pad_h = out_size.height - mid_h;
    const int pad_w = out_size.width - mid_w;
    const int top = pad_h / 2;
    const int down = (pad_h + 1) / 2;
    const int left = pad_w / 2;
    const int right = (pad_w + 1) / 2;

    cv::copyMakeBorder(dst, dst, top, down, left, right, cv::BORDER_CONSTANT, cv::Scalar(114, 114, 114));

    std::vector<float> pad_info{ static_cast<float>(left), static_cast<float>(top), scale };
    return pad_info;
}
|
|
|
|
// Returns a pointer to the detector object shared by all callers.
// The object is a function-local static: it is constructed on the first call and the
// same address is handed out on every call.
yoloV5_face_ncnn* yoloV5_face_ncnn::getInstance()
{
    static yoloV5_face_ncnn shared_detector;
    yoloV5_face_ncnn* const handle = &shared_detector;
    return handle;
}
|
|
|
|
int yoloV5_face_ncnn::loadModel(std::string model_path, DimsNCHW dim_ifm)
|
|
{
|
|
m_net = std::make_shared<ncnn::Net>();
|
|
m_net->load_param((model_path + std::string("/faceDetect.param")).c_str());
|
|
m_net->load_model((model_path + std::string("/faceDetect.bin")).c_str());
|
|
|
|
m_dimIfm = dim_ifm;
|
|
return 0;
|
|
}
|
|
|
|
void yoloV5_face_ncnn::nms(std::vector<Anchor> &input_boxes, float NMS_THRESH)
|
|
{
|
|
std::sort(input_boxes.begin(), input_boxes.end(), [](Anchor a, Anchor b) {return a.score > b.score; });
|
|
|
|
std::vector<float>vArea(input_boxes.size());
|
|
for (int i = 0; i < int(input_boxes.size()); ++i)
|
|
{
|
|
// vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1)
|
|
// * (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1);
|
|
|
|
vArea[i] = (input_boxes.at(i).finalbox[2] - input_boxes.at(i).finalbox[0] + 1)
|
|
* (input_boxes.at(i).finalbox[3] - input_boxes.at(i).finalbox[1] + 1);
|
|
}
|
|
for (int i = 0; i < int(input_boxes.size()); ++i)
|
|
{
|
|
for (int j = i + 1; j < int(input_boxes.size());)
|
|
{
|
|
float xx1 = std::max(input_boxes[i].finalbox[0], input_boxes[j].finalbox[0]);
|
|
float yy1 = std::max(input_boxes[i].finalbox[1], input_boxes[j].finalbox[1]);
|
|
float xx2 = std::min(input_boxes[i].finalbox[2], input_boxes[j].finalbox[2]);
|
|
float yy2 = std::min(input_boxes[i].finalbox[3], input_boxes[j].finalbox[3]);
|
|
float w = std::max(float(0), xx2 - xx1 + 1);
|
|
float h = std::max(float(0), yy2 - yy1 + 1);
|
|
float inter = w * h;
|
|
float ovr = inter / (vArea[i] + vArea[j] - inter);
|
|
if (ovr >= NMS_THRESH)
|
|
{
|
|
input_boxes.erase(input_boxes.begin() + j);
|
|
vArea.erase(vArea.begin() + j);
|
|
}
|
|
else
|
|
{
|
|
j++;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Decodes one detection head into boxes and five landmark points, appending every
// candidate whose score exceeds 0.4 to `result`.
//
// output : head blob; read as 16 consecutive channel planes per anchor:
//          planes 0-3 box x/y/w/h, plane 4 score, planes 5-14 five landmark (x, y)
//          pairs, plane 15 not used here (presumably a class score - TODO confirm).
// info   : { left padding, top padding, scale } as produced by LetterboxImage; used
//          to map coordinates back to the original image.
// anchor : flat list of anchor sizes, { w0, h0, w1, h1, ... }.
// net_w, net_h : network input size; divided by the feature-map size to get the stride.
// result : receives the decoded candidates (appended, not cleared).
//
// Nothing is appended when the blob is empty or `info` has fewer than 3 entries.
void yoloV5_face_ncnn::decode(ncnn::Mat& output, std::vector<float>& info, std::vector<int> anchor,int net_w, int net_h, std::vector<Anchor>& result)
{
    const int fea_h = output.h;
    const int fea_w = output.w;
    float* ptr = (float*)(output.data);

    // An empty blob (e.g. after a failed extract) would otherwise lead to an integer
    // division by zero in the stride computation below.
    if (ptr == nullptr || fea_h <= 0 || fea_w <= 0 || info.size() < 3)
    {
        return;
    }

    const float left = info[0];
    const float top = info[1];
    const float scale = info[2];

    const int kPlanesPerAnchor = 16;
    const int kLandmarkCount = 5;
    const double kScoreThreshold = 0.4;

    // ncnn pads every channel plane of a 3-D blob to a 16-byte boundary, so
    // consecutive planes are `cstep` elements apart, which is larger than w * h when
    // w * h is not a multiple of 4 floats. Other layouts keep the dense w * h step.
    const bool is_chw = (output.dims == 3);
    const std::size_t plane_step = is_chw ? static_cast<std::size_t>(output.cstep)
                                          : static_cast<std::size_t>(fea_w) * static_cast<std::size_t>(fea_h);

    // Never read past the planes that are actually present in the blob.
    int anchor_count = static_cast<int>(anchor.size() / 2);
    if (is_chw)
    {
        anchor_count = std::min(anchor_count, output.c / kPlanesPerAnchor);
    }

    // Integer division on purpose (unchanged): exact when the network size is a
    // multiple of the feature-map size.
    const float stride_w = static_cast<float>(net_w / fea_w);
    const float stride_h = static_cast<float>(net_h / fea_h);

    for (int c = 0; c < anchor_count; c++)
    {
        const float anchor_w = float(anchor[c * 2 + 0]);
        const float anchor_h = float(anchor[c * 2 + 1]);

        // Start of the k-th plane that belongs to the current anchor.
        auto plane = [&](int k) -> float* {
            return ptr + plane_step * static_cast<std::size_t>(c * kPlanesPerAnchor + k);
        };

        float* ptr_x = plane(0);
        float* ptr_y = plane(1);
        float* ptr_w = plane(2);
        float* ptr_h = plane(3);
        float* ptr_s = plane(4);

        for (int i = 0; i < fea_h; i++)
        {
            for (int j = 0; j < fea_w; j++)
            {
                const int index = i * fea_w + j;
                const float confidence = sigmoid(ptr_s[index]);
                if (!(confidence > kScoreThreshold))
                {
                    continue;
                }

                const float dx = sigmoid(ptr_x[index]);
                const float dy = sigmoid(ptr_y[index]);
                const float dw = sigmoid(ptr_w[index]);
                const float dh = sigmoid(ptr_h[index]);

                // Box centre and size in letterboxed-image pixels.
                const float pb_cx = (dx * 2.f - 0.5f + j) * stride_w;
                const float pb_cy = (dy * 2.f - 0.5f + i) * stride_h;
                const float pb_w = pow(dw * 2.f, 2) * anchor_w;
                const float pb_h = pow(dh * 2.f, 2) * anchor_h;

                const float x0 = pb_cx - pb_w * 0.5f;
                const float y0 = pb_cy - pb_h * 0.5f;
                const float x1 = pb_cx + pb_w * 0.5f;
                const float y1 = pb_cy + pb_h * 0.5f;

                Anchor temp_box;
                // Undo the letterbox: remove the padding, then divide by the scale.
                temp_box.finalbox = RectLite<float>((x0 - left) / scale,(y0 - top) / scale,(x1 - left) / scale,(y1 - top) / scale);
                temp_box.score = confidence;

                // Landmark k lives in planes 5 + 2k (x) and 6 + 2k (y); the raw values
                // are used without a sigmoid.
                for (int k = 0; k < kLandmarkCount; ++k)
                {
                    const float lx = plane(5 + 2 * k)[index] * anchor_w + j * stride_w;
                    const float ly = plane(6 + 2 * k)[index] * anchor_h + i * stride_h;
                    temp_box.pts[k] = PointLite <F32>((lx - left) / scale,(ly - top) / scale);
                }

                result.push_back(temp_box);
            }
        }
    }
}
|
|
|
|
// Runs face detection on `image` and returns the detections that survive NMS.
//
// The image is letterboxed to the configured input size, scaled to [0, 1], run
// through the network, and the "stride_32", "stride_16" and "stride_8" heads are
// decoded with their respective anchors before non-maximum suppression (IoU 0.4).
//
// image : input image.
// Returns a reference to the internal result vector; it is cleared and refilled on
// every call, so copy it if it must outlive the next call. The vector is empty when
// no model has been loaded or `image` is empty.
std::vector<Anchor>& yoloV5_face_ncnn::Detect(cv::Mat& image)
{
    m_result.clear();

    // Without a network or pixels there is nothing to detect; report "no faces"
    // rather than dereferencing a null network or resizing an empty image.
    if (!m_net || image.empty())
    {
        return m_result;
    }

    cv::Mat dst;
    std::vector<float> infos = LetterboxImage(image, dst, cv::Size(m_dimIfm.w(), m_dimIfm.h()));

    // NOTE(review): the pixels are handed to ncnn as PIXEL_RGB without any channel
    // swap - confirm that callers pass RGB (not OpenCV's usual BGR) images.
    ncnn::Mat in = ncnn::Mat::from_pixels_resize(dst.data, ncnn::Mat::PIXEL_RGB, dst.cols, dst.rows, m_dimIfm.w(), m_dimIfm.h());

    const float mean[3] = { 0, 0, 0 };
    const float norm[3] = { 1 / 255.f, 1 / 255.f, 1 / 255.f };
    in.substract_mean_normalize(mean, norm);

    auto ex = m_net->create_extractor();
    ex.set_light_mode(true);
    ex.set_num_threads(2);
    ex.input(0, in);

    // Decode a head only when its extraction succeeded; on failure the blob would be
    // empty or still hold the previous head's data.
    ncnn::Mat blob;
    if (ex.extract("stride_32", blob) == 0)
    {
        decode(blob, infos, m_anchor32, dst.cols, dst.rows, m_result);
    }
    if (ex.extract("stride_16", blob) == 0)
    {
        decode(blob, infos, m_anchor16, dst.cols, dst.rows, m_result);
    }
    if (ex.extract("stride_8", blob) == 0)
    {
        decode(blob, infos, m_anchor8, dst.cols, dst.rows, m_result);
    }

    nms(m_result, 0.4f);

    ex.clear();
    return m_result;
}
|