Files
DeepCamFaceSDK2.0/TEST/test_track/yoloV5-face.cpp
2024-12-13 23:33:37 +08:00

241 lines
8.4 KiB
C++

/*************************************************************************
*
* deepCam Shenzhen CONFIDENTIAL
* FILE: <tag>
*
* [2016] - [2019] DeepCam Shenzhen
* All Rights Reserved.
NOTICE:
* All information contained herein is, and remains the property of DeepCam Shenzhen.
* The intellectual and technical concepts contained herein are proprietary to DeepCam
* Shenzhen and may be covered by China and Foreign Patents, patents in process, and
* are protected by trade secret or copyright law.
* Dissemination of this information or reproduction of this material
* is strictly forbidden unless prior written permission is obtained
* from DeepCam Shenzhen.
*
*
* Written: Jing.Yi 2021-01-6
* Updated:
**************************************************************************/
#include "yoloV5-face.h"
// Logistic function 1 / (1 + e^(-n)).
// The exponential is evaluated through the unqualified `exp` overload visible
// for T, and the quotient is converted back to T on return.
template <typename T>
T sigmoid(const T& n) {
    const auto decay = exp(-n);
    return 1 / (1 + decay);
}
// Resizes `src` to fit inside `out_size` while keeping its aspect ratio, then
// pads the remainder with a constant grey (114, 114, 114) border so that `dst`
// has exactly `out_size` pixels.
//
// src      : input image.
// dst      : receives the resized and padded image.
// out_size : target width/height.
// Returns {left padding, top padding, scale}; a point (x, y) in `dst` maps back
// to the source image as ((x - left) / scale, (y - top) / scale).
std::vector<float> yoloV5_face_ncnn::LetterboxImage(const cv::Mat& src, cv::Mat& dst, const cv::Size& out_size)
{
    const float in_h = static_cast<float>(src.rows);
    const float in_w = static_cast<float>(src.cols);
    const float out_h = static_cast<float>(out_size.height);
    const float out_w = static_cast<float>(out_size.width);

    // An empty source has no meaningful scale. Using 0 here avoids converting
    // inf/NaN to int below; cv::resize is then expected to reject the empty
    // source itself.
    const bool has_pixels = in_w > 0.f && in_h > 0.f;
    const float scale = has_pixels ? std::min(out_w / in_w, out_h / in_h) : 0.f;

    int mid_h = static_cast<int>(in_h * scale);
    int mid_w = static_cast<int>(in_w * scale);
    // A very elongated image can truncate one side to 0 pixels, which
    // cv::resize cannot produce; keep at least one pixel when the target
    // itself is non-empty.
    if (mid_h == 0 && out_size.height > 0)
    {
        mid_h = 1;
    }
    if (mid_w == 0 && out_size.width > 0)
    {
        mid_w = 1;
    }

    // fx = fy = 0: the output size is taken from the explicit cv::Size.
    cv::resize(src, dst, cv::Size(mid_w, mid_h), 0.0, 0.0, cv::INTER_NEAREST);

    // Split the leftover space evenly; an odd remainder puts the extra pixel
    // on the bottom/right side.
    const int pad_h = out_size.height - mid_h;
    const int pad_w = out_size.width - mid_w;
    const int top = pad_h / 2;
    const int down = (pad_h + 1) / 2;
    const int left = pad_w / 2;
    const int right = (pad_w + 1) / 2;
    cv::copyMakeBorder(dst, dst, top, down, left, right, cv::BORDER_CONSTANT, cv::Scalar(114, 114, 114));

    return std::vector<float>{ static_cast<float>(left), static_cast<float>(top), scale };
}
// Returns the shared detector object. It is a function-local static, so it is
// constructed on the first call and every call yields the same address.
yoloV5_face_ncnn* yoloV5_face_ncnn::getInstance()
{
    static yoloV5_face_ncnn s_detector;
    yoloV5_face_ncnn* const handle = &s_detector;
    return handle;
}
// Creates a fresh ncnn network, loads the face-detection model into it and
// records the input dimensions read later by Detect().
//
// model_path : directory containing faceDetect.param and faceDetect.bin.
// dim_ifm    : network input dimensions.
// Returns 0 on success; otherwise the non-zero status reported by ncnn for the
// file that failed to load. The weights are not read if the param file fails.
int yoloV5_face_ncnn::loadModel(std::string model_path, DimsNCHW dim_ifm)
{
    m_net = std::make_shared<ncnn::Net>();
    m_dimIfm = dim_ifm;

    const std::string param_file = model_path + std::string("/faceDetect.param");
    const int param_status = m_net->load_param(param_file.c_str());
    if (param_status != 0)
    {
        return param_status;
    }

    const std::string weight_file = model_path + std::string("/faceDetect.bin");
    return m_net->load_model(weight_file.c_str());
}
void yoloV5_face_ncnn::nms(std::vector<Anchor> &input_boxes, float NMS_THRESH)
{
std::sort(input_boxes.begin(), input_boxes.end(), [](Anchor a, Anchor b) {return a.score > b.score; });
std::vector<float>vArea(input_boxes.size());
for (int i = 0; i < int(input_boxes.size()); ++i)
{
// vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1)
// * (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1);
vArea[i] = (input_boxes.at(i).finalbox[2] - input_boxes.at(i).finalbox[0] + 1)
* (input_boxes.at(i).finalbox[3] - input_boxes.at(i).finalbox[1] + 1);
}
for (int i = 0; i < int(input_boxes.size()); ++i)
{
for (int j = i + 1; j < int(input_boxes.size());)
{
float xx1 = std::max(input_boxes[i].finalbox[0], input_boxes[j].finalbox[0]);
float yy1 = std::max(input_boxes[i].finalbox[1], input_boxes[j].finalbox[1]);
float xx2 = std::min(input_boxes[i].finalbox[2], input_boxes[j].finalbox[2]);
float yy2 = std::min(input_boxes[i].finalbox[3], input_boxes[j].finalbox[3]);
float w = std::max(float(0), xx2 - xx1 + 1);
float h = std::max(float(0), yy2 - yy1 + 1);
float inter = w * h;
float ovr = inter / (vArea[i] + vArea[j] - inter);
if (ovr >= NMS_THRESH)
{
input_boxes.erase(input_boxes.begin() + j);
vArea.erase(vArea.begin() + j);
}
else
{
j++;
}
}
}
}
// Decodes one detection head into boxes and landmarks expressed in
// source-image coordinates and appends them to `result`.
//
// output : head output, read as consecutive planes of output.w * output.h
//          floats, 16 planes per anchor: box x, y, w, h, objectness, five
//          landmark (x, y) pairs, and one trailing plane that is not used.
//          NOTE(review): this indexing assumes planes are tightly packed
//          (plane stride == w * h) - confirm against the model's blob layout.
// info   : {left padding, top padding, scale} as returned by LetterboxImage.
// anchor : flat list of (width, height) pairs, one pair per anchor.
// net_w, net_h : network input size in pixels; divided by the feature-map
//          size to obtain the stride.
// result : receives every cell whose objectness exceeds 0.4.
void yoloV5_face_ncnn::decode(ncnn::Mat& output, std::vector<float>& info, std::vector<int> anchor,int net_w, int net_h, std::vector<Anchor>& result)
{
    const int kPlanesPerAnchor = 16;
    const int kNumLandmarks = 5;
    const double kScoreThreshold = 0.4;

    const int fea_h = output.h;
    const int fea_w = output.w;
    // Nothing to decode. Bailing out also protects the stride computation
    // below from an integer division by zero on an empty blob.
    if (output.data == nullptr || fea_w <= 0 || fea_h <= 0 || info.size() < 3)
    {
        return;
    }

    const float left = info[0];
    const float top = info[1];
    const float scale = info[2];
    const int spacial_size = fea_w * fea_h;
    const float* const ptr = static_cast<const float*>(output.data);

    // Integer division on purpose: same result as before for inputs that are
    // an exact multiple of the feature-map size.
    const float stride_w = static_cast<float>(net_w / fea_w);
    const float stride_h = static_cast<float>(net_h / fea_h);

    const int num_anchors = static_cast<int>(anchor.size() / 2);
    for (int c = 0; c < num_anchors; c++)
    {
        const float anchor_w = static_cast<float>(anchor[c * 2 + 0]);
        const float anchor_h = static_cast<float>(anchor[c * 2 + 1]);
        const float* const planes = ptr + spacial_size * (c * kPlanesPerAnchor);

        for (int i = 0; i < fea_h; i++)
        {
            for (int j = 0; j < fea_w; j++)
            {
                const int index = i * fea_w + j;
                // Value of plane k of the current anchor at this grid cell.
                auto plane = [&](int k) { return planes[spacial_size * k + index]; };

                // Objectness only; the trailing plane (15) is deliberately
                // not folded into the score.
                const float confidence = sigmoid(plane(4));
                if (!(confidence > kScoreThreshold))
                {
                    continue;
                }

                const float dx = sigmoid(plane(0));
                const float dy = sigmoid(plane(1));
                const float dw = sigmoid(plane(2));
                const float dh = sigmoid(plane(3));

                // Box centre is an offset from the grid cell; box size is a
                // squared factor applied to the anchor size.
                const float pb_cx = (dx * 2.f - 0.5f + j) * stride_w;
                const float pb_cy = (dy * 2.f - 0.5f + i) * stride_h;
                const float w_factor = dw * 2.f;
                const float h_factor = dh * 2.f;
                const float pb_w = w_factor * w_factor * anchor_w;
                const float pb_h = h_factor * h_factor * anchor_h;

                const float x0 = pb_cx - pb_w * 0.5f;
                const float y0 = pb_cy - pb_h * 0.5f;
                const float x1 = pb_cx + pb_w * 0.5f;
                const float y1 = pb_cy + pb_h * 0.5f;

                Anchor temp_box;
                // Undo the letterbox: remove the padding, then the scale.
                temp_box.finalbox = RectLite<float>((x0 - left) / scale, (y0 - top) / scale, (x1 - left) / scale, (y1 - top) / scale);
                temp_box.score = confidence;

                // Landmarks are raw (no sigmoid) offsets in anchor units from
                // the grid cell origin; planes 5..14 hold x1, y1, ..., x5, y5.
                for (int k = 0; k < kNumLandmarks; ++k)
                {
                    const float lx = plane(5 + 2 * k) * anchor_w + j * stride_w;
                    const float ly = plane(6 + 2 * k) * anchor_h + i * stride_h;
                    temp_box.pts[k] = PointLite <F32>((lx - left) / scale, (ly - top) / scale);
                }
                result.push_back(temp_box);
            }
        }
    }
}
// Runs face detection on `image`.
//
// The image is letterboxed to the configured network input size, scaled to
// [0, 1], passed through the network, and the three output heads
// ("stride_32", "stride_16", "stride_8") are decoded and merged with NMS at an
// IoU threshold of 0.4.
//
// Returns a reference to the member result vector, which is cleared and
// refilled on every call. It is empty when no model has been loaded, the image
// is empty, or the network input cannot be set.
std::vector<Anchor>& yoloV5_face_ncnn::Detect(cv::Mat& image)
{
    m_result.clear();
    if (!m_net || image.empty())
    {
        return m_result;
    }

    cv::Mat dst;
    std::vector<float> infos = LetterboxImage(image, dst, cv::Size(m_dimIfm.w(), m_dimIfm.h()));

    // NOTE(review): the pixels are handed to ncnn as PIXEL_RGB. If callers pass
    // OpenCV's usual BGR frames, confirm the model expects that channel order.
    ncnn::Mat in = ncnn::Mat::from_pixels_resize(dst.data, ncnn::Mat::PIXEL_RGB, dst.cols, dst.rows, m_dimIfm.w(), m_dimIfm.h());
    const float norm[3] = { 1 / 255.f, 1 / 255.f, 1 / 255.f };
    const float mean[3] = { 0, 0, 0 };
    in.substract_mean_normalize(mean, norm);

    auto ex = m_net->create_extractor();
    ex.set_light_mode(true);
    ex.set_num_threads(2);

    if (ex.input(0, in) == 0)
    {
        // Decode a head only if its extraction succeeded; a failed extract
        // would otherwise hand an empty blob to decode().
        ncnn::Mat blob;
        if (ex.extract("stride_32", blob) == 0)
        {
            decode(blob, infos, m_anchor32, dst.cols, dst.rows, m_result);
        }
        if (ex.extract("stride_16", blob) == 0)
        {
            decode(blob, infos, m_anchor16, dst.cols, dst.rows, m_result);
        }
        if (ex.extract("stride_8", blob) == 0)
        {
            decode(blob, infos, m_anchor8, dst.cols, dst.rows, m_result);
        }
        nms(m_result, 0.4);
    }

    ex.clear();
    return m_result;
}