/************************************************************************* * * deepCam Shenzhen CONFIDENTIAL * FILE: * * [2016] - [2019] DeepCam Shenzhen * All Rights Reserved. NOTICE: * All information contained herein is, and remains the property of DeepCam Shenzhen. * The intellectual and technical concepts contained herein are proprietary to DeepCam * Shenzhen and may be covered by China and Foreign Patents,patents in process, and * are protected by trade secret or copyright law. * Dissemination of this information or reproduction of this material * is strictly forbidden unless prior written permission is obtained * DeepCam Shenzhen. * * * Written: Jing.Yi 2021-01-6 * Updated: **************************************************************************/ #include "yoloV5-face.h" template T sigmoid(const T& n) { return 1 / (1 + exp(-n)); } std::vector yoloV5_face_ncnn::LetterboxImage(const cv::Mat& src, cv::Mat& dst, const cv::Size& out_size) { auto in_h = static_cast(src.rows); auto in_w = static_cast(src.cols); float out_h = out_size.height; float out_w = out_size.width; float scale = std::min(out_w / in_w, out_h / in_h); int mid_h = static_cast(in_h * scale); int mid_w = static_cast(in_w * scale); cv::resize(src, dst, cv::Size(mid_w, mid_h), (0, 0), (0, 0), cv::INTER_NEAREST); int top = (static_cast(out_h) - mid_h) / 2; int down = (static_cast(out_h) - mid_h + 1) / 2; int left = (static_cast(out_w) - mid_w) / 2; int right = (static_cast(out_w) - mid_w + 1) / 2; cv::copyMakeBorder(dst, dst, top, down, left, right, cv::BORDER_CONSTANT, cv::Scalar(114, 114, 114)); std::vector pad_info{ static_cast(left), static_cast(top), scale }; return pad_info; } yoloV5_face_ncnn* yoloV5_face_ncnn::getInstance() { static yoloV5_face_ncnn instance; return &instance; } int yoloV5_face_ncnn::loadModel(std::string model_path, DimsNCHW dim_ifm) { m_net = std::make_shared(); m_net->load_param((model_path + std::string("/faceDetect.param")).c_str()); m_net->load_model((model_path + std::string("/faceDetect.bin")).c_str()); m_dimIfm = dim_ifm; return 0; } void yoloV5_face_ncnn::nms(std::vector &input_boxes, float NMS_THRESH) { std::sort(input_boxes.begin(), input_boxes.end(), [](Anchor a, Anchor b) {return a.score > b.score; }); std::vectorvArea(input_boxes.size()); for (int i = 0; i < int(input_boxes.size()); ++i) { // vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1) // * (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1); vArea[i] = (input_boxes.at(i).finalbox[2] - input_boxes.at(i).finalbox[0] + 1) * (input_boxes.at(i).finalbox[3] - input_boxes.at(i).finalbox[1] + 1); } for (int i = 0; i < int(input_boxes.size()); ++i) { for (int j = i + 1; j < int(input_boxes.size());) { float xx1 = std::max(input_boxes[i].finalbox[0], input_boxes[j].finalbox[0]); float yy1 = std::max(input_boxes[i].finalbox[1], input_boxes[j].finalbox[1]); float xx2 = std::min(input_boxes[i].finalbox[2], input_boxes[j].finalbox[2]); float yy2 = std::min(input_boxes[i].finalbox[3], input_boxes[j].finalbox[3]); float w = std::max(float(0), xx2 - xx1 + 1); float h = std::max(float(0), yy2 - yy1 + 1); float inter = w * h; float ovr = inter / (vArea[i] + vArea[j] - inter); if (ovr >= NMS_THRESH) { input_boxes.erase(input_boxes.begin() + j); vArea.erase(vArea.begin() + j); } else { j++; } } } } void yoloV5_face_ncnn::decode(ncnn::Mat& output, std::vector& info, std::vector anchor,int net_w, int net_h, std::vector& result) { float left = info[0]; float top = info[1]; float scale = info[2]; int fea_h = output.h; int fea_w = output.w; int spacial_size = fea_w * fea_h; float* ptr = (float*)(output.data); for (int c = 0; c < anchor.size() / 2; c++) { float anchor_w = float(anchor[c * 2 + 0]); float anchor_h = float(anchor[c * 2 + 1]); float* ptr_x = ptr + spacial_size * (c * 16 + 0); float* ptr_y = ptr + spacial_size * (c * 16 + 1); float* ptr_w = ptr + spacial_size * (c * 16 + 2); float* ptr_h = ptr + spacial_size * (c * 16 + 3); float* ptr_s = ptr + spacial_size * (c * 16 + 4); float* ptr_lx1 = ptr + spacial_size * (c * 16 + 5); float* ptr_ly1 = ptr + spacial_size * (c * 16 + 6); float* ptr_lx2 = ptr + spacial_size * (c * 16 + 7); float* ptr_ly2 = ptr + spacial_size * (c * 16 + 8); float* ptr_lx3 = ptr + spacial_size * (c * 16 + 9); float* ptr_ly3 = ptr + spacial_size * (c * 16 + 10); float* ptr_lx4 = ptr + spacial_size * (c * 16 + 11); float* ptr_ly4 = ptr + spacial_size * (c * 16 + 12); float* ptr_lx5 = ptr + spacial_size * (c * 16 + 13); float* ptr_ly5 = ptr + spacial_size * (c * 16 + 14); float* ptr_c = ptr + spacial_size * (c * 16 + 15); float stride_w = net_w / fea_w; float stride_h = net_h / fea_h; for (int i = 0; i < fea_h; i++) { for (int j = 0; j < fea_w; j++) { int index = i * fea_w + j; float confidence = sigmoid(ptr_s[index]);// * sigmoid(ptr_c[index]); if (confidence > 0.4) { float dx = sigmoid(ptr_x[index]); float dy = sigmoid(ptr_y[index]); float dw = sigmoid(ptr_w[index]); float dh = sigmoid(ptr_h[index]); float pb_cx = (dx * 2.f - 0.5f + j) * stride_w; float pb_cy = (dy * 2.f - 0.5f + i) * stride_h; float pb_w = pow(dw * 2.f, 2) * anchor_w; float pb_h = pow(dh * 2.f, 2) * anchor_h; float x0 = pb_cx - pb_w * 0.5f; float y0 = pb_cy - pb_h * 0.5f; float x1 = pb_cx + pb_w * 0.5f; float y1 = pb_cy + pb_h * 0.5f; Anchor temp_box; temp_box.finalbox = RectLite((x0 - left) / scale,(y0 - top) / scale,(x1 - left) / scale,(y1 - top) / scale); temp_box.score = confidence; // temp_box.x1 = (x0 - left) / scale; // temp_box.y1 = (y0 - top) / scale; // temp_box.x2 = (x1 - left) / scale; // temp_box.y2 = (y1 - top) / scale; float lx1 = ptr_lx1[index] * anchor_w + j * stride_w; float ly1 = ptr_ly1[index] * anchor_h + i * stride_h; float lx2 = ptr_lx2[index] * anchor_w + j * stride_w; float ly2 = ptr_ly2[index] * anchor_h + i * stride_h; float lx3 = ptr_lx3[index] * anchor_w + j * stride_w; float ly3 = ptr_ly3[index] * anchor_h + i * stride_h; float lx4 = ptr_lx4[index] * anchor_w + j * stride_w; float ly4 = ptr_ly4[index] * anchor_h + i * stride_h; float lx5 = ptr_lx5[index] * anchor_w + j * stride_w; float ly5 = ptr_ly5[index] * anchor_h + i * stride_h; temp_box.pts[0] = PointLite ((lx1 - left) / scale,(ly1 - top) / scale); temp_box.pts[1] = PointLite ((lx2 - left) / scale,(ly2 - top) / scale); temp_box.pts[2] = PointLite ((lx3 - left) / scale,(ly3 - top) / scale); temp_box.pts[3] = PointLite ((lx4 - left) / scale,(ly4 - top) / scale); temp_box.pts[4] = PointLite ((lx5 - left) / scale,(ly5 - top) / scale); result.push_back(temp_box); //temp_box.lanmarks[0].x = (lx1 - left) / scale; //temp_box.lanmarks[0].y = (ly1 - top) / scale; //temp_box.lanmarks[1].x = (lx2 - left) / scale; //temp_box.lanmarks[1].y = (ly2 - top) / scale; //temp_box.lanmarks[2].x = (lx3 - left) / scale; //temp_box.lanmarks[2].y = (ly3 - top) / scale; //temp_box.lanmarks[3].x = (lx4 - left) / scale; //temp_box.lanmarks[3].y = (ly4 - top) / scale; //temp_box.lanmarks[4].x = (lx5 - left) / scale; //temp_box.lanmarks[4].y = (ly5 - top) / scale; //prebox.push_back(temp_box); } } } } } std::vector& yoloV5_face_ncnn::Detect(cv::Mat& image) { m_result.clear(); cv::Mat dst; std::vector infos = LetterboxImage(image, dst, cv::Size(m_dimIfm.w(), m_dimIfm.h())); ncnn::Mat in = ncnn::Mat::from_pixels_resize(dst.data, ncnn::Mat::PIXEL_RGB, dst.cols, dst.rows, m_dimIfm.w(), m_dimIfm.h()); float norm[3] = { 1 / 255.f, 1 / 255.f, 1 / 255.f }; float mean[3] = { 0, 0, 0 }; in.substract_mean_normalize(mean, norm); auto ex = m_net->create_extractor(); ex.set_light_mode(true); ex.set_num_threads(2); ex.input(0, in); ncnn::Mat blob; ex.extract("stride_32", blob); decode(blob, infos, m_anchor32, dst.cols, dst.rows, m_result); ex.extract("stride_16", blob); decode(blob, infos, m_anchor16, dst.cols, dst.rows, m_result); ex.extract("stride_8", blob); decode(blob, infos, m_anchor8, dst.cols, dst.rows, m_result); nms(m_result, 0.4); // blob.release(); // in.release(); ex.clear(); return m_result; }