DeepCamFaceSDK2.0/TEST/test_track/yoloV5-face.cpp

/*************************************************************************
*
* deepCam Shenzhen CONFIDENTIAL
* FILE: <tag>
*
*  [2016] - [2019] DeepCam Shenzhen
*  All Rights Reserved.

NOTICE:
* All information contained herein is, and remains the property of DeepCam Shenzhen.
* The intellectual and technical concepts contained herein are proprietary to DeepCam
* Shenzhen and may be covered by China and Foreign Patents, patents in process, and
* are protected by trade secret or copyright law.
* Dissemination of this information or reproduction of this material
* is strictly forbidden unless prior written permission is obtained
* from DeepCam Shenzhen.
*
*
* Written: Jing.Yi 2021-01-6
* Updated:
**************************************************************************/

#include "yoloV5-face.h"

// Logistic function: maps n to 1 / (1 + e^-n).
// The result is converted back to T on return.
template <typename T>
T sigmoid(const T& n) {
	const auto denominator = 1 + exp(-n);
	return 1 / denominator;
}

// Resize `src` into `dst` with a single scale factor for both axes (the smaller
// of the width and height ratios), then pad the borders with the constant value
// 114 so that `dst` reaches `out_size`.
//
// Returns { left padding, top padding, scale } so that callers can map
// coordinates in the padded image back to the source image.
std::vector<float> yoloV5_face_ncnn::LetterboxImage(const cv::Mat& src, cv::Mat& dst, const cv::Size& out_size)
{
	const float src_h = static_cast<float>(src.rows);
	const float src_w = static_cast<float>(src.cols);
	const float target_h = out_size.height;
	const float target_w = out_size.width;

	// One ratio for both axes keeps the aspect ratio of the source.
	const float ratio = std::min(target_w / src_w, target_h / src_h);

	const int scaled_h = static_cast<int>(src_h * ratio);
	const int scaled_w = static_cast<int>(src_w * ratio);

	// fx / fy are 0 so the explicit size above drives the resize.
	cv::resize(src, dst, cv::Size(scaled_w, scaled_h), 0, 0, cv::INTER_NEAREST);

	// Split the missing rows / columns between both sides; when the total is
	// odd the extra pixel goes to the bottom / right.
	const int pad_rows = static_cast<int>(target_h) - scaled_h;
	const int pad_cols = static_cast<int>(target_w) - scaled_w;
	const int pad_top = pad_rows / 2;
	const int pad_bottom = (pad_rows + 1) / 2;
	const int pad_left = pad_cols / 2;
	const int pad_right = (pad_cols + 1) / 2;

	cv::copyMakeBorder(dst, dst, pad_top, pad_bottom, pad_left, pad_right,
		cv::BORDER_CONSTANT, cv::Scalar(114, 114, 114));

	return { static_cast<float>(pad_left), static_cast<float>(pad_top), ratio };
}

// Returns a pointer to the single detector object, which is a function-local
// static constructed on the first call.
yoloV5_face_ncnn* yoloV5_face_ncnn::getInstance()
{
	static yoloV5_face_ncnn s_detector;
	yoloV5_face_ncnn* const self = &s_detector;
	return self;
}

// Load the face-detection network from `model_path`.
//
// Expects `<model_path>/faceDetect.param` and `<model_path>/faceDetect.bin`.
// `dim_ifm` is stored as the network input size used by later calls.
//
// Returns 0 on success, -1 if the .param file could not be loaded and -2 if
// the .bin file could not be loaded. On failure the previously loaded network
// and input dimensions (if any) are left untouched.
int yoloV5_face_ncnn::loadModel(std::string model_path, DimsNCHW dim_ifm)
{
	const std::string param_file = model_path + std::string("/faceDetect.param");
	const std::string weight_file = model_path + std::string("/faceDetect.bin");

	// Load into a local net first so a failed load never replaces a working one.
	auto net = std::make_shared<ncnn::Net>();
	if (net->load_param(param_file.c_str()) != 0)
	{
		return -1;
	}
	if (net->load_model(weight_file.c_str()) != 0)
	{
		return -2;
	}

	m_net = net;
	m_dimIfm = dim_ifm;
	return 0;
}

void yoloV5_face_ncnn::nms(std::vector<Anchor> &input_boxes, float NMS_THRESH)
{
	std::sort(input_boxes.begin(), input_boxes.end(), [](Anchor a, Anchor b) {return a.score > b.score; });

	std::vector<float>vArea(input_boxes.size());
	for (int i = 0; i < int(input_boxes.size()); ++i)
	{
		// vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1)
		// 	* (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1);

		vArea[i] = (input_boxes.at(i).finalbox[2] - input_boxes.at(i).finalbox[0] + 1)
			* (input_boxes.at(i).finalbox[3] - input_boxes.at(i).finalbox[1] + 1);
	}
	for (int i = 0; i < int(input_boxes.size()); ++i)
	{
		for (int j = i + 1; j < int(input_boxes.size());)
		{
			float xx1 = std::max(input_boxes[i].finalbox[0], input_boxes[j].finalbox[0]);
			float yy1 = std::max(input_boxes[i].finalbox[1], input_boxes[j].finalbox[1]);
			float xx2 = std::min(input_boxes[i].finalbox[2], input_boxes[j].finalbox[2]);
			float yy2 = std::min(input_boxes[i].finalbox[3], input_boxes[j].finalbox[3]);
			float w = std::max(float(0), xx2 - xx1 + 1);
			float h = std::max(float(0), yy2 - yy1 + 1);
			float inter = w * h;
			float ovr = inter / (vArea[i] + vArea[j] - inter);
			if (ovr >= NMS_THRESH)
			{
				input_boxes.erase(input_boxes.begin() + j);
				vArea.erase(vArea.begin() + j);
			}
			else
			{
				j++;
			}
		}
	}
}

// Decode one detection head into boxes and landmarks and append them to `result`.
//
// output : head blob, read as consecutive planes of output.w * output.h floats,
//          16 planes per anchor: x, y, w, h, objectness, five (x, y) landmark
//          pairs, and one last plane that is not used here.
// info   : { left padding, top padding, scale } as returned by LetterboxImage;
//          used to map coordinates back to the source image.
// anchor : flat list of (width, height) pairs, one pair per anchor.
// net_w, net_h : size of the network input the blob was produced from.
// result : receives every candidate whose sigmoid(objectness) is > 0.4.
//
// Nothing is appended when `info` has fewer than three entries or when the blob
// is empty. The latter happens after a failed extract and would otherwise cause
// an integer division by zero when computing the stride.
//
// NOTE(review): planes are addressed with w * h as the plane stride. ncnn pads
// channel planes to `cstep`, which can differ from w * h for some sizes --
// confirm the blob layout before feeding input sizes other than the current ones.
void yoloV5_face_ncnn::decode(ncnn::Mat& output, std::vector<float>& info, std::vector<int> anchor,int net_w, int net_h, std::vector<Anchor>& result)
{
	const int kPlanesPerAnchor = 16;   // x, y, w, h, objectness, 10 landmark terms, 1 unused
	const int kLandmarkPlane = 5;      // index of the first landmark plane
	const int kLandmarkCount = 5;
	const double kScoreThreshold = 0.4;

	if (info.size() < 3)
	{
		return;
	}

	const int fea_h = output.h;
	const int fea_w = output.w;
	if (output.data == nullptr || fea_w <= 0 || fea_h <= 0)
	{
		return;
	}

	const float left = info[0];
	const float top = info[1];
	const float scale = info[2];

	const int plane_size = fea_w * fea_h;
	const int anchor_count = static_cast<int>(anchor.size() / 2);
	const float* base = static_cast<const float*>(output.data);

	// Integer division, as before: exact whenever the network size is a
	// multiple of the feature-map size. Loop-invariant, so computed once.
	const float stride_w = static_cast<float>(net_w / fea_w);
	const float stride_h = static_cast<float>(net_h / fea_h);

	for (int a = 0; a < anchor_count; ++a)
	{
		const float anchor_w = static_cast<float>(anchor[a * 2 + 0]);
		const float anchor_h = static_cast<float>(anchor[a * 2 + 1]);

		// First plane belonging to this anchor.
		const float* planes = base + plane_size * (a * kPlanesPerAnchor);
		const float* plane_x = planes + plane_size * 0;
		const float* plane_y = planes + plane_size * 1;
		const float* plane_w = planes + plane_size * 2;
		const float* plane_h = planes + plane_size * 3;
		const float* plane_score = planes + plane_size * 4;

		for (int row = 0; row < fea_h; ++row)
		{
			for (int col = 0; col < fea_w; ++col)
			{
				const int index = row * fea_w + col;

				const float confidence = sigmoid(plane_score[index]);
				if (!(confidence > kScoreThreshold))
				{
					continue;
				}

				const float dx = sigmoid(plane_x[index]);
				const float dy = sigmoid(plane_y[index]);
				const float dw = sigmoid(plane_w[index]);
				const float dh = sigmoid(plane_h[index]);

				// Box centre in network-input pixels.
				const float pb_cx = (dx * 2.f - 0.5f + col) * stride_w;
				const float pb_cy = (dy * 2.f - 0.5f + row) * stride_h;

				// Box size: (2 * sigmoid)^2 * anchor size.
				const float tw = dw * 2.f;
				const float th = dh * 2.f;
				const float pb_w = tw * tw * anchor_w;
				const float pb_h = th * th * anchor_h;

				const float x0 = pb_cx - pb_w * 0.5f;
				const float y0 = pb_cy - pb_h * 0.5f;
				const float x1 = pb_cx + pb_w * 0.5f;
				const float y1 = pb_cy + pb_h * 0.5f;

				Anchor temp_box;
				// Undo the letterbox: remove the padding, then divide by the scale.
				temp_box.finalbox = RectLite<float>((x0 - left) / scale, (y0 - top) / scale,
					(x1 - left) / scale, (y1 - top) / scale);
				temp_box.score = confidence;

				// Landmarks are raw offsets scaled by the anchor size and
				// added to the position of the cell.
				for (int k = 0; k < kLandmarkCount; ++k)
				{
					const float* plane_lx = planes + plane_size * (kLandmarkPlane + 2 * k);
					const float* plane_ly = plane_lx + plane_size;

					const float lx = plane_lx[index] * anchor_w + col * stride_w;
					const float ly = plane_ly[index] * anchor_h + row * stride_h;

					temp_box.pts[k] = PointLite <F32>((lx - left) / scale, (ly - top) / scale);
				}

				result.push_back(temp_box);
			}
		}
	}
}

// Run face detection on `image`.
//
// The image is letterboxed to the configured input size, scaled to [0, 1],
// passed through the network, and the "stride_32", "stride_16" and "stride_8"
// heads are decoded and merged with NMS (threshold 0.4).
//
// Returns a reference to the detector's internal result vector; it is cleared
// and refilled on every call. The result is empty when no model is loaded,
// when `image` is empty, or when the input could not be fed to the network.
// A head whose blob cannot be extracted is skipped.
//
// NOTE(review): the pixel buffer is handed to ncnn as tightly packed 3-channel
// 8-bit data tagged PIXEL_RGB with no channel swap -- confirm callers pass
// 3-channel images in the channel order the model was trained with.
std::vector<Anchor>& yoloV5_face_ncnn::Detect(cv::Mat& image)
{
	m_result.clear();

	// Without a model or pixels there is nothing to detect; bail out before
	// dereferencing m_net or dividing by a zero-sized image.
	if (!m_net || image.empty())
	{
		return m_result;
	}

	cv::Mat dst;
	std::vector<float> infos = LetterboxImage(image, dst, cv::Size(m_dimIfm.w(), m_dimIfm.h()));

	ncnn::Mat in = ncnn::Mat::from_pixels_resize(dst.data, ncnn::Mat::PIXEL_RGB, dst.cols, dst.rows, m_dimIfm.w(), m_dimIfm.h());
	float norm[3] = { 1 / 255.f, 1 / 255.f, 1 / 255.f };
	float mean[3] = { 0, 0, 0 };
	in.substract_mean_normalize(mean, norm);

	auto ex = m_net->create_extractor();
	ex.set_light_mode(true);
	ex.set_num_threads(2);
	if (ex.input(0, in) != 0)
	{
		return m_result;
	}

	// Each head gets its own blob and is decoded only if extraction succeeded,
	// so a failed extract can never re-decode a previous head's data with the
	// wrong anchors.
	auto decode_head = [&](const char* blob_name, const std::vector<int>& anchors)
	{
		ncnn::Mat blob;
		if (ex.extract(blob_name, blob) != 0)
		{
			return;
		}
		decode(blob, infos, anchors, dst.cols, dst.rows, m_result);
	};

	decode_head("stride_32", m_anchor32);
	decode_head("stride_16", m_anchor16);
	decode_head("stride_8", m_anchor8);

	nms(m_result, 0.4f);

	ex.clear();
	return m_result;
}