Use YOLO detection for pedestrians

opencv
PgSocks 1 year ago
parent f1f08a9a21
commit db9cb09cc1

@@ -9,7 +9,7 @@ extern "C" {
 #include <SDL2/SDL.h>
 #include <opencv2/opencv.hpp>
-#include <opencv2/highgui/highgui.hpp>
+#include <opencv2/dnn/dnn.hpp>
 #include <stdio.h>
 #include <stdlib.h>
@@ -29,6 +29,8 @@ struct {
 int color = 0;
 float x = 0, y = 0, z = 0;
+int target_x = 0, target_y = 0;
+int yaw_cur = 0, pitch_cur = 0;
 static Uint32 drive_timer_handler(Uint32 interval, void* param) {
     robot_drive((Robot)param, x, y, z);
     return 75;
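The handler above relies on SDL2's timer semantics: an SDL_AddTimer callback returns the delay until its next invocation, so returning 75 keeps robot_drive() firing every 75 ms, and returning 0 would cancel the timer. The registration itself is not part of this diff; a minimal sketch of what it presumably looks like, with demo names:

    #include <SDL2/SDL.h>

    static Uint32 demo_timer(Uint32 interval, void* param) {
        // robot_drive((Robot)param, x, y, z);   // as in drive_timer_handler above
        return 75;                               // re-arm in 75 ms; 0 would stop
    }

    void demo_register(void* robot) {
        SDL_Init(SDL_INIT_TIMER);
        SDL_TimerID id = SDL_AddTimer(75, demo_timer, robot);
        // ... run the main loop ...
        SDL_RemoveTimer(id);
    }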
@@ -48,16 +50,34 @@ bool stop = false;
 bool track = false;
 static void processFrameThread(Robot robot) {
-    cv::HOGDescriptor hog;
-    hog.setSVMDetector(cv::HOGDescriptor::getDefaultPeopleDetector());
+    cv::dnn::Net net = cv::dnn::readNet("yolov3-tiny.weights", "yolov3-tiny.cfg");
     while(!stop) {
         std::unique_lock<std::mutex> lock(mtx);
         if(!img.empty()) {
-            cv::Mat gray;
-            cv::cvtColor(img, gray, cv::COLOR_BGR2GRAY);
-            hog.detectMultiScale(gray, found, weights, 0, cv::Size(), cv::Size(), 1.1);
+            found.clear();
+            weights.clear();
+            cv::Mat blob = cv::dnn::blobFromImage(img, 1/255.0, cv::Size(416, 416), cv::Scalar(0, 0, 0), true, false);
+            net.setInput(blob);
+            std::vector<cv::Mat> outs;
+            net.forward(outs, net.getUnconnectedOutLayersNames());
+            float *data = (float*)outs[0].data;
+            for(int i = 0; i < outs[0].rows; ++i) {
+                float final_score = data[4] * data[5];
+                if(final_score >= 0.0075) {
+                    weights.push_back(final_score);
+                    int cx = data[0] * img.cols;
+                    int cy = data[1] * img.rows;
+                    int width = data[2] * img.cols;
+                    int height = data[3] * img.rows;
+                    int left = cx - width / 2;
+                    int top = cy - height / 2;
+                    found.push_back(cv::Rect(left, top, width, height));
+                }
+                data += 85;
+            }
             target = 0;
             for(unsigned i = 0; i < weights.size(); i++) {
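Each row of a YOLOv3 output tensor is 85 floats: box center x/y and width/height (normalized to the input), an objectness score, then 80 COCO class scores, where index 5 is class 0, "person" — hence data[4] * data[5] as the pedestrian score and the data += 85 stride. Two things the patch skips: yolov3-tiny has two output layers (only outs[0] is read here), and overlapping boxes are usually pruned with cv::dnn::NMSBoxes. A hedged standalone sketch covering both, with the 0.5/0.4 thresholds as tunable guesses rather than values from the commit:

    #include <opencv2/dnn/dnn.hpp>
    #include <vector>

    // Collect "person" boxes from every YOLO output tensor, then apply
    // non-maximum suppression to drop overlapping duplicates.
    static void collectPersons(const std::vector<cv::Mat>& outs, const cv::Size& frame,
                               std::vector<cv::Rect>& boxes, std::vector<float>& scores) {
        for(const cv::Mat& out : outs) {
            const float* data = (const float*)out.data;
            for(int i = 0; i < out.rows; ++i, data += out.cols) {   // out.cols == 85
                float score = data[4] * data[5];                    // objectness * P(person)
                if(score < 0.5f) continue;                          // tunable threshold
                int w = (int)(data[2] * frame.width), h = (int)(data[3] * frame.height);
                int l = (int)(data[0] * frame.width) - w / 2;
                int t = (int)(data[1] * frame.height) - h / 2;
                boxes.emplace_back(l, t, w, h);
                scores.push_back(score);
            }
        }
        std::vector<int> keep;
        cv::dnn::NMSBoxes(boxes, scores, 0.5f, 0.4f, keep);         // IoU-based pruning
        std::vector<cv::Rect> kb; std::vector<float> ks;
        for(int k : keep) { kb.push_back(boxes[k]); ks.push_back(scores[k]); }
        boxes.swap(kb); scores.swap(ks);
    }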
@@ -65,9 +85,9 @@ static void processFrameThread(Robot robot) {
                     target = i;
             }
-            if(track) {
-                float yaw = found[target].x;
-                float pitch = found[target].y;
+            if(track && !weights.empty()) {
+                float yaw = target_x = found[target].x;
+                float pitch = target_y = found[target].y;
                 // Normalize the coordinates
                 yaw = 2 * (yaw - img.cols / 2) / img.cols;
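The normalization above maps a pixel column onto [-1, 1] and scales it by half the assumed 120-degree field of view; since the degrees-to-radians conversion is undone two lines later, the whole chain reduces to n * FOV / 2 in degrees. A self-contained helper (hypothetical, not part of the patch) that does the same mapping:

    // Map a pixel coordinate to a gimbal angle, assuming the patch's
    // 120-degree FOV spreads linearly across the frame. The deg->rad->deg
    // round trip in the original cancels out, leaving n * fovDeg / 2.
    static float pixelToDegrees(float px, int extent, float fovDeg = 120.0f) {
        float n = 2.0f * (px - extent / 2.0f) / extent;   // -1 at left edge, +1 at right
        return n * fovDeg / 2.0f;                         // angle from frame center
    }
    // Example: pixelToDegrees(960, 1280) == 30.0f, i.e. a target three quarters
    // of the way across a 1280-wide frame asks for 30 degrees of yaw.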
@@ -192,6 +212,8 @@ static void captureFrameThread(SDL_Window* window, const char* fname) {
     SDL_UpdateTexture(texture, NULL, img.data, img.cols * 3);
     SDL_RenderClear(renderer);
     SDL_RenderCopy(renderer, texture, NULL, NULL);
+    // Aiming line from the frame center to the tracked target; it must be
+    // drawn after the frame is copied, or the texture would paint over it.
+    SDL_RenderDrawLine(renderer, 1280 / 2, 720 / 2, target_x, target_y);
     SDL_RenderPresent(renderer);
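For reference, the usual SDL2 frame sequence that the draw call above has to fit into; note that SDL_RenderClear fills with the current draw color, so any overlay color should be set after the clear. The red is an assumption, since the diff never shows a color being set:

    SDL_RenderClear(renderer);                          // wipe the previous frame
    SDL_RenderCopy(renderer, texture, NULL, NULL);      // blit the camera image
    SDL_SetRenderDrawColor(renderer, 255, 0, 0, 255);   // overlay color (assumed red)
    SDL_RenderDrawLine(renderer, 1280 / 2, 720 / 2, target_x, target_y);
    SDL_RenderPresent(renderer);                        // flip to the window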
@@ -246,6 +268,9 @@ int main(int argc, char* argv[]) {
     std::thread *captureThread = nullptr;
     std::thread *processThread = nullptr;
+    // Start the aim point at the center of the 1280x720 frame
+    target_x = 1280 / 2;
+    target_y = 720 / 2;
     while(robot_work(robot)) {
         int h = 720, w = 1280;
@@ -267,9 +292,11 @@ int main(int argc, char* argv[]) {
         case GIMBAL_ACTION_PUSH_CMD:
             if(fragment.message.push.gimbalaction.state != 1)
                 break;
+            yaw_cur = fragment.message.push.gimbalaction.yaw;
+            pitch_cur = fragment.message.push.gimbalaction.pitch;
             printf("Gimbal action %d\n", fragment.message.push.gimbalaction.id);
             printf("\tProgress %d\n", fragment.message.push.gimbalaction.progress);
-            printf("\tYaw %d, Pitch %d\n", fragment.message.push.gimbalaction.yaw, fragment.message.push.gimbalaction.pitch);
+            printf("\tYaw %d, Pitch %d\n", yaw_cur, pitch_cur);
             break;
         case GIMBAL_ROTATE_CMD:
             if(fragment.message.resp.gimbalrot.retcode) {
@@ -308,7 +335,6 @@ int main(int argc, char* argv[]) {
             }
         }
-        std::unique_lock<std::mutex> lock(mtx);
         SDL_Event event;
         while(SDL_PollEvent(&event)) {
             switch(event.type) {
@@ -353,7 +379,6 @@ int main(int argc, char* argv[]) {
             case SDL_MOUSEBUTTONDOWN:
             {
                 // Get window coordinates
-                int target_x = 0, target_y = 0;
                 SDL_GetMouseState(&target_x, &target_y);
                 float yaw = target_x;
@@ -375,7 +400,6 @@ int main(int argc, char* argv[]) {
            default: break;
            }
        }
-       lock.unlock();
    }
    if(captureThread != nullptr) {

@@ -3,7 +3,7 @@
 #include <mutex>
 #include <cmath>
 #include <opencv2/opencv.hpp>
-#include <opencv2/highgui/highgui.hpp>
+#include <opencv2/dnn/dnn.hpp>
 #include <SDL2/SDL.h>
 extern "C" {
@@ -22,46 +22,39 @@ unsigned target;
 bool stop = false;
 static void processFrameThread() {
-    cv::HOGDescriptor hog;
-    hog.setSVMDetector(cv::HOGDescriptor::getDefaultPeopleDetector());
+    cv::dnn::Net net = cv::dnn::readNet("yolov3-tiny.weights", "yolov3-tiny.cfg");
     while(!stop) {
         std::unique_lock<std::mutex> lock(mtx);
         if(!img.empty()) {
-            cv::Mat gray;
-            cv::cvtColor(img, gray, cv::COLOR_BGR2GRAY);
-            hog.detectMultiScale(gray, found, weights, 0, cv::Size(), cv::Size(), 1.1);
-            target = 0;
-            for(unsigned i = 0; i < weights.size(); i++) {
-                if(weights[i] > weights[target])
-                    target = i;
-            }
+            found.clear();
+            weights.clear();
+            cv::Mat blob = cv::dnn::blobFromImage(img, 1/255.0, cv::Size(416, 416), cv::Scalar(0, 0, 0), true, false);
+            net.setInput(blob);
+            std::vector<cv::Mat> outs;
+            net.forward(outs, net.getUnconnectedOutLayersNames());
+            float *data = (float*)outs[0].data;
+            for(int i = 0; i < outs[0].rows; ++i) {
+                float final_score = data[4] * data[5];
+                if(final_score >= 0.0075) {
+                    weights.push_back(final_score);
+                    int cx = data[0] * img.cols;
+                    int cy = data[1] * img.rows;
+                    int width = data[2] * img.cols;
+                    int height = data[3] * img.rows;
+                    int left = cx - width / 2;
+                    int top = cy - height / 2;
+                    found.push_back(cv::Rect(left, top, width, height));
+                }
+                data += 85;
+            }
             // Get the center of the highest weighted rectangle
             float x = found[target].x + found[target].width / 2;
             float y = found[target].y + found[target].height / 2;
             // Normalize the coordinates
             x = 2 * (x - img.cols / 2) / img.cols;
             y = 2 * (y - img.rows / 2) / img.rows;
             // Get the FOV angle of the point in radians
             float FOV = 120 * (M_PI / 180);
             x = x * (FOV / 2);
             y = y * (FOV / 2);
             // Convert to degrees
             x = x * (180 / M_PI);
             y = y * (180 / M_PI);
             printf("y: %f, p: %f\n", x, y);
         }
         lock.unlock();
-        SDL_Delay(75);
+        SDL_Delay(250);
     }
 }
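One practical note on the new cv::dnn path: readNet defaults to OpenCV's CPU backend, and yolov3-tiny on a 416x416 input can still be slow enough per frame that the 250 ms delay above matters. The stock API lets you pin a backend and target right after loading; the enum choices below are illustrative and depend on how OpenCV was built:

    #include <opencv2/dnn/dnn.hpp>

    // Illustrative backend/target pinning (e.g. DNN_TARGET_OPENCL needs an
    // OpenCL-capable build; the default is the OpenCV CPU backend).
    cv::dnn::Net net = cv::dnn::readNet("yolov3-tiny.weights", "yolov3-tiny.cfg");
    void pin_backend() {
        net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
        net.setPreferableTarget(cv::dnn::DNN_TARGET_OPENCL);  // or DNN_TARGET_CPU
    }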
