Use YOLO detection for pedestrians

opencv
PgSocks 1 year ago
parent f1f08a9a21
commit db9cb09cc1

@@ -9,7 +9,7 @@ extern "C" {
 #include <SDL2/SDL.h>
 #include <opencv2/opencv.hpp>
-#include <opencv2/highgui/highgui.hpp>
+#include <opencv2/dnn/dnn.hpp>
 #include <stdio.h>
 #include <stdlib.h>
@@ -29,6 +29,8 @@ struct {
 int color = 0;
 float x = 0, y = 0, z = 0;
+int target_x = 0, target_y = 0;
+int yaw_cur = 0, pitch_cur = 0;
 static Uint32 drive_timer_handler(Uint32 interval, void* param) {
     robot_drive((Robot)param, x, y, z);
     return 75;
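The handler above relies on SDL2's timer semantics: an SDL_AddTimer callback returns the delay until its next invocation, so returning 75 keeps robot_drive() firing every 75 ms, and returning 0 would cancel the timer. The registration itself is not part of this diff; a minimal sketch of what it presumably looks like, with demo names:

    #include <SDL2/SDL.h>

    static Uint32 demo_timer(Uint32 interval, void* param) {
        // robot_drive((Robot)param, x, y, z);   // as in drive_timer_handler above
        return 75;                               // re-arm in 75 ms; 0 would stop
    }

    void demo_register(void* robot) {
        SDL_Init(SDL_INIT_TIMER);
        SDL_TimerID id = SDL_AddTimer(75, demo_timer, robot);
        // ... run the main loop ...
        SDL_RemoveTimer(id);
    }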
@@ -48,16 +50,34 @@ bool stop = false;
 bool track = false;
 static void processFrameThread(Robot robot) {
-    cv::HOGDescriptor hog;
-    hog.setSVMDetector(cv::HOGDescriptor::getDefaultPeopleDetector());
+    cv::dnn::Net net = cv::dnn::readNet("yolov3-tiny.weights", "yolov3-tiny.cfg");
     while(!stop) {
         std::unique_lock<std::mutex> lock(mtx);
         if(!img.empty()) {
-            cv::Mat gray;
-            cv::cvtColor(img, gray, cv::COLOR_BGR2GRAY);
-            hog.detectMultiScale(gray, found, weights, 0, cv::Size(), cv::Size(), 1.1);
+            found.clear();
+            weights.clear();
+            cv::Mat blob = cv::dnn::blobFromImage(img, 1/255.0, cv::Size(416, 416), cv::Scalar(0, 0, 0), true, false);
+            net.setInput(blob);
+            std::vector<cv::Mat> outs;
+            net.forward(outs, net.getUnconnectedOutLayersNames());
+            float *data = (float*)outs[0].data;
+            for(int i = 0; i < outs[0].rows; ++i) {
+                float final_score = data[4] * data[5];
+                if(final_score >= 0.0075) {
+                    weights.push_back(final_score);
+                    int cx = data[0] * img.cols;
+                    int cy = data[1] * img.rows;
+                    int width = data[2] * img.cols;
+                    int height = data[3] * img.rows;
+                    int left = cx - width / 2;
+                    int top = cy - height / 2;
+                    found.push_back(cv::Rect(left, top, width, height));
+                }
+                data += 85;
+            }
             target = 0;
             for(unsigned i = 0; i < weights.size(); i++) {
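Each row of a YOLOv3 output tensor is 85 floats: box center x/y and width/height (normalized to the input), an objectness score, then 80 COCO class scores, where index 5 is class 0, "person" — hence data[4] * data[5] as the pedestrian score and the data += 85 stride. Two things the patch skips: yolov3-tiny has two output layers (only outs[0] is read here), and overlapping boxes are usually pruned with cv::dnn::NMSBoxes. A hedged standalone sketch covering both, with the 0.5/0.4 thresholds as tunable guesses rather than values from the commit:

    #include <opencv2/dnn/dnn.hpp>
    #include <vector>

    // Collect "person" boxes from every YOLO output tensor, then apply
    // non-maximum suppression to drop overlapping duplicates.
    static void collectPersons(const std::vector<cv::Mat>& outs, const cv::Size& frame,
                               std::vector<cv::Rect>& boxes, std::vector<float>& scores) {
        for(const cv::Mat& out : outs) {
            const float* data = (const float*)out.data;
            for(int i = 0; i < out.rows; ++i, data += out.cols) {   // out.cols == 85
                float score = data[4] * data[5];                    // objectness * P(person)
                if(score < 0.5f) continue;                          // tunable threshold
                int w = (int)(data[2] * frame.width), h = (int)(data[3] * frame.height);
                int l = (int)(data[0] * frame.width) - w / 2;
                int t = (int)(data[1] * frame.height) - h / 2;
                boxes.emplace_back(l, t, w, h);
                scores.push_back(score);
            }
        }
        std::vector<int> keep;
        cv::dnn::NMSBoxes(boxes, scores, 0.5f, 0.4f, keep);         // IoU-based pruning
        std::vector<cv::Rect> kb; std::vector<float> ks;
        for(int k : keep) { kb.push_back(boxes[k]); ks.push_back(scores[k]); }
        boxes.swap(kb); scores.swap(ks);
    }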
@@ -65,9 +85,9 @@ static void processFrameThread(Robot robot) {
                     target = i;
             }
-            if(track) {
-                float yaw = found[target].x;
-                float pitch = found[target].y;
+            if(track && !weights.empty()) {
+                float yaw = target_x = found[target].x;
+                float pitch = target_y = found[target].y;
                 // Normalize the coordinates
                 yaw = 2 * (yaw - img.cols / 2) / img.cols;
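The normalization above maps a pixel column onto [-1, 1] and scales it by half the assumed 120-degree field of view; since the degrees-to-radians conversion is undone two lines later, the whole chain reduces to n * FOV / 2 in degrees. A self-contained helper (hypothetical, not part of the patch) that does the same mapping:

    // Map a pixel coordinate to a gimbal angle, assuming the patch's
    // 120-degree FOV spreads linearly across the frame. The deg->rad->deg
    // round trip in the original cancels out, leaving n * fovDeg / 2.
    static float pixelToDegrees(float px, int extent, float fovDeg = 120.0f) {
        float n = 2.0f * (px - extent / 2.0f) / extent;   // -1 at left edge, +1 at right
        return n * fovDeg / 2.0f;                         // angle from frame center
    }
    // Example: pixelToDegrees(960, 1280) == 30.0f, i.e. a target three quarters
    // of the way across a 1280-wide frame asks for 30 degrees of yaw.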
@@ -192,6 +212,8 @@ static void captureFrameThread(SDL_Window* window, const char* fname) {
     SDL_UpdateTexture(texture, NULL, img.data, img.cols * 3);
     SDL_RenderClear(renderer);
     SDL_RenderCopy(renderer, texture, NULL, NULL);
+    // Aiming line from the frame center to the tracked target; it must be
+    // drawn after the frame is copied, or the texture would paint over it.
+    SDL_RenderDrawLine(renderer, 1280 / 2, 720 / 2, target_x, target_y);
     SDL_RenderPresent(renderer);
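For reference, the usual SDL2 frame sequence that the draw call above has to fit into; note that SDL_RenderClear fills with the current draw color, so any overlay color should be set after the clear. The red is an assumption, since the diff never shows a color being set:

    SDL_RenderClear(renderer);                          // wipe the previous frame
    SDL_RenderCopy(renderer, texture, NULL, NULL);      // blit the camera image
    SDL_SetRenderDrawColor(renderer, 255, 0, 0, 255);   // overlay color (assumed red)
    SDL_RenderDrawLine(renderer, 1280 / 2, 720 / 2, target_x, target_y);
    SDL_RenderPresent(renderer);                        // flip to the window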
@@ -246,6 +268,9 @@ int main(int argc, char* argv[]) {
     std::thread *captureThread = nullptr;
     std::thread *processThread = nullptr;
+    // Start the aim point at the center of the 1280x720 frame
+    target_x = 1280 / 2;
+    target_y = 720 / 2;
     while(robot_work(robot)) {
         int h = 720, w = 1280;
@@ -267,9 +292,11 @@ int main(int argc, char* argv[]) {
         case GIMBAL_ACTION_PUSH_CMD:
             if(fragment.message.push.gimbalaction.state != 1)
                 break;
+            yaw_cur = fragment.message.push.gimbalaction.yaw;
+            pitch_cur = fragment.message.push.gimbalaction.pitch;
             printf("Gimbal action %d\n", fragment.message.push.gimbalaction.id);
             printf("\tProgress %d\n", fragment.message.push.gimbalaction.progress);
-            printf("\tYaw %d, Pitch %d\n", fragment.message.push.gimbalaction.yaw, fragment.message.push.gimbalaction.pitch);
+            printf("\tYaw %d, Pitch %d\n", yaw_cur, pitch_cur);
             break;
         case GIMBAL_ROTATE_CMD:
             if(fragment.message.resp.gimbalrot.retcode) {
@@ -308,7 +335,6 @@ int main(int argc, char* argv[]) {
             }
         }
-        std::unique_lock<std::mutex> lock(mtx);
         SDL_Event event;
         while(SDL_PollEvent(&event)) {
             switch(event.type) {
@@ -353,7 +379,6 @@ int main(int argc, char* argv[]) {
             case SDL_MOUSEBUTTONDOWN:
             {
                 // Get window coordinates
-                int target_x = 0, target_y = 0;
                 SDL_GetMouseState(&target_x, &target_y);
                 float yaw = target_x;
@@ -375,7 +400,6 @@ int main(int argc, char* argv[]) {
            default: break;
            }
        }
-       lock.unlock();
    }
    if(captureThread != nullptr) {

@@ -3,7 +3,7 @@
 #include <mutex>
 #include <cmath>
 #include <opencv2/opencv.hpp>
-#include <opencv2/highgui/highgui.hpp>
+#include <opencv2/dnn/dnn.hpp>
 #include <SDL2/SDL.h>
 extern "C" {
@@ -22,46 +22,39 @@ unsigned target;
 bool stop = false;
 static void processFrameThread() {
-    cv::HOGDescriptor hog;
-    hog.setSVMDetector(cv::HOGDescriptor::getDefaultPeopleDetector());
+    cv::dnn::Net net = cv::dnn::readNet("yolov3-tiny.weights", "yolov3-tiny.cfg");
     while(!stop) {
         std::unique_lock<std::mutex> lock(mtx);
         if(!img.empty()) {
-            cv::Mat gray;
-            cv::cvtColor(img, gray, cv::COLOR_BGR2GRAY);
-            hog.detectMultiScale(gray, found, weights, 0, cv::Size(), cv::Size(), 1.1);
-            target = 0;
-            for(unsigned i = 0; i < weights.size(); i++) {
-                if(weights[i] > weights[target])
-                    target = i;
-            }
+            found.clear();
+            weights.clear();
+            cv::Mat blob = cv::dnn::blobFromImage(img, 1/255.0, cv::Size(416, 416), cv::Scalar(0, 0, 0), true, false);
+            net.setInput(blob);
+            std::vector<cv::Mat> outs;
+            net.forward(outs, net.getUnconnectedOutLayersNames());
+            float *data = (float*)outs[0].data;
+            for(int i = 0; i < outs[0].rows; ++i) {
+                float final_score = data[4] * data[5];
+                if(final_score >= 0.0075) {
+                    weights.push_back(final_score);
+                    int cx = data[0] * img.cols;
+                    int cy = data[1] * img.rows;
+                    int width = data[2] * img.cols;
+                    int height = data[3] * img.rows;
+                    int left = cx - width / 2;
+                    int top = cy - height / 2;
+                    found.push_back(cv::Rect(left, top, width, height));
+                }
+                data += 85;
+            }
             // Get the center of the highest weighted rectangle
             float x = found[target].x + found[target].width / 2;
             float y = found[target].y + found[target].height / 2;
             // Normalize the coordinates
             x = 2 * (x - img.cols / 2) / img.cols;
             y = 2 * (y - img.rows / 2) / img.rows;
             // Get the FOV angle of the point in radians
             float FOV = 120 * (M_PI / 180);
             x = x * (FOV / 2);
             y = y * (FOV / 2);
             // Convert to degrees
             x = x * (180 / M_PI);
             y = y * (180 / M_PI);
             printf("y: %f, p: %f\n", x, y);
         }
         lock.unlock();
-        SDL_Delay(75);
+        SDL_Delay(250);
     }
 }
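One practical note on the new cv::dnn path: readNet defaults to OpenCV's CPU backend, and yolov3-tiny on a 416x416 input can still be slow enough per frame that the 250 ms delay above matters. The stock API lets you pin a backend and target right after loading; the enum choices below are illustrative and depend on how OpenCV was built:

    #include <opencv2/dnn/dnn.hpp>

    // Illustrative backend/target pinning (e.g. DNN_TARGET_OPENCL needs an
    // OpenCL-capable build; the default is the OpenCV CPU backend).
    cv::dnn::Net net = cv::dnn::readNet("yolov3-tiny.weights", "yolov3-tiny.cfg");
    void pin_backend() {
        net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
        net.setPreferableTarget(cv::dnn::DNN_TARGET_OPENCL);  // or DNN_TARGET_CPU
    }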
