OpenCVForUnity icon indicating copy to clipboard operation
OpenCVForUnity copied to clipboard

How to convert the C++ sample ‘Hand Keypoint Detection’ using OpenCVForUnity?

Open peterchen2001 opened this issue 6 years ago • 3 comments

I am studying the sample ‘Hand Keypoint Detection’ at https://www.learnopencv.com/hand-keypoint-detection-using-deep-learning-and-opencv/; it works in the C++ application. I am trying to convert it using OpenCVForUnity. Everything works well except for the conversion of the C++ line below:

//Mat probMap(H, W, CV_32F, output.ptr(0, n));

What's the equivalent code in opencvforunity?

See my below code snippet: Mat input = Dnn.blobFromImage(img, 1.0 / 255, new Size(inWidth, inHeight), new Scalar(0, 0, 0), false, false);

            net.setInput(input);

            // Forward pass produces a 4-D blob [1, nPoints, H, W]:
            // one H x W probability map per keypoint.
            Mat output = net.forward();

            int H = output.size(2);
            int W = output.size(3);

            // FIX: the buffer must hold a full H x W probability map,
            // not just a single row of W floats.
            float[] data = new float[H * W];

            // FIX: flatten the 4-D blob to a 2-D Mat [nPoints, H*W] so that
            // row n of `output` is exactly the n-th keypoint's map — this is the
            // OpenCVForUnity equivalent of the C++
            //     Mat probMap(H, W, CV_32F, output.ptr(0, n));
            output = output.reshape(1, output.size(1));

            List<Point> points = new List<Point>();
            for (int n = 0; n < nPoints; n++)
            {
                // Probability map of the corresponding body part.
                output.get(n, 0, data);
                Mat probMap = new Mat(H, W, CvType.CV_32FC1);
                probMap.put(0, 0, data);

                // Scale the map up to the display frame so maxLoc is already
                // in frame coordinates.
                Imgproc.resize(probMap, probMap, new Size(frameWidth, frameHeight));

                // Global maximum of the map = most likely keypoint location
                // (equivalent of minMaxLoc(probMap, 0, &prob, 0, &maxLoc)).
                Core.MinMaxLocResult result = Core.minMaxLoc(probMap);
                probMap.Dispose();

                double x = result.maxLoc.x;
                double y = result.maxLoc.y;
                Debug.Log(string.Format("x:{0},y:{1},maxVal:{2}", x, y, result.maxVal));

                // Keep the keypoint only when its confidence clears the threshold.
                if (result.maxVal > 0.1)
                {
                    points.Add(new Point(x, y));
                    Imgproc.circle(img, new Point((int)x, (int)y), 8, new Scalar(0, 255, 255), -1);
                    Imgproc.putText(img, string.Format("{0}", n), new Point((int)x, (int)y), Imgproc.FONT_HERSHEY_COMPLEX, 1, new Scalar(0, 0, 255), 2);
                }
                else
                {
                    points.Add(null);
                }
            }

peterchen2001 avatar Nov 28 '19 06:11 peterchen2001

I added code to load a hand detection model in OpenPoseExample. OpenPoseExample.zip

EnoxSoftware avatar Nov 28 '19 11:11 EnoxSoftware

Great job! That works when detecting an image loaded from disk. However, my scenario processes each frame from the webcam, and I got an error saying "dnn::forward_11() : OpenCV(4.1.0-dev) C:\Users\satoo\Desktop\opencv\modules\dnn\src\layers\convolution_layer.cpp:282: error: (-2:Unspecified error) Number of input channels should be multiple of 3 but got 4 in function 'cv::dnn::ConvolutionLayerImpl::getMemoryShapes'". Any idea how to fix this? The code snippet below is what I was testing:

void Update()
{
    // Only process fresh frames while the webcam helper is running.
    if (!webCamTextureToMatHelper.IsPlaying() || !webCamTextureToMatHelper.DidUpdateThisFrame())
        return;

    Mat rgbaMat = webCamTextureToMatHelper.GetMat();

    // Dispatch the frame to the detector selected by detectType.
    switch (detectType)
    {
        case DATASET_TYPE.ObjectDetect:
            RunDnnObjectDetection(rgbaMat);
            break;
        case DATASET_TYPE.HAND:
            RunHandPose(rgbaMat);
            break;
    }
}
    /// <summary>
    /// Runs the hand keypoint model on <paramref name="img"/>, draws the detected
    /// skeleton onto it and displays the result on this GameObject's material.
    /// The model only accepts 3 input channels, so callers must convert RGBA
    /// webcam frames to 3-channel BGR before passing them in.
    /// </summary>
    /// <param name="img">Frame to process; drawn on in place.</param>
    void RunHandPose(Mat img)
    {
        // If true, the error log of the native-side OpenCV is shown in the Unity Editor console.
        Utils.setDebugMode(true);

        if (img.empty())
        {
            Debug.LogError("dnn/COCO_val2014_000000000589.jpg is not loaded.The image file can be downloaded here: \"https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/examples/media/COCO_val2014_000000000589.jpg\" folder. ");
            img = new Mat(368, 368, CvType.CV_8UC3, new Scalar(0, 0, 0));
        }

        // Adjust Quad.transform.localScale so the quad matches the image size.
        gameObject.transform.localScale = new Vector3(img.width(), img.height(), 1);

        float imageWidth = img.width();
        float imageHeight = img.height();

        // Fit the orthographic camera to the image on the tighter axis.
        float widthScale = (float)Screen.width / imageWidth;
        float heightScale = (float)Screen.height / imageHeight;
        if (widthScale < heightScale)
        {
            Camera.main.orthographicSize = (imageWidth * (float)Screen.height / (float)Screen.width) / 2;
        }
        else
        {
            Camera.main.orthographicSize = imageHeight / 2;
        }

        if (net == null)
        {
            Imgproc.putText(img, "model file is not loaded.", new Point(5, img.rows() - 30), Imgproc.FONT_HERSHEY_SIMPLEX, 0.7, new Scalar(255, 255, 255), 2, Imgproc.LINE_AA, false);
            Imgproc.putText(img, "Please read console message.", new Point(5, img.rows() - 10), Imgproc.FONT_HERSHEY_SIMPLEX, 0.7, new Scalar(255, 255, 255), 2, Imgproc.LINE_AA, false);
        }
        else
        {
            float frameWidth = img.cols();
            float frameHeight = img.rows();

            Mat input = Dnn.blobFromImage(img, inScale, new Size(inWidth, inHeight), new Scalar(0, 0, 0), false, false);
            net.setInput(input);

            Mat output = net.forward();

            // Heat-map dimensions, read BEFORE the reshape below changes the
            // blob's shape. Generalized from the hard-coded 46 that is only
            // valid for a 368x368 input.
            int heatMapRows = output.size(2);
            int heatMapCols = output.size(3);

            float[] data = new float[heatMapRows * heatMapCols];

            // Flatten the 4-D blob [1, C, H, W] to a 2-D Mat [C, H*W] so each
            // row is one keypoint's probability map.
            Mat reshaped = output.reshape(1, output.size(1));

            List<Point> points = new List<Point>();
            for (int i = 0; i < BODY_PARTS.Count; i++)
            {
                reshaped.get(i, 0, data);

                Mat heatMap = new Mat(1, data.Length, CvType.CV_32FC1);
                heatMap.put(0, 0, data);

                // Originally, we try to find all the local maximums. To simplify a sample
                // we just find a global one. However only a single pose at the same time
                // could be detected this way.
                Core.MinMaxLocResult result = Core.minMaxLoc(heatMap);
                heatMap.Dispose();

                // FIX: maxLoc.x is a flat index into the H x W map; recover
                // (row, col) with integer div/mod. The original divided the
                // flat index fractionally, which smeared the column into y.
                int flat = (int)result.maxLoc.x;
                double x = frameWidth * (flat % heatMapCols) / heatMapCols;
                double y = frameHeight * (flat / heatMapCols) / heatMapRows;

                points.Add(result.maxVal > 0.1 ? new Point(x, y) : null);
            }

            // Draw the skeleton: one segment per POSE_PAIRS entry whose two
            // endpoints were both detected.
            for (int i = 0; i < POSE_PAIRS.GetLength(0); i++)
            {
                string partFrom = POSE_PAIRS[i, 0];
                string partTo = POSE_PAIRS[i, 1];

                int idFrom = BODY_PARTS[partFrom];
                int idTo = BODY_PARTS[partTo];

                if (points[idFrom] != null && points[idTo] != null)
                {
                    Imgproc.line(img, points[idFrom], points[idTo], new Scalar(0, 255, 0), 3);
                    Imgproc.ellipse(img, points[idFrom], new Size(3, 3), 0, 0, 360, new Scalar(0, 0, 255), Core.FILLED);
                    Imgproc.ellipse(img, points[idTo], new Size(3, 3), 0, 0, 360, new Scalar(0, 0, 255), Core.FILLED);
                }
            }

            // Overlay the inference time. (Per-frame Debug.Log / timings.dump()
            // calls were removed — they are expensive when run every frame.)
            MatOfDouble timings = new MatOfDouble();
            long t = net.getPerfProfile(timings);
            double freq = Core.getTickFrequency() / 1000;
            Imgproc.putText(img, (t / freq) + "ms", new Point(10, img.height() - 10), Imgproc.FONT_HERSHEY_SIMPLEX, 0.6, new Scalar(0, 0, 255), 2);

            // FIX: release native Mats every frame; leaking them each Update is
            // a major cause of the webcam slowdown reported above.
            timings.Dispose();
            reshaped.Dispose();
            output.Dispose();
            input.Dispose();
        }

        Imgproc.cvtColor(img, img, Imgproc.COLOR_BGR2RGB);

        // NOTE(review): allocating a new Texture2D every call leaks textures in
        // the webcam scenario. Cache one texture as a field (recreate only when
        // the frame size changes) and reuse it here.
        Texture2D texture = new Texture2D(img.cols(), img.rows(), TextureFormat.RGBA32, false);

        Utils.matToTexture2D(img, texture);

        gameObject.GetComponent<Renderer>().material.mainTexture = texture;
        Utils.setDebugMode(false);
    }

peterchen2001 avatar Nov 29 '19 02:11 peterchen2001

I think I fixed the error with the lines below:

        // Convert the 4-channel RGBA webcam frame to 3-channel BGR before
        // blobFromImage: the model's first convolution layer requires the input
        // channel count to be a multiple of 3, so feeding the RGBA Mat directly
        // triggers the getMemoryShapes error quoted above.
        var img = new Mat(webCamTextureMat.rows(), webCamTextureMat.cols(), CvType.CV_8UC3);
        Imgproc.cvtColor(webCamTextureMat, img, Imgproc.COLOR_RGBA2BGR);

However, the performance is very slow — any idea how to make the hand pose detection fast enough for the webcam scenario?

peterchen2001 avatar Nov 29 '19 06:11 peterchen2001