sentis-samples icon indicating copy to clipboard operation
sentis-samples copied to clipboard

Pose Detection not working on WebGL build

Open JaneshOhilo opened this issue 10 months ago • 0 comments

I tried running the official BlazeDetectionSample Pose. It worked fine in the editor and in a WebGL build for a static image. I then modified it to work with a webcam feed, which works in the editor but not in the WebGL build. To be precise: the webcam starts and I can see myself, but the keypoints are not visualized in the WebGL build.

using System;
using Unity.Mathematics;
using Unity.Sentis;
using UnityEngine;

/// <summary>
/// Runs the BlazePose two-stage pipeline (detector + landmarker) on a live
/// webcam feed and forwards bounding box / keypoint results to the previews.
/// Detector output is post-processed on-model (argmax + score filter) so only
/// the single best pose candidate is read back each frame.
/// </summary>
public class PoseDetection : MonoBehaviour
{
  public PosePreview posePreview;
  public ImagePreview imagePreview;
  public Texture2D imageTexture;     // unused in the webcam variant; kept for inspector compatibility
  public ModelAsset poseDetector;
  public ModelAsset poseLandmarker;
  public TextAsset anchorsCSV;

  // Minimum detector confidence required before landmarks are computed.
  public float scoreThreshold = 0.75f;

  const int k_NumAnchors = 2254;
  float[,] m_Anchors;

  const int k_NumKeypoints = 33;
  const int detectorInputSize = 224;
  const int landmarkerInputSize = 256;

  Worker m_PoseDetectorWorker;
  Worker m_PoseLandmarkerWorker;
  Tensor<float> m_DetectorInput;
  Tensor<float> m_LandmarkerInput;
  Awaitable m_DetectAwaitable;

  float m_TextureWidth;
  float m_TextureHeight;

  //! New
  WebCamTexture webcamTexture;
  // RenderTexture outputTexture;

  /// <summary>
  /// Loads both models, starts the webcam, then runs the per-frame detect
  /// loop until cancelled from <see cref="OnDestroy"/>.
  /// </summary>
  public async void Start()
  {
    Debug.Log($"Supports compute shaders : {SystemInfo.supportsComputeShaders}");
    Application.targetFrameRate = 60;
    try
    {
      m_Anchors = BlazeUtils.LoadAnchors(anchorsCSV.text, k_NumAnchors);

      var poseDetectorModel = ModelLoader.Load(poseDetector);
      // post process the model to filter scores + argmax select the best pose
      var graph = new FunctionalGraph();
      var input = graph.AddInput(poseDetectorModel, 0);
      var outputs = Functional.Forward(poseDetectorModel, input);
      var boxes = outputs[0]; // (1, 2254, 12)
      var scores = outputs[1]; // (1, 2254, 1)
      var idx_scores_boxes = BlazeUtils.ArgMaxFiltering(boxes, scores);
      poseDetectorModel = graph.Compile(idx_scores_boxes.Item1, idx_scores_boxes.Item2, idx_scores_boxes.Item3);

      m_PoseDetectorWorker = new Worker(poseDetectorModel, backendType: BackendType.GPUCompute);
    }
    catch (Exception e)
    {
      Debug.LogError(e);
      // Without a detector worker every later frame would NullReference;
      // bail out instead of continuing with a half-initialized pipeline.
      return;
    }

    WebCamDevice[] devices = WebCamTexture.devices;
    if (devices.Length == 0)
    {
      Debug.LogError("No webcam devices found.");
      return;
    }
    webcamTexture = new WebCamTexture(1920, 1080);
    webcamTexture.deviceName = devices[0].name;
    webcamTexture.Play();

    // WebCamTexture reports a 16x16 placeholder until the first real frame
    // arrives (notably on WebGL, where camera permission is asynchronous).
    // Wait so the affine crop below uses the true frame dimensions.
    while (webcamTexture.width <= 16)
      await Awaitable.NextFrameAsync();

    var poseLandmarkerModel = ModelLoader.Load(poseLandmarker);
    m_PoseLandmarkerWorker = new Worker(poseLandmarkerModel, BackendType.GPUCompute);

    m_DetectorInput = new Tensor<float>(new TensorShape(1, detectorInputSize, detectorInputSize, 3));
    m_LandmarkerInput = new Tensor<float>(new TensorShape(1, landmarkerInputSize, landmarkerInputSize, 3));

    while (true)
    {
      try
      {
        m_DetectAwaitable = Detect(webcamTexture);
        await m_DetectAwaitable;
      }
      catch (OperationCanceledException)
      {
        break;
      }
    }

    // Loop only exits via cancellation (OnDestroy) — release GPU resources.
    webcamTexture.Stop();
    m_PoseDetectorWorker.Dispose();
    m_PoseLandmarkerWorker.Dispose();
    m_DetectorInput.Dispose();
    m_LandmarkerInput.Dispose();
  }

  /// <summary>
  /// Maps a pixel-space position to a world-space position centered on the
  /// image, normalized by the image height (keeps aspect ratio).
  /// </summary>
  Vector3 ImageToWorld(Vector2 position)
  {
    return (position - 0.5f * new Vector2(m_TextureWidth, m_TextureHeight)) / m_TextureHeight;
  }

  /// <summary>
  /// One pipeline iteration: letterbox-sample the frame into the detector,
  /// read back the best candidate, then crop/rotate around the detected
  /// torso keypoints and run the landmarker on that region.
  /// </summary>
  async Awaitable Detect(Texture texture)
  {
    m_TextureWidth = texture.width;
    m_TextureHeight = texture.height;
    imagePreview.SetTexture(texture);

    var size = Mathf.Max(texture.width, texture.height);

    // The affine transformation matrix to go from tensor coordinates to image coordinates
    var scale = size / (float)detectorInputSize;
    var M = BlazeUtils.mul(BlazeUtils.TranslationMatrix(0.5f * (new Vector2(texture.width, texture.height) + new Vector2(-size, size))), BlazeUtils.ScaleMatrix(new Vector2(scale, -scale)));
    BlazeUtils.SampleImageAffine(texture, m_DetectorInput, M);

    m_PoseDetectorWorker.Schedule(m_DetectorInput);

    // Kick off all three readbacks before awaiting so they overlap.
    var outputIdxAwaitable = (m_PoseDetectorWorker.PeekOutput(0) as Tensor<int>).ReadbackAndCloneAsync();
    var outputScoreAwaitable = (m_PoseDetectorWorker.PeekOutput(1) as Tensor<float>).ReadbackAndCloneAsync();
    var outputBoxAwaitable = (m_PoseDetectorWorker.PeekOutput(2) as Tensor<float>).ReadbackAndCloneAsync();

    using var outputIdx = await outputIdxAwaitable;
    using var outputScore = await outputScoreAwaitable;
    using var outputBox = await outputBoxAwaitable;

    var scorePassesThreshold = outputScore[0] >= scoreThreshold;
    posePreview.SetActive(scorePassesThreshold);

    if (!scorePassesThreshold)
      return;

    var idx = outputIdx[0];

    // Detector box/keypoint offsets are relative to the winning anchor.
    var anchorPosition = detectorInputSize * new float2(m_Anchors[idx, 0], m_Anchors[idx, 1]);

    var face_ImageSpace = BlazeUtils.mul(M, anchorPosition + new float2(outputBox[0, 0, 0], outputBox[0, 0, 1]));
    var faceTopRight_ImageSpace = BlazeUtils.mul(M, anchorPosition + new float2(outputBox[0, 0, 0] + 0.5f * outputBox[0, 0, 2], outputBox[0, 0, 1] + 0.5f * outputBox[0, 0, 3]));

    // kp1/kp2 define the torso axis used to orient and size the landmarker crop.
    var kp1_ImageSpace = BlazeUtils.mul(M, anchorPosition + new float2(outputBox[0, 0, 4 + 2 * 0 + 0], outputBox[0, 0, 4 + 2 * 0 + 1]));
    var kp2_ImageSpace = BlazeUtils.mul(M, anchorPosition + new float2(outputBox[0, 0, 4 + 2 * 1 + 0], outputBox[0, 0, 4 + 2 * 1 + 1]));
    var delta_ImageSpace = kp2_ImageSpace - kp1_ImageSpace;
    var dscale = 1.25f; // crop padding factor around the detected pose
    var radius = dscale * math.length(delta_ImageSpace);
    var theta = math.atan2(delta_ImageSpace.y, delta_ImageSpace.x);
    var origin2 = new float2(0.5f * landmarkerInputSize, 0.5f * landmarkerInputSize);
    var scale2 = radius / (0.5f * landmarkerInputSize);
    var M2 = BlazeUtils.mul(BlazeUtils.mul(BlazeUtils.mul(BlazeUtils.TranslationMatrix(kp1_ImageSpace), BlazeUtils.ScaleMatrix(new float2(scale2, -scale2))), BlazeUtils.RotationMatrix(0.5f * Mathf.PI - theta)), BlazeUtils.TranslationMatrix(-origin2));
    BlazeUtils.SampleImageAffine(texture, m_LandmarkerInput, M2);

    var boxSize = 2f * (faceTopRight_ImageSpace - face_ImageSpace);

    // posePreview.SetBoundingBox(true, ImageToWorld(face_ImageSpace), boxSize / m_TextureHeight);
    // posePreview.SetBoundingCircle(true, ImageToWorld(kp1_ImageSpace), radius / m_TextureHeight);

    m_PoseLandmarkerWorker.Schedule(m_LandmarkerInput);

    var landmarksAwaitable = (m_PoseLandmarkerWorker.PeekOutput("Identity") as Tensor<float>).ReadbackAndCloneAsync();
    using var landmarks = await landmarksAwaitable; // (1,195) = 33 keypoints x (x, y, z, visibility, presence)

    for (var i = 0; i < k_NumKeypoints; i++)
    {
      // https://arxiv.org/pdf/2006.10204
      var position_ImageSpace = BlazeUtils.mul(M2, new float2(landmarks[5 * i + 0], landmarks[5 * i + 1]));
      var visibility = landmarks[5 * i + 3];
      var presence = landmarks[5 * i + 4];

      // z-position is in unit cube centered on hips
      Vector3 position_WorldSpace = ImageToWorld(position_ImageSpace) + new Vector3(0, 0, landmarks[5 * i + 2] / m_TextureHeight);
      posePreview.SetKeypoint(i, visibility > 0.5f && presence > 0.5f, position_WorldSpace);
    }
  }

  void OnDestroy()
  {
    // Start() may have bailed out (or not run yet) — guard against null.
    m_DetectAwaitable?.Cancel();
  }
}

In the Chrome console, there is no error, but a lot of warnings.

249[Invalid ComputePipeline (unlabeled)] is invalid.

  • While encoding [ComputePassEncoder (unlabeled)].SetPipeline([Invalid ComputePipeline (unlabeled)]).
  • While finishing [CommandEncoder (unlabeled)].

Also this

249[Invalid CommandBuffer] is invalid.

  • While calling [Queue].Submit([[Invalid CommandBuffer]])

I use the Graphics API as WebGPU and Backend Type as GPUCompute.

Also worth mentioning is that the DepthEstimation Sample, which also uses the WebCam Texture for real-time inference, works fine both in the editor and WebGL build. I compared the GraphicsAPI setting and the BackendType and this sample uses WebGL2 and GPUPixel.

After mirroring that configuration (WebGL2 + GPUPixel), I still get the video feed in the build but, as before, no landmark detection. The error changed, though:

Kernel 'ImageSample' not found

SentisPoseBuildWebcamWebGLGPUPixel.framework.js.br:9 System.TypeInitializationException: The type initializer for 'BlazeUtils' threw an exception. ---> System.ArgumentException: Kernel 'ImageSample' not found.

--- End of inner exception stack trace ---

SentisPoseBuildWebcamWebGLGPUPixel.framework.js.br:9 IndexOutOfRangeException: Invalid kernelIndex (0) passed, must be non-negative less than 0.

I am not sure how to proceed further. Any help would be appreciated. Thanks.

JaneshOhilo avatar Feb 26 '25 07:02 JaneshOhilo