InstantMesh Testing the normal map rendering

Hi! Thanks for the great work. I ran your code and tried to test the normal rendering using the render_mesh functions. Since they are used for training, I was wondering how they work and what they output; however, I am not sure whether I am rendering the normal map of a simple cube correctly.

Here is the normal map of the cube I generated using your code: normal

Here is the normal map of the cube on Blender: cube_normal

As you can see, the different faces of the cube seem to be merged together in the normal map I generated using your code. What do you think of this issue? Is this the intended behavior, or did I miss something in the code?

Apr 30 '24 09:04 Kev1MSL

Hi, could you provide your code for rendering the cube normal as well as the mesh so that I can make a test?

May 01 '24 05:05 bluestyle97

Yes sure, I am exploring the training part right now! I am using trimesh to obtain the faces and vertices as NumPy arrays. Here is my code for rendering the cube; feel free to let me know if I misunderstood something in the usage of your model:

import numpy as np
import logging
import trimesh
from PIL import Image
from ..models.lrm_mesh import InstantMesh
import importlib
from omegaconf import OmegaConf
import torch
import os


def render_cube():
    """Render ``cube.obj`` through ``InstantMesh.render_mesh`` and save the maps.

    The model is built from ``configs/instant-mesh-large.yaml`` and moved to
    the CUDA device. The cube is read with trimesh from the directory that
    contains this file and rendered once, at 512 px, from a single hard-coded
    camera. The depth, normal, mask and hard-mask outputs are scaled by 255,
    cast to ``uint8`` and written as PNG files into ``output/``.
    """

    def _to_uint8(rendered):
        # First batch entry -> CPU -> drop singleton dims -> 8-bit image data.
        # NOTE(review): values outside [0, 1] do not survive the uint8 cast;
        # confirm the value range render_mesh returns (depth in particular).
        as_array = rendered[0].cpu().squeeze().numpy()
        return (as_array * 255).astype(np.uint8)

    # ---- model ----------------------------------------------------------
    device = torch.device("cuda")
    model_config = OmegaConf.load("configs/instant-mesh-large.yaml").model_config
    model = InstantMesh(**model_config.get("params", dict())).to(device)
    model.init_flexicubes_geometry(device)
    model.eval()

    # ---- mesh -----------------------------------------------------------
    here = os.path.dirname(os.path.abspath(__file__))
    cube = trimesh.load_mesh(os.path.join(here, "cube.obj"))
    raw_faces, raw_vertices = cube.faces, cube.vertices

    logging.debug(f"Faces shape: {raw_faces.shape}")
    logging.debug(f"Vertices shape: {raw_vertices.shape}")

    # ---- camera ---------------------------------------------------------
    # Pose matrix copied from Blender; its inverse is what gets handed to the
    # renderer (presumably the world-to-camera transform -- TODO confirm).
    blender_pose = np.array(
        [
            [1.00000000e00, 0.00000000e00, 0.00000000e00, 0.00000000e00],
            [0.00000000e00, 3.42285418e-08, -9.99999940e-01, -9.36579132e00],
            [0.00000000e00, 9.99999940e-01, 3.42285418e-08, 0.00000000e00],
            [0.00000000e00, 0.00000000e00, 0.00000000e00, 1.00000000e00],
        ]
    )
    inverse_pose = torch.linalg.inv(torch.from_numpy(blender_pose).float()).float()
    # One mesh with one view: wrap the matrix in two leading axes.
    cameras = np.array([[inverse_pose]])

    # ---- batched tensors for render_mesh --------------------------------
    # A leading batch axis of length one is added to both arrays.
    vertices = (
        torch.from_numpy(np.array([raw_vertices])).to(device).type(torch.float32)
    )
    faces = torch.from_numpy(np.array([raw_faces])).to(device)

    logging.debug(f"Vertices shape: {vertices.shape}")
    logging.debug(f"Faces shape: {faces.shape}")
    logging.debug(f"Cameras shape: {cameras.shape}")

    # ---- render ---------------------------------------------------------
    # The third return value (texture positions) is not used here.
    mask, hard_mask, _text_pos, depth, normal = model.render_mesh(
        vertices, faces, cameras, render_size=512
    )
    logging.debug(f"Normal shape: {normal.shape}")
    logging.debug(f"Depth shape: {depth.shape}")

    # ---- save -----------------------------------------------------------
    output_dir = "output"
    os.makedirs(output_dir, exist_ok=True)

    outputs = (
        ("depth.png", depth),
        ("normal_map.png", normal),
        ("mask.png", mask),
        ("hard_mask.png", hard_mask),
    )
    # Convert every map before writing any file, so a conversion failure
    # leaves no partial set of images behind.
    images = [
        (os.path.join(output_dir, filename), _to_uint8(rendered))
        for filename, rendered in outputs
    ]
    for image_path, pixels in images:
        Image.fromarray(pixels).save(image_path)

I am also giving you the OBJ file in case it is useful:

# Blender 4.1.1
# www.blender.org
o Cube
v 0.024682 -1.706954 0.292741
v 1.413998 -0.689654 -0.724559
v -1.413998 -0.724559 -0.689654
v -0.024682 0.292741 -1.706954
v 0.024682 -0.292741 1.706954
v 1.413998 0.724559 0.689654
v -1.413998 0.689654 0.724559
v -0.024682 1.706954 -0.292741
s 0
f 1 5 7 3
f 4 3 7 8
f 8 7 5 6
f 6 2 4 8
f 2 1 3 4
f 6 5 1 2

May 01 '24 10:05 Kev1MSL

I have done some research, and the cause may be the interpolation of the normal map that happens when face normals are splatted to vertices by this code in neural_renderer.py:

def compute_vertex_normal(v_pos, t_pos_idx):
    """Compute per-vertex unit normals by accumulating face normals.

    Each triangle's un-normalized normal (the cross product of two of its
    edges, so its length scales with the triangle's area) is added to all
    three of its vertices, and the per-vertex sums are then normalized. A
    vertex shared by several faces therefore receives a blend of their
    normals, which gives smooth shading across shared edges; flat shading
    needs a mesh whose faces do not share vertices.

    Args:
        v_pos: 2-D tensor of vertex positions, one row per vertex with three
            coordinates.
        t_pos_idx: 2-D integer tensor of triangles; columns 0, 1 and 2 hold
            the row indices into ``v_pos`` of each triangle's corners.
            ``scatter_add_`` requires an int64 index, so this is assumed to
            be a long tensor -- TODO confirm against callers.

    Returns:
        Tensor shaped like ``v_pos`` holding one unit normal per vertex.
        Vertices whose accumulated normal is (near) zero get ``(0, 0, 1)``.

    Raises:
        AssertionError: if any resulting component is not finite.
    """
    i0 = t_pos_idx[:, 0]
    i1 = t_pos_idx[:, 1]
    i2 = t_pos_idx[:, 2]

    v0 = v_pos[i0, :]
    v1 = v_pos[i1, :]
    v2 = v_pos[i2, :]

    # Pass dim explicitly: without it torch.cross uses the first dimension of
    # size 3, which is the face axis (not xyz) for a mesh with exactly three
    # faces, and the implicit default is deprecated.
    face_normals = torch.cross(v1 - v0, v2 - v0, dim=-1)

    # Splat face normals to vertices
    v_nrm = torch.zeros_like(v_pos)
    v_nrm.scatter_add_(0, i0[:, None].repeat(1, 3), face_normals)
    v_nrm.scatter_add_(0, i1[:, None].repeat(1, 3), face_normals)
    v_nrm.scatter_add_(0, i2[:, None].repeat(1, 3), face_normals)

    # Normalize, replace zero (degenerated) normals with some default value
    v_nrm = torch.where(
        dot(v_nrm, v_nrm) > 1e-20, v_nrm, torch.as_tensor([0.0, 0.0, 1.0]).to(v_nrm)
    )
    v_nrm = F.normalize(v_nrm, dim=1)
    assert torch.all(torch.isfinite(v_nrm))

    return v_nrm

I am wondering whether this kind of smoothing is expected or not. By the way, I have tested on a plane, and the rendering works fine with a single color.

May 01 '24 11:05 Kev1MSL

@Kev1MSL can you provide the code, or describe the method, you used to create the normal map of the cube in Blender?

May 12 '24 21:05 djamal-m