spnv2 Test issue

Hello, thanks for your work. I am trying to test on a synthetic dataset using the provided weights "spnv2_efficientnetb3_fullconfig_offline.pth.tar" and the offline_train_full_config_phi3_BN.yaml file. In this .yaml file, I modified the image size to [768, 512] and adjusted the camera intrinsic parameters to match the size [768, 512]. Then I execute output = model(image), and get and save R_pre and t_pre from output[1]. Is there a problem with my approach? When computing metrics between the predicted pose and the ground-truth pose, I find a large error in the predicted pose.

Apr 08 '25 09:04 pubyLu

Hello,

I believe the second element of output = model(image) is the regressed pose according to the setting in the YAML file. So, the camera intrinsics file shouldn't play a role for this output.

How big and how consistent are the errors? Do you encounter the same errors if you keep the original YAML file without modifications?

Apr 16 '25 05:04 tpark94

Good afternoon. I tried keeping the original YAML file without modifications, but I still got large errors; the results are equivalent to the model having no inference ability at all.

I attach the code demo.py that I use for this work. Could you please help me to see what is wrong?

Thanks for your help and reply! Have a good day!

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import os.path as osp
import argparse
import time

import numpy as np
import torch
from scipy.io import loadmat
from scipy.io import savemat
from transforms3d.quaternions import quat2mat

from core.utils.utils import AverageMeter, ProgressMeter
from core.utils.postprocess import solve_pose_from_heatmaps, rot_6d_to_matrix
from core.utils.metrics import *
from core.utils.visualize import *
from core.config import cfg, update_config
from core.nets import build_spnv2
from core.dataset.MyDataset import SPEEDPLUSDataset
from core.engine.inference import do_valid
from core.utils.utils import (set_seeds_cudnn, create_logger_directories,
                              load_camera_intrinsics, load_tango_3d_keypoints)

def _pose_to_matrix(rotation, translation):
    """Pack a 3x3 rotation and a 3-vector translation into a 4x4 matrix."""
    pose = np.eye(4)
    pose[:3, :3] = rotation
    pose[:3, 3] = translation
    return pose


def do_valid(model, data_loader, camera, keypts_true_3D, valid_fraction=None, log_dir=None, device=torch.device('cpu')):
    """Run single-image inference and save the predicted poses as 4x4 matrices.

    For every image yielded by ``data_loader`` up to two text files are
    written with ``np.savetxt`` into the hard-coded ``savePath`` directory:

    * ``<name>_hp.txt`` -- pose returned by ``solve_pose_from_heatmaps`` for
      ``outputs[0]``; skipped when the solver sets its ``reject`` flag.
    * ``<name>.txt`` -- pose taken directly from ``outputs[1]``.

    Args:
        model: network invoked as ``model(images, is_train=False, gpu=device)``.
        data_loader: iterable yielding ``(images, names)`` with batch size 1.
            The last axis of ``images`` is moved to position 1 before the
            forward pass.
        camera: camera intrinsics forwarded to ``solve_pose_from_heatmaps``.
        keypts_true_3D: 3D keypoints forwarded to ``solve_pose_from_heatmaps``.
        valid_fraction: unused; kept for signature compatibility.
        log_dir: unused; kept for signature compatibility.
        device: device on which the forward pass runs.

    Raises:
        AssertionError: if a batch does not contain exactly one image.
    """
    # switch to eval mode
    model.eval()
    savePath = r"D:\A_projectWok\newTest\spnv2_result\speedplus\pose_pre"
    # np.savetxt does not create missing directories.
    os.makedirs(savePath, exist_ok=True)

    # Loop through dataloader
    for images, names in data_loader:
        print(f"process image-{names[0]}....")

        assert images.shape[0] == 1, 'Use batch size = 1 for testing'

        # NOTE(review): the tensor is forwarded as-is; the pretrained weights
        # reportedly expect ImageNet-normalised input, so confirm the dataset
        # applies that normalisation.
        # Move the batch to the inference device explicitly; this is a no-op
        # if the model also moves its input.
        images = images.permute(0, 3, 1, 2).to(device)
        with torch.no_grad():
            # Forward pass
            outputs = model(images,
                            is_train=False,
                            gpu=device)

            # method 1: pose solved from the predicted heatmaps
            keypts_pr, q_pr, t_pr, reject = solve_pose_from_heatmaps(
                outputs[0].squeeze(0).cpu(),
                cfg.DATASET.IMAGE_SIZE,
                cfg.TEST.HEATMAP_THRESHOLD,
                camera, keypts_true_3D
            )
            if reject:
                # Only touch q_pr / t_pr once the solver has accepted them.
                print(f"can't compute pose for image-{names[0]}.....")
            else:
                np.savetxt(os.path.join(savePath, f"{names[0]}_hp.txt"),
                           _pose_to_matrix(quat2mat(q_pr), t_pr))

            # method 2: pose read directly from the second network output
            classification, bbox_prediction, \
                rotation_raw, translation = outputs[1]
            _, cls_argmax = torch.max(classification, dim=1)

            R_pr = rot_6d_to_matrix(rotation_raw[0, cls_argmax, :]).squeeze().cpu().numpy()
            t_pr = translation[0, cls_argmax].squeeze().cpu().numpy()
            np.savetxt(os.path.join(savePath, f"{names[0]}.txt"),
                       _pose_to_matrix(R_pr, t_pr))
    return

def parse_args():
    """Parse the command-line arguments of the SPNv2 test script.

    Returns:
        argparse.Namespace: ``cfg`` holds the path of the experiment
        configuration file; ``opts`` holds every remaining command-line
        token, used to override config options.
    """
    parser = argparse.ArgumentParser(description='Test on SPNv2')

    # general
    # Raw string: the Windows path contains backslash sequences such as
    # "\A" and "\s" that are invalid escapes in a normal string literal.
    parser.add_argument('--cfg',
                        help='experiment configure file name',
                        default=r"D:\A_projectWok\spnv2-new\experiments\offline_train_full_config_phi3_BN.yaml",
                        type=str)

    parser.add_argument('opts',
                        help="Modify config options using the command-line",
                        default=None,
                        nargs=argparse.REMAINDER)

    args = parser.parse_args()

    return args

def main(cfg):
    """Build SPNv2 from the config, load the test weights and run ``do_valid``.

    The checkpoint is looked up at ``cfg.OUTPUT_DIR / cfg.TEST.MODEL_FILE``.
    If that path is missing or is a directory, no weights are loaded and a
    warning is logged, because the network then runs with whatever
    parameters ``build_spnv2`` produced.

    Args:
        cfg: project configuration node; updated in place from the
            command-line arguments.
    """
    args = parse_args()
    update_config(cfg, args)

    # Load model to test
    expected_model = osp.join(cfg.OUTPUT_DIR, cfg.TEST.MODEL_FILE)
    test_model = expected_model
    if not osp.exists(test_model) or osp.isdir(test_model):
        # No fallback checkpoint is used; an empty path means "do not load".
        test_model = ""
    cfg.defrost()
    cfg.TEST.MODEL_FILE = test_model
    cfg.freeze()

    # Logger & directories
    logger, output_dir, _ = create_logger_directories(cfg, 'test')

    # Set all seeds & cudNN
    set_seeds_cudnn(cfg, seed=cfg.SEED)

    # GPU?
    if cfg.CUDA and torch.cuda.is_available():
        device = torch.device('cuda:0')
    else:
        device = torch.device('cpu')

    # Complete network
    model = build_spnv2(cfg)

    # Load checkpoint
    if cfg.TEST.MODEL_FILE:
        model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE, map_location='cpu'), strict=True)
        logger.info('   - Model loaded from {}'.format(cfg.TEST.MODEL_FILE))
    else:
        # Make the silent fallback visible: predictions made without the
        # checkpoint are not meaningful.
        logger.warning('   - No checkpoint found at {}; running WITHOUT loaded weights'.format(expected_model))
    model = model.to(device)

    # Dataloaders
    dataset = SPEEDPLUSDataset(root=r"D:\A_projectWok\newTest\spnv2_result\speedplus\speed-2\color")
    test_loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=1,
        shuffle=False,
        num_workers=1,
        pin_memory=True,
        drop_last=True,
    )
    # For validation
    camera_json_path = r"D:\A_projectWok\newTest\spnv2_result\speedplus\camera.txt"
    camera = load_camera_intrinsics(camera_json_path)
    # The 3D keypoints are read from a .mat file instead of
    # load_tango_3d_keypoints(cfg.DATASET.KEYPOINTS).
    mat_dir = r"D:\A_projectWok\newTest\spnv2_result\speedplus\models\tangoPoints.mat"
    vertices = loadmat(mat_dir)['tango3Dpoints']  # [3 x 11]
    keypts_true_3D = np.transpose(np.array(vertices, dtype=np.float32))  # [11 x 3]

    # ---------------------------------------
    # Main Test
    # ---------------------------------------
    do_valid(model,
             test_loader,
             camera,
             keypts_true_3D,
             valid_fraction=None,
             log_dir=output_dir,
             device=device)

# Script entry point: run the test only when executed directly.
if __name__ == '__main__':
    main(cfg)

Apr 16 '25 09:04 pubyLu

I see that you seem to have implemented your own dataset class, as shown by `from core.dataset.MyDataset import SPEEDPLUSDataset`. Please note that if you follow a different set of image pre-processing steps compared to training, the results will be worse. I suggest that you first try using the original SPEEDPLUSDataset class and see if the problem persists.

Apr 16 '25 23:04 tpark94

Thanks for your reply. I also use the Speed++ dataset to run your work, but I rewrote SPEEDPLUSDataset in MyDataset.py because I only need to load the images and call output = model(image) to infer the poses of the inputs. The original SPEEDPLUSDataset's `__getitem__()` returns [image, targets], but the targets are not used in inference: the do_valid() function you provided in inference.py shows that targets are only used for calculating metrics.

class SPEEDPLUSDataset(data.Dataset):
def __init__(self, root):
    super(SPEEDPLUSDataset, self).__init__()
    self.root = root
    self.image_lists = self.processRoot(root)

def processRoot(self, root):
    """Collect the paths of the .jpg/.png files located directly in ``root``.

    Args:
        root: directory to scan (not searched recursively).

    Returns:
        list[str]: ``root`` joined with each matching file name, sorted by
        file name so the dataset order is reproducible across runs.
    """
    # os.listdir returns entries in arbitrary order, hence the sort.
    return [
        os.path.join(root, file)
        for file in sorted(os.listdir(root))
        if file.endswith(('.jpg', '.png'))
    ]

def __getitem__(self, index):
    """Load one image, normalise it and return it with its file stem.

    The pixel values are scaled to [0, 1] and standardised per channel with
    mean (0.485, 0.456, 0.406) and std (0.229, 0.224, 0.225).

    Args:
        index: position in ``self.image_lists``.

    Returns:
        tuple: ``(image, name)`` where ``image`` is a float tensor in the
        axis order returned by ``cv2.imread`` (channels last) and ``name``
        is the file name without its extension.

    Raises:
        FileNotFoundError: if OpenCV cannot read the file.
    """
    image_path = self.image_lists[index]
    image = cv2.imread(image_path)  # loaded at native resolution
    if image is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError(f"cv2.imread could not read image: {image_path}")
    # NOTE(review): no resize is applied here; if the network expects a fixed
    # input size, resize before normalising, e.g.
    # image = cv2.resize(image, (768, 512))

    # splitext drops only the extension, so dots inside the stem survive.
    name = os.path.splitext(os.path.basename(image_path))[0]

    image = torch.from_numpy(image).float().div_(255.0)
    # NOTE(review): these statistics are conventionally listed in RGB order
    # while cv2.imread yields BGR; identical for grayscale images, but
    # confirm the channel order used in training for colour images.
    mean = image.new_tensor((0.485, 0.456, 0.406))
    std = image.new_tensor((0.229, 0.224, 0.225))
    image = (image - mean) / std  # broadcasts over the trailing channel axis
    return image, name

def __len__(self):
    """Return how many image paths this dataset holds."""
    image_count = len(self.image_lists)
    return image_count

Apr 17 '25 05:04 pubyLu

Besides, I couldn't find the model.ply file online. If it's convenient for you, could you please send me a copy?

Apr 17 '25 07:04 pubyLu

Could you please check if you are using the data transformations as defined in core/dataset/transforms/build.py?

In line 59, there is a line that goes

transforms += [A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)), ToTensorV2()]

which effectively processes the image pixel values using the ImageNet's statistics. The trained weights expect this transformation to be part of the pre-processing step.

Apr 21 '25 00:04 tpark94

I couldn't find the model.ply file online, either.

May 27 '25 08:05 Yancccccc

Test issue

Thanks for your help and reply! Have a good day!

Thanks for your help and reply! Have a good day!