Test issue
hello, thansk for your working, I am trying to test synthetic dataset based on the provided weight "spnv2_efficientnetb3_fullconfig_offline.pth.tar", and using offline_train_full_config_phi3_BN.yaml file. In this .yaml file, i modify the image_size to size[768,512], and adjust the camera intrinsic parameters to match the size of [768, 512]. Then I execute output=model(image), get and save R_pre, t_pre from output[1]. Is there a problem with my approach? By computing metrics between PrePose and GroundTruth Pose, I find there is a large error in PrePose .
Hello,
I believe the second element of output = model(image) is the regressed pose according to the setting in the YAML file. So, the camera intrinsics file shouldn't play a role for this output.
How big and how consistent are the errors? Do you encounter the same errors if you keep the original YAML file without modifications?
Good afternoon, I tred to keep the original YAML file without modifications, but also had big errors, It is equivalent to no reasoning ability.
I attach the code demo.py that I use for this work. Could you please help me to see what is wrong?
Thanks for your help and reply! Have a good day!
''' Copyright (c) 2022 SLAB Group Licensed under MIT License (see LICENSE.md) Author: Tae Ha Park ([email protected]) '''
from future import absolute_import from future import division from future import print_function
import os import os.path as osp import argparse
from scipy.io import loadmat
import time from scipy.io import savemat
from core.utils.utils import AverageMeter, ProgressMeter
from core.utils.postprocess import solve_pose_from_heatmaps, rot_6d_to_matrix
from core.utils.metrics import *
from core.utils.visualize import *
from core.config import cfg, update_config
from core.nets import build_spnv2
from core.dataset.MyDataset import SPEEDPLUSDataset
from core.engine.inference import do_valid
from core.utils.utils import set_seeds_cudnn, create_logger_directories,
load_camera_intrinsics, load_tango_3d_keypoints
import numpy as np
from transforms3d.quaternions import quat2mat
def do_valid(model, data_loader, camera, keypts_true_3D, valid_fraction=None, log_dir=None, device=torch.device('cpu')):
# switch to eval mode
model.eval()
savePath=r"D:\A_projectWok\newTest\spnv2_result\speedplus\pose_pre"
# Loop through dataloader
# end = time.time()
for idx, (images, names) in enumerate(data_loader):
print(f"process image-{names[0]}....")
# start = time.time()
# data_time.update((start - end)*1000)
assert images.shape[0] == 1, 'Use batch size = 1 for testing'
images=images.permute(0,3,1,2)
with torch.no_grad():
# Forward pass
outputs = model(images,
is_train=False,
gpu=device)
# method 1
keypts_pr, q_pr, t_pr, reject = solve_pose_from_heatmaps(
outputs[0].squeeze(0).cpu(),
cfg.DATASET.IMAGE_SIZE,
cfg.TEST.HEATMAP_THRESHOLD,
camera, keypts_true_3D
)
preR = quat2mat(q_pr)
preT = t_pr
preRT = np.eye(4)
preRT[:3, :3] = preR
preRT[:3, 3] = preT
if not reject:
np.savetxt(os.path.join(savePath, f"{names[0]}_hp.txt"), preRT)
pass
else:
q_pr = t_pr = None
print(f"can't compute pose for image-{names[0]}.....")
# method 2
classification, bbox_prediction, \
rotation_raw, translation = outputs[1]
_, cls_argmax = torch.max(classification, dim=1)
# Bbox
bbox_pr = bbox_prediction[0,cls_argmax].squeeze().cpu().numpy()
R_pr = rot_6d_to_matrix(rotation_raw[0,cls_argmax,:]).squeeze().cpu().numpy()
t_pr = translation[0,cls_argmax].squeeze().cpu().numpy()
RT = np.eye(4)
RT[:3, :3] = R_pr
RT[:3, 3] = t_pr
np.savetxt(os.path.join(savePath, f"{names[0]}.txt"), RT)
return
for i in range(): continue def parse_args(): parser = argparse.ArgumentParser(description='Test on SPNv2')
# general
parser.add_argument('--cfg',
help='experiment configure file name',
default="D:\A_projectWok\spnv2-new\experiments\offline_train_full_config_phi3_BN.yaml",
type=str)
parser.add_argument('opts',
help="Modify config options using the command-line",
default=None,
nargs=argparse.REMAINDER)
args = parser.parse_args()
return args
def main(cfg): args = parse_args() update_config(cfg, args)
# Load model to test
test_model = osp.join(cfg.OUTPUT_DIR, cfg.TEST.MODEL_FILE)
if not osp.exists(test_model) or osp.isdir(test_model):
# test_model = osp.join(cfg.OUTPUT_DIR, cfg.MODEL.BACKBONE.NAME,
# cfg.EXP_NAME, 'model_best.pth.tar')
test_model = ""
cfg.defrost()
cfg.TEST.MODEL_FILE = test_model
cfg.freeze()
# Logger & directories
logger, output_dir, _ = create_logger_directories(cfg, 'test')
# Set all seeds & cudNN
set_seeds_cudnn(cfg, seed=cfg.SEED)
# GPU?
device = torch.device('cuda:0') if cfg.CUDA and torch.cuda.is_available() else torch.device('cpu')
# Complete network
model = build_spnv2(cfg)
# Load checkpoint
if cfg.TEST.MODEL_FILE:
model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE, map_location='cpu'), strict=True)
logger.info(' - Model loaded from {}'.format(cfg.TEST.MODEL_FILE))
model = model.to(device)
# Dataloaders
dataset = SPEEDPLUSDataset(root=r"D:\A_projectWok\newTest\spnv2_result\speedplus\speed-2\color")
test_loader = torch.utils.data.DataLoader(
dataset,
batch_size=1,
shuffle=False,
num_workers=1,
pin_memory=True,
drop_last=True,
)
# For validation
camera_json_path = r"D:\A_projectWok\newTest\spnv2_result\speedplus\camera.txt"
camera = load_camera_intrinsics(camera_json_path)
# keypts_true_3D = load_tango_3d_keypoints(cfg.DATASET.KEYPOINTS)
mat_dir = r"D:\A_projectWok\newTest\spnv2_result\speedplus\models\tangoPoints.mat"
vertices = loadmat(mat_dir)['tango3Dpoints'] # [3 x 11]
keypts_true_3D = np.transpose(np.array(vertices, dtype=np.float32)) # [11 x 3]
# ---------------------------------------
# Main Test
# ---------------------------------------
do_valid(model,
test_loader,
camera,
keypts_true_3D,
valid_fraction=None,
log_dir=output_dir,
device=device)
if name=='main': main(cfg)
I see that you seemed to have implemented your own dataset class as shown by from core.dataset.MyDataset import SPEEDPLUSDataset. Please do note that if you follow a different set of image pre-processing steps compared to training, the results will be worse. I suggest that you first try using the original SPEEDPLUSDataset class and see if the problem persists.
thanks for your reply, I also use Speed++ dataset to implement your work. But I just rewrite the SPEEDPLUSDataset in MyDataset.py, because i just need load the images, and use output = model(image) to infer the poses of input. Original SPEEDPLUSDataset's function "getitem()" returns [image, targets], but targets are not used in inference. The code you provided in inference.py--do_valid() function shows that targets are only used for calculating metrics. class SPEEDPLUSDataset(data.Dataset): def init(self, root): super(SPEEDPLUSDataset, self).init() self.root = root self.image_lists = self.processRoot(root)
def processRoot(self, root):
images=[]
for file in os.listdir(root):
if file.endswith('.jpg') or file.endswith('.png'):
images.append(os.path.join(root, file))
return images
def __getitem__(self, index):
image_path = self.image_lists[index]
image = cv2.imread(image_path) # load 1920*1080 images
# image = cv2.resize(image, (768, 512)) # load 1920*1080 images, then resize them to (768,512)
name = os.path.basename(image_path).split('.')[0]
image = torch.from_numpy(image).float()
return image, name
def __len__(self):
return len(self.image_lists)
Besides, I couldn't find the model.ply file online. If it's convenient for you, could you please send me a copy
Could you please check if you are using the data transformations as defined in core/dataset/transforms/build.py?
In line 59, there is a line that goes
transforms += [A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)), ToTensorV2()]
which effectively processes the image pixel values using the ImageNet's statistics. The trained weights expect this transformation to be part of the pre-processing step.
I couldn't find the model.ply file online,too.