How to test on my own images
I have some images, and I want to get cropped sub-images for each image.
Hello, I've encountered the same issue. Could you kindly let me know if you've managed to resolve it, and if so, how did you go about it?
+1
Hi @wyy-thu, I have finished the pipeline for testing on my own dataset. First, you should use faster-rcnn-vg to output the top-scoring bboxes (please note that the output format of the bboxes is yxyx). Second, you can use generate_bboxes to generate the predefined crops (again, please note that the output format of the bboxes is yxyx). Finally, run test.py (you will need to modify the code to support your own dataset).
Thank you for the insights provided by @dongdk
This is my final implementation:
import torch, os, sys, cv2, random, yaml
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from torch.autograd import Variable
from torchvision import models, transforms
from types import SimpleNamespace
from model.ssc import SSC
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
def get_fastrcnn_bbox(tf_image, confidence_threshold=0.1):
    """Detect objects with torchvision's Faster R-CNN and return their boxes.

    Args:
        tf_image: image batch passed straight to the detector; it must live
            on the same device as the module-level ``device``.
        confidence_threshold: detections whose score is not strictly greater
            than this value are discarded.

    Returns:
        A list of ``[x1, y1, x2, y2, score]`` entries for the first image of
        the batch, with integer coordinates and the score rounded to two
        decimals. The list is empty when nothing passes the threshold.
    """
    # NOTE: the detector is re-created on every call; callers that process
    # many images may want to build it once and reuse it.
    model = models.detection.fasterrcnn_resnet50_fpn(pretrained=True).to(device)
    model.eval()
    with torch.no_grad():
        predictions = model(tf_image)
    if not predictions:
        return []

    detections = predictions[0]
    bboxes = []
    for box, score in zip(detections['boxes'], detections['scores']):
        score = score.item()
        # Compare the raw score: rounding first would drop detections that
        # are only marginally above the threshold (e.g. 0.104 vs. 0.1).
        if score <= confidence_threshold:
            continue
        x1, y1, x2, y2 = box.cpu().numpy().astype(int)
        # NOTE(review): boxes are emitted in x1, y1, x2, y2 order; if the
        # cropping network expects y-first (yxyx) boxes, swap here -- confirm.
        bboxes.append([int(x1), int(y1), int(x2), int(y2), round(score, 2)])
    return bboxes
def generate_bboxes_1_1(image, step=12, max_steps=30, min_area_ratio=0.3):
    """Enumerate square (1:1) candidate crops on a regular sliding grid.

    Crop sides are multiples of ``step``. A side length is used only when it
    is strictly smaller than both image dimensions and the crop area is
    strictly greater than ``min_area_ratio`` of the image area. Each accepted
    size is slid over the image in strides of ``step`` pixels.

    Args:
        image: object whose ``shape[0]`` is the height and ``shape[1]`` the
            width (e.g. an H x W x C array).
        step: grid stride, also the increment between crop side lengths.
        max_steps: number of side-length multiples tried (``0 .. max_steps-1``).
        min_area_ratio: minimum crop area as a fraction of the image area.

    Returns:
        A list of ``[x1, y1, x2, y2]`` integer boxes with inclusive corners,
        ordered by increasing size, then x offset, then y offset.

    Raises:
        ValueError: if ``step`` is not positive.
    """
    if step <= 0:
        raise ValueError("step must be a positive integer")

    height, width = image.shape[0], image.shape[1]
    min_area = min_area_ratio * height * width

    annotations = []
    for multiple in range(max_steps):
        side = step * multiple
        if side >= height or side >= width:
            continue
        if side * side <= min_area:
            continue
        for x_start in range(0, width - side, step):
            for y_start in range(0, height - side, step):
                annotations.append([
                    int(x_start),
                    int(y_start),
                    int(x_start + side - 1),
                    int(y_start + side - 1),
                ])
    return annotations
def inference(cfg_path, tf_image, bboxs, fastrcnn_bboxes):
with open(cfg_path, 'r') as f:
config = yaml.load(f, Loader=yaml.FullLoader)
cfg = SimpleNamespace(**config)
net = SSC(cfg)
net.load_state_dict(torch.load("./gaicv1_best.pth"))
cuda = True if torch.cuda.is_available() else False
if cuda:
net = torch.nn.DataParallel(net, device_ids=[0])
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
net = net.cuda()
roi = []
rcnn_roi = []
for box in bboxs:
roi.append((0, box[0], box[1], box[2], box[3]))
for box in fastrcnn_bboxes:
rcnn_roi.append((0, box[0], box[1], box[2], box[3]))
if cuda:
image = Variable(tf_image.cuda())
roi = Variable(torch.Tensor(roi))
rcnn_roi = Variable(torch.Tensor(rcnn_roi))
else:
image = Variable(tf_image)
roi = Variable(roi)
rcnn_roi = Variable(rcnn_roi)
pre_scores = net(image, roi, rcnn_roi)
pre_scores = pre_scores.cpu().detach().numpy().reshape(-1)
max_index = np.argmax(pre_scores)
finally_rect = bboxs[max_index]
return [int(value) for value in finally_rect]
def _random_color():
    """Return a random RGB triple with components in [0, 1)."""
    return (random.random(), random.random(), random.random())


def _show_debug(image, boxes):
    """Plot ``boxes`` drawn on ``image``; for a single box also plot its crop."""
    # Draw on a copy so the crop below is taken from unmarked pixels.
    canvas = image.copy()
    if len(boxes) == 1:
        box = boxes[0]
        _, axs = plt.subplots(1, 2, figsize=(12, 6))
        cv2.rectangle(canvas, (box[0], box[1]), (box[2], box[3]), _random_color(), 2)
        axs[0].imshow(canvas)
        axs[0].set_title("Original")
        axs[0].axis('off')
        # Candidate boxes use inclusive corners, hence the +1 on the slice ends.
        cropped_image = image[int(box[1]):int(box[3]) + 1, int(box[0]):int(box[2]) + 1]
        axs[1].imshow(cropped_image)
        axs[1].set_title("Cropped")
        axs[1].axis('off')
        plt.tight_layout()
    else:
        for i, box in enumerate(boxes):
            color = _random_color()
            cv2.rectangle(canvas, (box[0], box[1]), (box[2], box[3]), color, 2)
            # Boxes with a fifth value carry a score; show it next to the index.
            label = f"{i}:({box[4]:.2f})" if len(box) > 4 else str(i)
            cv2.putText(canvas, label, (box[0], box[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
        plt.imshow(canvas)
        plt.axis("off")
    plt.show()


def main():
    """Pick the best square crop for the image given on the command line.

    Reads the image path from ``sys.argv[1]``, resizes the image so its
    shorter side is about 256 px (both sides rounded to multiples of 32),
    runs the detector, enumerates candidate crops, scores them, and, in debug
    mode, prints and displays the winning box. The box is expressed in the
    coordinates of the resized image.
    """
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} IMAGE_PATH")
    image_path = sys.argv[1]
    image_size = 256
    debug = True

    bgr_image = cv2.imread(image_path)
    if bgr_image is None:
        # cv2.imread signals an unreadable/missing file by returning None.
        sys.exit(f"could not read image: {image_path}")
    cv_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)

    scale = float(image_size) / float(min(cv_image.shape[:2]))
    h = round(cv_image.shape[0] * scale / 32.0) * 32
    w = round(cv_image.shape[1] * scale / 32.0) * 32
    # NOTE(review): pixels are scaled by 1/256 with no mean/std normalisation;
    # confirm this matches the preprocessing the checkpoint was trained with.
    resized_image = cv2.resize(cv_image, (int(w), int(h))) / 256.0
    preprocess = transforms.Compose([transforms.ToTensor(), ])
    tf_image = preprocess(resized_image).unsqueeze(0).to(device).float()

    # generate fastrcnn bboxes
    fastrcnn_bboxes = get_fastrcnn_bbox(tf_image)
    # generate bboxes
    annotations = generate_bboxes_1_1(resized_image)
    # inference
    rect = inference("./config/GAICv1.yaml", tf_image, annotations, fastrcnn_bboxes)

    if debug:
        # Swap in e.g. annotations[:5] or fastrcnn_bboxes to inspect other boxes.
        debug_bbox = [rect]
        print(debug_bbox)
        _show_debug(resized_image, debug_bbox)


if __name__ == "__main__":
    main()
I'm a novice in machine learning. This is my implementation based on my understanding. I'm not even sure whether it is correct, but the results seem pretty good. If anyone finds any issues, please let me know. Thank you very much!
Although it works well, unlike the original, I used torchvision's pretrained Faster R-CNN (`fasterrcnn_resnet50_fpn`, called "fastrcnn" in the code) instead of the original faster-rcnn-vg detector, because I couldn't get the latter to run properly. If anyone has a simpler way, please let me know. Additionally, I plan to replace the detector with YOLO, but I haven't implemented that yet.
Thank you for the insights provided by @dongdk
This is my final implementation:
import torch, os, sys, cv2, random, yaml import numpy as np from PIL import Image import matplotlib.pyplot as plt from torch.autograd import Variable from torchvision import models, transforms from types import SimpleNamespace from model.ssc import SSC device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') def get_fastrcnn_bbox(tf_image, confidence_threshold=0.1): bboxes = [] model = models.detection.fasterrcnn_resnet50_fpn(pretrained=True).to(device) model.eval() predictions = None with torch.no_grad(): predictions = model(tf_image) if predictions: predictions = predictions[0] for i in range(len(predictions['boxes'])): score = round(predictions['scores'][i].item(), 2) if score > confidence_threshold: box = predictions['boxes'][i].cpu().numpy().astype(int) (x1, y1, x2, y2) = box bboxes.append([int(x1), int(y1), int(x2), int(y2), score]) return bboxes def generate_bboxes_1_1(image): h = image.shape[0] w = image.shape[1] h_step = 12 w_step = 12 annotations = list() for i in range(0,30): out_h = h_step*i out_w = w_step*i if out_h < h and out_w < w and out_h*out_w>0.3*h*w: for w_start in range(0,w-out_w,w_step): for h_start in range(0,h-out_h,h_step): annotations.append([int(w_start), int(h_start), int(w_start+out_w-1), int(h_start+out_h-1)]) return annotations def inference(cfg_path, tf_image, bboxs, fastrcnn_bboxes): with open(cfg_path, 'r') as f: config = yaml.load(f, Loader=yaml.FullLoader) cfg = SimpleNamespace(**config) net = SSC(cfg) net.load_state_dict(torch.load("./gaicv1_best.pth")) cuda = True if torch.cuda.is_available() else False if cuda: net = torch.nn.DataParallel(net, device_ids=[0]) torch.backends.cudnn.deterministic = True torch.backends.cudnn.benchmark = False net = net.cuda() roi = [] rcnn_roi = [] for box in bboxs: roi.append((0, box[0], box[1], box[2], box[3])) for box in fastrcnn_bboxes: rcnn_roi.append((0, box[0], box[1], box[2], box[3])) if cuda: image = Variable(tf_image.cuda()) roi = Variable(torch.Tensor(roi)) 
rcnn_roi = Variable(torch.Tensor(rcnn_roi)) else: image = Variable(tf_image) roi = Variable(roi) rcnn_roi = Variable(rcnn_roi) pre_scores = net(image, roi, rcnn_roi) pre_scores = pre_scores.cpu().detach().numpy().reshape(-1) max_index = np.argmax(pre_scores) finally_rect = bboxs[max_index] return [int(value) for value in finally_rect] def main(): image_path = sys.argv[1] image_size = 256 debug = True cv_image = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB) scale = float(image_size) / float(min(cv_image.shape[:2])) h = round(cv_image.shape[0] * scale / 32.0) * 32 w = round(cv_image.shape[1] * scale / 32.0) * 32 resized_image = cv2.resize(cv_image,(int(w),int(h))) / 256.0 preprocess = transforms.Compose([transforms.ToTensor(), ]) tf_image = preprocess(resized_image).unsqueeze(0).to(device).float() # generate fastrcnn bboxes fastrcnn_bboxes = get_fastrcnn_bbox(tf_image) # generate bboxes annotations = generate_bboxes_1_1(resized_image) # inference rect = inference("./config/GAICv1.yaml", tf_image, annotations, fastrcnn_bboxes) if debug: # debug_bbox = annotations[:5] debug_bbox = [rect] print(debug_bbox) if len(debug_bbox) == 1: fig, axs = plt.subplots(1, 2, figsize=(12, 6)) axs[0].axis('off') for i, box in enumerate(debug_bbox): color = (random.random(), random.random(), random.random()) cv2.rectangle(resized_image, (box[0], box[1]), (box[2], box[3]), color, 2) axs[0].imshow(resized_image) axs[0].set_title("Original") box = debug_bbox[0] cropped_image = resized_image[int(box[1]):int(box[3]), int(box[0]):int(box[2])] axs[1].imshow(cropped_image) axs[1].set_title("Cropped") axs[1].axis('off') plt.tight_layout() else: for i, box in enumerate(debug_bbox): color = (random.random(), random.random(), random.random()) cv2.rectangle(resized_image, (box[0], box[1]), (box[2], box[3]), color, 2) # label = f"Obj: {predictions['labels'][i].item()} ({box[4]:.2f})" label = f"{i}:({box[4]:.2f})" if len(box) > 4 else str(i) cv2.putText(resized_image, label, (box[0], box[1] - 
10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2) plt.imshow(resized_image) plt.axis("off") plt.show() if __name__ == "__main__": main()I'm a novice in machine learning. This is my implementation based on my understanding. I'm not even sure if they are correct, but the results seem pretty good. If anyone finds any issues, please let me know in time. Thank you very much!
![]()
Although it works well, unlike the original, I used torchvision's pretrained Faster R-CNN (`fasterrcnn_resnet50_fpn`, called "fastrcnn" in the code) instead of the original faster-rcnn-vg detector, because I couldn't get the latter to run properly. If anyone has a simpler way, please let me know. Additionally, I plan to replace the detector with YOLO, but I haven't implemented that yet.
Have you done this with YOLO?
It does not matter what kind of detector or segmentation method you use; providing the bboxes is enough. Good luck.
I managed to test on my own images using PyTorch 1.11 and torchvision 0.12. Thanks, @h3clikejava!