S2CNet icon indicating copy to clipboard operation
S2CNet copied to clipboard

How to test on my own images

Open wyy-thu opened this issue 1 year ago • 8 comments

I have some images, and I want to get cropped sub-images for each image.

wyy-thu avatar May 09 '24 08:05 wyy-thu

Hello, I've encountered the same issue. Could you kindly let me know if you've managed to resolve it, and if so, how did you go about it?

aa-oo avatar Jul 29 '24 07:07 aa-oo

+1

dongdk avatar Sep 12 '24 09:09 dongdk

Hi @wyy-thu, I have finished the pipeline for testing on my own dataset. First, use faster-rcnn-vg to output the top-scoring bbox (please note that the output format of the bbox is yxyx). Second, use generate_bboxes to generate the predefined crops (again, note that the output format of the bbox is yxyx). Finally, run test.py (you will need to modify the code to support your own dataset).

dongdk avatar Sep 14 '24 07:09 dongdk

Thank you for the insights provided by @dongdk

This is my final implementation:

import torch, os, sys, cv2, random, yaml
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from torch.autograd import Variable
from torchvision import models, transforms
from types import SimpleNamespace
from model.ssc import SSC

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

def get_fastrcnn_bbox(tf_image, confidence_threshold=0.1):
    """Run torchvision's pretrained detector and keep the confident boxes.

    A ``fasterrcnn_resnet50_fpn`` model is built on the module-level
    ``device`` on every call, switched to eval mode and run once without
    gradient tracking. Only the first entry of the returned predictions is
    inspected.

    Args:
        tf_image: Input passed straight to the detector; presumably a
            (1, 3, H, W) float tensor already on ``device`` (TODO confirm
            against the caller).
        confidence_threshold: A detection is kept only when its score,
            rounded to two decimals, is strictly greater than this value.

    Returns:
        A list of ``[x1, y1, x2, y2, score]`` entries with integer
        coordinates and the rounded score.
    """
    detector = models.detection.fasterrcnn_resnet50_fpn(pretrained=True).to(device)
    detector.eval()

    outputs = None
    with torch.no_grad():
        outputs = detector(tf_image)

    kept = []
    if not outputs:
        return kept

    first = outputs[0]
    for raw_box, raw_score in zip(first['boxes'], first['scores']):
        # The threshold is applied to the rounded score, not the raw one.
        score = round(raw_score.item(), 2)
        if not score > confidence_threshold:
            continue
        x1, y1, x2, y2 = raw_box.cpu().numpy().astype(int)
        kept.append([int(x1), int(y1), int(x2), int(y2), score])
    return kept

def generate_bboxes_1_1(image, *, step=12, max_multiplier=30, min_area_ratio=0.3):
    """Enumerate square (1:1) candidate crop boxes on a regular grid.

    Square side lengths are the multiples ``step * k`` for
    ``1 <= k < max_multiplier``. A size is used only when it is strictly
    smaller than both image dimensions and its area is strictly greater than
    ``min_area_ratio`` times the image area. For each accepted size the
    top-left corner slides over the image in increments of ``step``.

    With the default keyword values the output is identical to the previous
    hard-coded implementation (step 12, multipliers below 30, ratio 0.3).

    Args:
        image: Any object exposing ``shape`` with height at index 0 and width
            at index 1 (e.g. a NumPy image array).
        step: Grid stride in pixels; also the granularity of the side length.
        max_multiplier: Exclusive upper bound on the side-length multiplier.
        min_area_ratio: Minimum crop area as a fraction of the image area.

    Returns:
        A list of ``[x1, y1, x2, y2]`` integer boxes. ``x2``/``y2`` are
        inclusive (``start + side - 1``).

    Raises:
        ValueError: If ``step`` is not positive.
    """
    if step <= 0:
        raise ValueError("step must be a positive integer")

    height, width = image.shape[0], image.shape[1]
    min_area = min_area_ratio * height * width

    annotations = []
    # A multiplier of 0 would yield a zero-sized square, so start at 1.
    for multiplier in range(1, max_multiplier):
        side = step * multiplier
        if side >= height or side >= width:
            # Sides only grow from here, so no later size can fit either.
            break
        if side * side <= min_area:
            continue
        for w_start in range(0, width - side, step):
            for h_start in range(0, height - side, step):
                annotations.append([
                    int(w_start),
                    int(h_start),
                    int(w_start + side - 1),
                    int(h_start + side - 1),
                ])
    return annotations

def inference(cfg_path, tf_image, bboxs, fastrcnn_bboxes):
    """Score every candidate crop with the SSC network and return the best one.

    The YAML file at ``cfg_path`` is loaded into a ``SimpleNamespace`` and
    used to build ``SSC``. Weights are read from ``./gaicv1_best.pth``. The
    network is run once on ``tf_image`` with the candidate crops and the
    detector boxes, and the candidate with the highest predicted score is
    returned.

    Args:
        cfg_path: Path to the YAML config for ``SSC``. The file is parsed
            with ``yaml.FullLoader`` and is assumed to be a trusted local
            file.
        tf_image: Image tensor passed to the network; presumably
            (1, 3, H, W) float (TODO confirm against the caller).
        bboxs: Non-empty list of candidate crops; the first four values of
            each entry are used.
        fastrcnn_bboxes: Detector boxes; the first four values of each entry
            are used and any extra values (e.g. a score) are ignored.

    Returns:
        The best-scoring entry of ``bboxs`` with every value cast to ``int``.

    Raises:
        ValueError: If ``bboxs`` is empty, since there is nothing to rank.
    """
    if not bboxs:
        raise ValueError("bboxs must contain at least one candidate crop")

    with open(cfg_path, 'r') as f:
        config = yaml.load(f, Loader=yaml.FullLoader)
    cfg = SimpleNamespace(**config)

    cuda = torch.cuda.is_available()
    run_device = torch.device('cuda' if cuda else 'cpu')

    net = SSC(cfg)
    # map_location lets a checkpoint saved on a GPU load on a CPU-only host.
    net.load_state_dict(torch.load("./gaicv1_best.pth", map_location='cpu'))
    if cuda:
        net = torch.nn.DataParallel(net, device_ids=[0])
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
        net = net.cuda()
    # Inference only: put any dropout / batch-norm layers in eval mode.
    net.eval()

    def to_roi_tensor(boxes):
        # Each row is (0, box[0], box[1], box[2], box[3]); the leading 0 is
        # presumably the batch index of the single input image (TODO confirm).
        rows = [(0, box[0], box[1], box[2], box[3]) for box in boxes]
        # Reshaping with an explicit row count keeps an empty list as (0, 5).
        # NOTE(review): whether SSC accepts zero detector boxes is not
        # visible here.
        tensor = torch.tensor(rows, dtype=torch.float32).reshape(len(rows), 5)
        return tensor.to(run_device)

    image = tf_image.to(run_device)
    roi = to_roi_tensor(bboxs)
    rcnn_roi = to_roi_tensor(fastrcnn_bboxes)

    with torch.no_grad():
        pre_scores = net(image, roi, rcnn_roi)
    pre_scores = pre_scores.cpu().detach().numpy().reshape(-1)

    max_index = int(np.argmax(pre_scores))
    finally_rect = bboxs[max_index]
    return [int(value) for value in finally_rect]

def main():
    """Crop the image given on the command line and preview the result.

    Steps:
      1. Read the image at ``sys.argv[1]`` and convert it to RGB.
      2. Resize so the shorter side is about ``image_size`` pixels, with both
         sides rounded to a multiple of 32, and divide pixel values by 256.
      3. Run the object detector and generate the square candidate crops.
      4. Let ``inference`` pick the best candidate.
      5. When ``debug`` is on, print the chosen box and show it with
         matplotlib next to the cropped region.

    The reported box is expressed in the coordinates of the resized image,
    not the original file.

    Raises:
        SystemExit: If no image path was supplied or the image cannot be read.
    """
    if len(sys.argv) < 2:
        raise SystemExit(f"usage: python {sys.argv[0]} <image_path>")
    image_path = sys.argv[1]
    image_size = 256
    debug = True

    # cv2.imread returns None instead of raising when the file is missing
    # or cannot be decoded.
    bgr_image = cv2.imread(image_path)
    if bgr_image is None:
        raise SystemExit(f"could not read image: {image_path}")
    cv_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)

    scale = float(image_size) / float(min(cv_image.shape[:2]))
    h = round(cv_image.shape[0] * scale / 32.0) * 32
    w = round(cv_image.shape[1] * scale / 32.0) * 32
    # NOTE(review): pixels are only scaled by 1/256; no mean/std
    # normalisation is applied before the cropping network. Confirm this
    # matches how the checkpoint was trained.
    resized_image = cv2.resize(cv_image, (int(w), int(h))) / 256.0

    preprocess = transforms.Compose([transforms.ToTensor(), ])
    tf_image = preprocess(resized_image).unsqueeze(0).to(device).float()

    # generate fastrcnn bboxes
    fastrcnn_bboxes = get_fastrcnn_bbox(tf_image)

    # generate bboxes
    annotations = generate_bboxes_1_1(resized_image)

    # inference
    rect = inference("./config/GAICv1.yaml", tf_image, annotations, fastrcnn_bboxes)

    if debug:
        # Put several boxes here (e.g. annotations[:5]) to use the multi-box
        # overlay branch below.
        debug_bbox = [rect]
        print(debug_bbox)

        # Draw on a copy so the crop preview does not contain the overlay.
        canvas = resized_image.copy()

        if len(debug_bbox) == 1:
            fig, axs = plt.subplots(1, 2, figsize=(12, 6))
            axs[0].axis('off')

            box = debug_bbox[0]
            color = (random.random(), random.random(), random.random())
            cv2.rectangle(canvas, (box[0], box[1]), (box[2], box[3]), color, 2)

            axs[0].imshow(canvas)
            axs[0].set_title("Original")

            # Box ends are inclusive (start + side - 1), hence the +1.
            cropped_image = resized_image[int(box[1]):int(box[3]) + 1,
                                          int(box[0]):int(box[2]) + 1]
            axs[1].imshow(cropped_image)
            axs[1].set_title("Cropped")
            axs[1].axis('off')

            plt.tight_layout()
        else:
            for i, box in enumerate(debug_bbox):
                color = (random.random(), random.random(), random.random())
                cv2.rectangle(canvas, (box[0], box[1]), (box[2], box[3]), color, 2)
                # Detector boxes carry a score as a fifth value; show it when present.
                label = f"{i}:({box[4]:.2f})" if len(box) > 4 else str(i)
                cv2.putText(canvas, label, (box[0], box[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
            plt.imshow(canvas)
            plt.axis("off")
        plt.show()

if __name__ == "__main__":
    main()

I'm a novice in machine learning. This is my implementation based on my own understanding. I'm not sure it is entirely correct, but the results seem pretty good. If anyone finds any issues, please let me know. Thank you very much! image image

Although it works well, unlike the original, I used Fast R-CNN instead of Faster R-CNN because I couldn't run Faster R-CNN properly. If anyone has a simpler way, please let me know. Additionally, I plan to replace Fast R-CNN with YOLO, but I haven't implemented it yet.

h3clikejava avatar Dec 04 '24 07:12 h3clikejava

Thank you for the insights provided by @dongdk

This is my final implementation:

import torch, os, sys, cv2, random, yaml
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from torch.autograd import Variable
from torchvision import models, transforms
from types import SimpleNamespace
from model.ssc import SSC

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

def get_fastrcnn_bbox(tf_image, confidence_threshold=0.1):
    """Run torchvision's pretrained detector and keep the confident boxes.

    A ``fasterrcnn_resnet50_fpn`` model is built on the module-level
    ``device`` on every call, switched to eval mode and run once without
    gradient tracking. Only the first entry of the returned predictions is
    inspected.

    Args:
        tf_image: Input passed straight to the detector; presumably a
            (1, 3, H, W) float tensor already on ``device`` (TODO confirm
            against the caller).
        confidence_threshold: A detection is kept only when its score,
            rounded to two decimals, is strictly greater than this value.

    Returns:
        A list of ``[x1, y1, x2, y2, score]`` entries with integer
        coordinates and the rounded score.
    """
    detector = models.detection.fasterrcnn_resnet50_fpn(pretrained=True).to(device)
    detector.eval()

    outputs = None
    with torch.no_grad():
        outputs = detector(tf_image)

    kept = []
    if not outputs:
        return kept

    first = outputs[0]
    for raw_box, raw_score in zip(first['boxes'], first['scores']):
        # The threshold is applied to the rounded score, not the raw one.
        score = round(raw_score.item(), 2)
        if not score > confidence_threshold:
            continue
        x1, y1, x2, y2 = raw_box.cpu().numpy().astype(int)
        kept.append([int(x1), int(y1), int(x2), int(y2), score])
    return kept

def generate_bboxes_1_1(image, *, step=12, max_multiplier=30, min_area_ratio=0.3):
    """Enumerate square (1:1) candidate crop boxes on a regular grid.

    Square side lengths are the multiples ``step * k`` for
    ``1 <= k < max_multiplier``. A size is used only when it is strictly
    smaller than both image dimensions and its area is strictly greater than
    ``min_area_ratio`` times the image area. For each accepted size the
    top-left corner slides over the image in increments of ``step``.

    With the default keyword values the output is identical to the previous
    hard-coded implementation (step 12, multipliers below 30, ratio 0.3).

    Args:
        image: Any object exposing ``shape`` with height at index 0 and width
            at index 1 (e.g. a NumPy image array).
        step: Grid stride in pixels; also the granularity of the side length.
        max_multiplier: Exclusive upper bound on the side-length multiplier.
        min_area_ratio: Minimum crop area as a fraction of the image area.

    Returns:
        A list of ``[x1, y1, x2, y2]`` integer boxes. ``x2``/``y2`` are
        inclusive (``start + side - 1``).

    Raises:
        ValueError: If ``step`` is not positive.
    """
    if step <= 0:
        raise ValueError("step must be a positive integer")

    height, width = image.shape[0], image.shape[1]
    min_area = min_area_ratio * height * width

    annotations = []
    # A multiplier of 0 would yield a zero-sized square, so start at 1.
    for multiplier in range(1, max_multiplier):
        side = step * multiplier
        if side >= height or side >= width:
            # Sides only grow from here, so no later size can fit either.
            break
        if side * side <= min_area:
            continue
        for w_start in range(0, width - side, step):
            for h_start in range(0, height - side, step):
                annotations.append([
                    int(w_start),
                    int(h_start),
                    int(w_start + side - 1),
                    int(h_start + side - 1),
                ])
    return annotations

def inference(cfg_path, tf_image, bboxs, fastrcnn_bboxes):
    """Score every candidate crop with the SSC network and return the best one.

    The YAML file at ``cfg_path`` is loaded into a ``SimpleNamespace`` and
    used to build ``SSC``. Weights are read from ``./gaicv1_best.pth``. The
    network is run once on ``tf_image`` with the candidate crops and the
    detector boxes, and the candidate with the highest predicted score is
    returned.

    Args:
        cfg_path: Path to the YAML config for ``SSC``. The file is parsed
            with ``yaml.FullLoader`` and is assumed to be a trusted local
            file.
        tf_image: Image tensor passed to the network; presumably
            (1, 3, H, W) float (TODO confirm against the caller).
        bboxs: Non-empty list of candidate crops; the first four values of
            each entry are used.
        fastrcnn_bboxes: Detector boxes; the first four values of each entry
            are used and any extra values (e.g. a score) are ignored.

    Returns:
        The best-scoring entry of ``bboxs`` with every value cast to ``int``.

    Raises:
        ValueError: If ``bboxs`` is empty, since there is nothing to rank.
    """
    if not bboxs:
        raise ValueError("bboxs must contain at least one candidate crop")

    with open(cfg_path, 'r') as f:
        config = yaml.load(f, Loader=yaml.FullLoader)
    cfg = SimpleNamespace(**config)

    cuda = torch.cuda.is_available()
    run_device = torch.device('cuda' if cuda else 'cpu')

    net = SSC(cfg)
    # map_location lets a checkpoint saved on a GPU load on a CPU-only host.
    net.load_state_dict(torch.load("./gaicv1_best.pth", map_location='cpu'))
    if cuda:
        net = torch.nn.DataParallel(net, device_ids=[0])
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
        net = net.cuda()
    # Inference only: put any dropout / batch-norm layers in eval mode.
    net.eval()

    def to_roi_tensor(boxes):
        # Each row is (0, box[0], box[1], box[2], box[3]); the leading 0 is
        # presumably the batch index of the single input image (TODO confirm).
        rows = [(0, box[0], box[1], box[2], box[3]) for box in boxes]
        # Reshaping with an explicit row count keeps an empty list as (0, 5).
        # NOTE(review): whether SSC accepts zero detector boxes is not
        # visible here.
        tensor = torch.tensor(rows, dtype=torch.float32).reshape(len(rows), 5)
        return tensor.to(run_device)

    image = tf_image.to(run_device)
    roi = to_roi_tensor(bboxs)
    rcnn_roi = to_roi_tensor(fastrcnn_bboxes)

    with torch.no_grad():
        pre_scores = net(image, roi, rcnn_roi)
    pre_scores = pre_scores.cpu().detach().numpy().reshape(-1)

    max_index = int(np.argmax(pre_scores))
    finally_rect = bboxs[max_index]
    return [int(value) for value in finally_rect]

def main():
    """Crop the image given on the command line and preview the result.

    Steps:
      1. Read the image at ``sys.argv[1]`` and convert it to RGB.
      2. Resize so the shorter side is about ``image_size`` pixels, with both
         sides rounded to a multiple of 32, and divide pixel values by 256.
      3. Run the object detector and generate the square candidate crops.
      4. Let ``inference`` pick the best candidate.
      5. When ``debug`` is on, print the chosen box and show it with
         matplotlib next to the cropped region.

    The reported box is expressed in the coordinates of the resized image,
    not the original file.

    Raises:
        SystemExit: If no image path was supplied or the image cannot be read.
    """
    if len(sys.argv) < 2:
        raise SystemExit(f"usage: python {sys.argv[0]} <image_path>")
    image_path = sys.argv[1]
    image_size = 256
    debug = True

    # cv2.imread returns None instead of raising when the file is missing
    # or cannot be decoded.
    bgr_image = cv2.imread(image_path)
    if bgr_image is None:
        raise SystemExit(f"could not read image: {image_path}")
    cv_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)

    scale = float(image_size) / float(min(cv_image.shape[:2]))
    h = round(cv_image.shape[0] * scale / 32.0) * 32
    w = round(cv_image.shape[1] * scale / 32.0) * 32
    # NOTE(review): pixels are only scaled by 1/256; no mean/std
    # normalisation is applied before the cropping network. Confirm this
    # matches how the checkpoint was trained.
    resized_image = cv2.resize(cv_image, (int(w), int(h))) / 256.0

    preprocess = transforms.Compose([transforms.ToTensor(), ])
    tf_image = preprocess(resized_image).unsqueeze(0).to(device).float()

    # generate fastrcnn bboxes
    fastrcnn_bboxes = get_fastrcnn_bbox(tf_image)

    # generate bboxes
    annotations = generate_bboxes_1_1(resized_image)

    # inference
    rect = inference("./config/GAICv1.yaml", tf_image, annotations, fastrcnn_bboxes)

    if debug:
        # Put several boxes here (e.g. annotations[:5]) to use the multi-box
        # overlay branch below.
        debug_bbox = [rect]
        print(debug_bbox)

        # Draw on a copy so the crop preview does not contain the overlay.
        canvas = resized_image.copy()

        if len(debug_bbox) == 1:
            fig, axs = plt.subplots(1, 2, figsize=(12, 6))
            axs[0].axis('off')

            box = debug_bbox[0]
            color = (random.random(), random.random(), random.random())
            cv2.rectangle(canvas, (box[0], box[1]), (box[2], box[3]), color, 2)

            axs[0].imshow(canvas)
            axs[0].set_title("Original")

            # Box ends are inclusive (start + side - 1), hence the +1.
            cropped_image = resized_image[int(box[1]):int(box[3]) + 1,
                                          int(box[0]):int(box[2]) + 1]
            axs[1].imshow(cropped_image)
            axs[1].set_title("Cropped")
            axs[1].axis('off')

            plt.tight_layout()
        else:
            for i, box in enumerate(debug_bbox):
                color = (random.random(), random.random(), random.random())
                cv2.rectangle(canvas, (box[0], box[1]), (box[2], box[3]), color, 2)
                # Detector boxes carry a score as a fifth value; show it when present.
                label = f"{i}:({box[4]:.2f})" if len(box) > 4 else str(i)
                cv2.putText(canvas, label, (box[0], box[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
            plt.imshow(canvas)
            plt.axis("off")
        plt.show()

if __name__ == "__main__":
    main()

I'm a novice in machine learning. This is my implementation based on my own understanding. I'm not sure it is entirely correct, but the results seem pretty good. If anyone finds any issues, please let me know. Thank you very much! image image

Although it works well, unlike the original, I used Fast R-CNN instead of Faster R-CNN because I couldn't run Faster R-CNN properly. If anyone has a simpler way, please let me know. Additionally, I plan to replace Fast R-CNN with YOLO, but I haven't implemented it yet.

have you done this by YOLO ?

aiXia121 avatar Jan 17 '25 03:01 aiXia121

It does not matter what kind of detector or segmentation method you use; providing the bboxes is enough. Good luck.

dongdk avatar Jan 17 '25 03:01 dongdk

I managed to use pytorch1.11 and torchvision 0.12 to test on my own image. Thanks! @h3clikejava

zhitongcui avatar Mar 19 '25 08:03 zhitongcui