barcode_detection_benchmark
Inference on a single image
Hi, I am trying to run the following code to get detection boxes from an image, but I always get empty lists for contours and boxes. Can anyone help me figure out what I am doing wrong, or how I should do this?
import cv2
import numpy as np
import torch
from PIL import Image
from torchvision import transforms

def image_loader(image_name):
    """Load an image and return a normalized tensor."""
    imsize = 512
    image = Image.open(image_name).convert('RGB')
    loader = transforms.Compose([
        # transforms.Scale(imsize),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    image = loader(image).float()
    image = image.unsqueeze(1)
    return image
def get_contours_and_boxes(binarized_map, min_area=10):
    """
    :param binarized_map: np.array of np.uint8
    :param min_area: minimum contour area to keep
    :return: (contours, boxes)
    """
    assert binarized_map.dtype == np.uint8
    contours, _ = cv2.findContours(
        binarized_map,
        mode=cv2.RETR_EXTERNAL,
        method=cv2.CHAIN_APPROX_SIMPLE
    )
    contours = list(filter(lambda cnt: cv2.contourArea(cnt) > min_area, contours))
    rects = [cv2.minAreaRect(cnt) for cnt in contours]
    boxes = [cv2.boxPoints(rect).reshape((-1, 2)) for rect in rects]
    assert len(boxes) == len(contours)
    return contours, boxes
ckpt_path = "/barcode_detection_benchmark/resnet18_unet/ZVZ-real/best_full_init_synth.pth"
model = ZharkovDilatedNet()
state_dict = torch.load(ckpt_path)
model.load_state_dict(state_dict['model_state_dict'], strict=False)
model.eval()
# model loaded successfully

image_path = "image_1.png"  # example image from the ZVZ-real dataset
image = image_loader(image_path)
heatmap = model(image)[:, 0]
heatmap = torch.sigmoid(heatmap).detach().cpu().numpy()
d_binarized_map = (heatmap[:, 0] > 0.5).astype(np.uint8)
cont, box = get_contours_and_boxes(d_binarized_map)
print(cont, box)  # getting empty lists
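For reference, the post-processing itself seems fine, so the problem is presumably upstream of get_contours_and_boxes. A minimal sanity check (my own sketch, assuming OpenCV 4.x, where findContours returns two values):

toy_map = np.zeros((64, 64), dtype=np.uint8)
toy_map[16:48, 16:48] = 1  # one square "detection"
toy_cont, toy_box = get_contours_and_boxes(toy_map)
print(len(toy_cont), len(toy_box))  # 1 1 -> the contour/box extraction works on a valid binary map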
@asmekal
Hi
Well, first of all, it's strange that you have to load with strict=False. You can try ZharkovDilatedNet(in_channels=3); if I recall correctly, the models in the benchmark always have 3 input channels.
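Something like this should load cleanly if the checkpoint really matches the model (a sketch; strict=True is the default and will raise on any key mismatch, which is exactly the signal you want here):

model = ZharkovDilatedNet(in_channels=3)
checkpoint = torch.load(ckpt_path, map_location="cpu")
model.load_state_dict(checkpoint["model_state_dict"])  # strict load, no strict=False
model.eval()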
Then, you do heatmap = model(image)[:, 0] followed by heatmap[:, 0], which takes you from (batch, channels, H, W) to (batch, W) instead of the (H, W) map you want, which is another problem.
You can also check the output heatmap (draw it); maybe the segmentation simply fails on your particular image.
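Putting the indexing fix together, a rough sketch (assuming the output layout is (batch, channels, H, W) with the detection channel at index 0; note also that the batch dimension belongs at dim 0, i.e. unsqueeze(0) rather than unsqueeze(1) in image_loader):

image = image_loader(image_path)           # should come out as (1, 3, H, W)
with torch.no_grad():
    heatmap = torch.sigmoid(model(image))  # (batch, channels, H, W)
detection = heatmap[0, 0].cpu().numpy()    # (H, W): first sample, detection channel
d_binarized_map = (detection > 0.5).astype(np.uint8)
cont, box = get_contours_and_boxes(d_binarized_map)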
Thanks for replying, @asmekal. When I run without strict=False (with in_channels=3), I get the following exception:
Traceback (most recent call last):
  File "custom_inference.py", line 104, in <module>
    model.load_state_dict(state_dict['model_state_dict'])
  File "/home/veeve/anaconda3/envs/few-shot/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1044, in load_state_dict
    raise RuntimeError('Error(s) in loading state_dict for {}:\n\t{}'.format(
RuntimeError: Error(s) in loading state_dict for ZharkovDilatedNet:
Missing key(s) in state_dict: "net.0.0.weight", "net.0.0.bias", "net.1.0.weight", "net.1.0.bias", "net.2.0.weight", "net.2.0.bias", "net.3.0.weight", "net.3.0.bias", "net.4.0.weight", "net.4.0.bias", "net.5.0.weight", "net.5.0.bias", "net.6.0.weight", "net.6.0.bias", "net.7.0.weight", "net.7.0.bias", "net.8.0.weight", "net.8.0.bias", "net.9.weight", "net.9.bias".
Unexpected key(s) in state_dict: "encoder._layers.0.conv1.weight", "encoder._layers.0.bn1.weight", "encoder._layers.0.bn1.bias", "encoder._layers.0.bn1.running_mean", "encoder._layers.0.bn1.running_var", "encoder._layers.0.bn1.num_batches_tracked", "encoder._layers.1.0.conv1.weight", "encoder._layers.1.0.bn1.weight", "encoder._layers.1.0.bn1.bias", "encoder._layers.1.0.bn1.running_mean", "encoder._layers.1.0.bn1.running_var", "encoder._layers.1.0.bn1.num_batches_tracked", "encoder._layers.1.0.conv2.weight", "encoder._layers.1.0.bn2.weight", "encoder._layers.1.0.bn2.bias", "encoder._layers.1.0.bn2.running_mean", "encoder._layers.1.0.bn2.running_var", "encoder._layers.1.0.bn2.num_batches_tracked", "encoder._layers.1.1.conv1.weight", "encoder._layers.1.1.bn1.weight", "encoder._layers.1.1.bn1.bias", "encoder._layers.1.1.bn1.running_mean", "encoder._layers.1.1.bn1.running_var", "encoder._layers.1.1.bn1.num_batches_tracked", "encoder._layers.1.1.conv2.weight", "encoder._layers.1.1.bn2.weight", "encoder._layers.1.1.bn2.bias", "encoder._layers.1.1.bn2.running_mean", "encoder._layers.1.1.bn2.running_var", "encoder._layers.1.1.bn2.num_batches_tracked", "encoder._layers.2.0.conv1.weight", "encoder._layers.2.0.bn1.weight", "encoder._layers.2.0.bn1.bias", "encoder._layers.2.0.bn1.running_mean", "encoder._layers.2.0.bn1.running_var", "encoder._layers.2.0.bn1.num_batches_tracked", "encoder._layers.2.0.conv2.weight", "encoder._layers.2.0.bn2.weight", "encoder._layers.2.0.bn2.bias", "encoder._layers.2.0.bn2.running_mean", "encoder._layers.2.0.bn2.running_var", "encoder._layers.2.0.bn2.num_batches_tracked", "encoder._layers.2.0.downsample.0.weight", "encoder._layers.2.0.downsample.1.weight", "encoder._layers.2.0.downsample.1.bias", "encoder._layers.2.0.downsample.1.running_mean", "encoder._layers.2.0.downsample.1.running_var", "encoder._layers.2.0.downsample.1.num_batches_tracked", "encoder._layers.2.1.conv1.weight", "encoder._layers.2.1.bn1.weight", "encoder._layers.2.1.bn1.bias", "encoder._layers.2.1.bn1.running_mean", "encoder._layers.2.1.bn1.running_var", "encoder._layers.2.1.bn1.num_batches_tracked", "encoder._layers.2.1.conv2.weight", "encoder._layers.2.1.bn2.weight", "encoder._layers.2.1.bn2.bias", "encoder._layers.2.1.bn2.running_mean", "encoder._layers.2.1.bn2.running_var", "encoder._layers.2.1.bn2.num_batches_tracked", "encoder._layers.3.0.conv1.weight", "encoder._layers.3.0.bn1.weight", "encoder._layers.3.0.bn1.bias", "encoder._layers.3.0.bn1.running_mean", "encoder._layers.3.0.bn1.running_var", "encoder._layers.3.0.bn1.num_batches_tracked", "encoder._layers.3.0.conv2.weight", "encoder._layers.3.0.bn2.weight", "encoder._layers.3.0.bn2.bias", "encoder._layers.3.0.bn2.running_mean", "encoder._layers.3.0.bn2.running_var", "encoder._layers.3.0.bn2.num_batches_tracked", "encoder._layers.3.0.downsample.0.weight",
"encoder._layers.3.0.downsample.1.weight", "encoder._layers.3.0.downsample.1.bias", "encoder._layers.3.0.downsample.1.running_mean", "encoder._layers.3.0.downsample.1.running_var", "encoder._layers.3.0.downsample.1.num_batches_tracked", "encoder._layers.3.1.conv1.weight", "encoder._layers.3.1.bn1.weight", "encoder._layers.3.1.bn1.bias", "encoder._layers.3.1.bn1.running_mean", "encoder._layers.3.1.bn1.running_var", "encoder._layers.3.1.bn1.num_batches_tracked", "encoder._layers.3.1.conv2.weight", "encoder._layers.3.1.bn2.weight", "encoder._layers.3.1.bn2.bias", "encoder._layers.3.1.bn2.running_mean", "encoder._layers.3.1.bn2.running_var", "encoder._layers.3.1.bn2.num_batches_tracked", "encoder._layers.4.0.conv1.weight", "encoder._layers.4.0.bn1.weight", "encoder._layers.4.0.bn1.bias", "encoder._layers.4.0.bn1.running_mean", "encoder._layers.4.0.bn1.running_var", "encoder._layers.4.0.bn1.num_batches_tracked", "encoder._layers.4.0.conv2.weight", "encoder._layers.4.0.bn2.weight", "encoder._layers.4.0.bn2.bias", "encoder._layers.4.0.bn2.running_mean", "encoder._layers.4.0.bn2.running_var", "encoder._layers.4.0.bn2.num_batches_tracked", "encoder._layers.4.0.downsample.0.weight", "encoder._layers.4.0.downsample.1.weight", "encoder._layers.4.0.downsample.1.bias", "encoder._layers.4.0.downsample.1.running_mean", "encoder._layers.4.0.downsample.1.running_var", "encoder._layers.4.0.downsample.1.num_batches_tracked", "encoder._layers.4.1.conv1.weight", "encoder._layers.4.1.bn1.weight", "encoder._layers.4.1.bn1.bias", "encoder._layers.4.1.bn1.running_mean", "encoder._layers.4.1.bn1.running_var", "encoder._layers.4.1.bn1.num_batches_tracked", "encoder._layers.4.1.conv2.weight", "encoder._layers.4.1.bn2.weight", "encoder._layers.4.1.bn2.bias", "encoder._layers.4.1.bn2.running_mean", "encoder._layers.4.1.bn2.running_var", "encoder._layers.4.1.bn2.num_batches_tracked", "bridge.block._block.0.weight", "bridge.block._block.1.net.0.weight", "bridge.block._block.1.net.0.bias", "bridge.block._block.1.net.0.running_mean", "bridge.block._block.1.net.0.running_var", "bridge.block._block.1.net.0.num_batches_tracked", "bridge.block._block.2.weight", "bridge.block._block.3.net.0.weight", "bridge.block._block.3.net.0.bias", "bridge.block._block.3.net.0.running_mean", "bridge.block._block.3.net.0.running_var", "bridge.block._block.3.net.0.num_batches_tracked", "decoder.blocks.0.block.0.0.weight", "decoder.blocks.0.block.0.1.net.0.weight", "decoder.blocks.0.block.0.1.net.0.bias", "decoder.blocks.0.block.0.1.net.0.running_mean", "decoder.blocks.0.block.0.1.net.0.running_var", "decoder.blocks.0.block.0.1.net.0.num_batches_tracked", "decoder.blocks.0.block.0.2.weight", "decoder.blocks.0.block.0.3.net.0.weight", "decoder.blocks.0.block.0.3.net.0.bias", "decoder.blocks.0.block.0.3.net.0.running_mean", "decoder.blocks.0.block.0.3.net.0.running_var", "decoder.blocks.0.block.0.3.net.0.num_batches_tracked", "decoder.blocks.1.block.0.0.weight", "decoder.blocks.1.block.0.1.net.0.weight", "decoder.blocks.1.block.0.1.net.0.bias", "decoder.blocks.1.block.0.1.net.0.running_mean", "decoder.blocks.1.block.0.1.net.0.running_var", "decoder.blocks.1.block.0.1.net.0.num_batches_tracked", "decoder.blocks.1.block.0.2.weight", "decoder.blocks.1.block.0.3.net.0.weight", "decoder.blocks.1.block.0.3.net.0.bias", "decoder.blocks.1.block.0.3.net.0.running_mean", "decoder.blocks.1.block.0.3.net.0.running_var", "decoder.blocks.1.block.0.3.net.0.num_batches_tracked", "decoder.blocks.2.block.0.0.weight", "decoder.blocks.2.block.0.1.net.0.weight", 
"decoder.blocks.2.block.0.1.net.0.bias", "decoder.blocks.2.block.0.1.net.0.running_mean", "decoder.blocks.2.block.0.1.net.0.running_var", "decoder.blocks.2.block.0.1.net.0.num_batches_tracked", "decoder.blocks.2.block.0.2.weight", "decoder.blocks.2.block.0.3.net.0.weight", "decoder.blocks.2.block.0.3.net.0.bias", "decoder.blocks.2.block.0.3.net.0.running_mean", "decoder.blocks.2.block.0.3.net.0.running_var", "decoder.blocks.2.block.0.3.net.0.num_batches_tracked", "decoder.blocks.3.block.0.0.weight", "decoder.blocks.3.block.0.1.net.0.weight", "decoder.blocks.3.block.0.1.net.0.bias", "decoder.blocks.3.block.0.1.net.0.running_mean", "decoder.blocks.3.block.0.1.net.0.running_var", "decoder.blocks.3.block.0.1.net.0.num_batches_tracked", "decoder.blocks.3.block.0.2.weight", "decoder.blocks.3.block.0.3.net.0.weight", "decoder.blocks.3.block.0.3.net.0.bias", "decoder.blocks.3.block.0.3.net.0.running_mean", "decoder.blocks.3.block.0.3.net.0.running_var", "decoder.blocks.3.block.0.3.net.0.num_batches_tracked", "head.head.0._block.0.weight", "head.head.0._block.1.net.0.weight", "head.head.0._block.1.net.0.bias", "head.head.0._block.1.net.0.running_mean", "head.head.0._block.1.net.0.running_var", "head.head.0._block.1.net.0.num_batches_tracked", "head.head.0._block.2.weight", "head.head.0._block.3.net.0.weight", "head.head.0._block.3.net.0.bias", "head.head.0._block.3.net.0.running_mean", "head.head.0._block.3.net.0.running_var", "head.head.0._block.3.net.0.num_batches_tracked", "head.head.1.weight", "head.head.1.bias".
It means you are trying to load a resnet18unet checkpoint into the dilated net. You can either pick the matching checkpoint or change the model (the resnet18unet sources can be found in the catalyst repo): https://github.com/abbyy/barcode_detection_benchmark/blob/bcd49723a3c726390a71c0d61037a2b32bff277b/configs/_template.yml#L1-L12
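You can also tell which architecture a checkpoint belongs to without building a model, just by looking at its keys (a small sketch; the key prefixes come from the traceback above):

state = torch.load(ckpt_path, map_location="cpu")["model_state_dict"]
print(sorted(state.keys())[:5])
# keys starting with "net."                  -> ZharkovDilatedNet checkpoint
# keys starting with "encoder."/"decoder."   -> resnet18_unet checkpoint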
Hi @sanariaz154, I'm facing the same issue. Did you find a solution? Please help me out here. Thanks.
Hi @pencilerazzer, what exactly is the issue you are facing? Are you getting empty lists for contours and boxes? Have you verified that you are loading the correct model?
Thanks for the reply, @sanariaz154. I'm getting empty lists for both, across the whole barcode dataset. Yes, I checked; the model is correct.
Is anyone able to run this script successfully?
@sanariaz154 @pencilerazzer did you manage to get inference working with the checkpoints? Could you share the code snippet? I'm facing the same issue.