How to make predictions using image bytes instead of image paths
At the moment I am running inference on images following the code in the inference.ipynb file, which I have realised takes image paths as input.
However, the API service I have to integrate this with sends over raw image bytes, so it would be much more convenient for me to run inference directly on image bytes. Is there a way to do this?
There are several ways:
(1) if you're using YOLO-World for open-vocabulary detection, see https://github.com/AILab-CVC/YOLO-World/blob/master/demo/simple_demo.py#L48, which takes a numpy.array as input. You can decode the bitstream into a numpy.array; see the sketch after this list.
(2) if you're using YOLO-World for closed-set detection, you can also export an ONNX model, which may be much more efficient.
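For (1), a minimal sketch of the decoding step, assuming the bytes are a standard encoded bitstream such as JPEG or PNG (the helper name bytes_to_ndarray is just for illustration):

import cv2
import numpy as np

def bytes_to_ndarray(image_bytes: bytes) -> np.ndarray:
    # decode an encoded bitstream (JPEG/PNG bytes) into an HWC uint8 array
    buf = np.frombuffer(image_bytes, dtype=np.uint8)
    image = cv2.imdecode(buf, cv2.IMREAD_COLOR)  # OpenCV decodes in BGR order
    if image is None:
        raise ValueError("could not decode image bytes")
    return image

The resulting array can then be passed to the inference function in demo/simple_demo.py in place of an image read from disk. Note that OpenCV decodes to BGR, so whether the channels need reordering depends on what the downstream preprocessing expects.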
@wondervictor
File "/home/jupyter/til-24-base/vlm/YOLO-World/VLMManager.py", line 25, in init self.model = init_detector(cfg, checkpoint=checkpoint, device='cuda:0') File "/home/jupyter/til-24-base/vlm/YOLOvenv/lib/python3.10/site-packages/mmdet/apis/inference.py", line 102, in init_detector metainfo = DATASETS.build(test_dataset_cfg).metainfo File "/home/jupyter/til-24-base/vlm/YOLOvenv/lib/python3.10/site-packages/mmengine/registry/registry.py", line 570, in build return self.build_func(cfg, *args, **kwargs, registry=self) File "/home/jupyter/til-24-base/vlm/YOLOvenv/lib/python3.10/site-packages/mmengine/registry/build_functions.py", line 121, in build_from_cfg obj = obj_cls(**args) # type: ignore File "/home/jupyter/til-24-base/vlm/YOLO-World/yolo_world/datasets/mm_dataset.py", line 25, in init self.dataset = DATASETS.build(dataset) File "/home/jupyter/til-24-base/vlm/YOLOvenv/lib/python3.10/site-packages/mmengine/registry/registry.py", line 570, in build return self.build_func(cfg, *args, **kwargs, registry=self) File "/home/jupyter/til-24-base/vlm/YOLOvenv/lib/python3.10/site-packages/mmengine/registry/build_functions.py", line 121, in build_from_cfg obj = obj_cls(**args) # type: ignore File "/home/jupyter/til-24-base/vlm/YOLOvenv/lib/python3.10/site-packages/mmyolo/datasets/yolov5_coco.py", line 19, in init super().init(*args, **kwargs) File "/home/jupyter/til-24-base/vlm/YOLOvenv/lib/python3.10/site-packages/mmdet/datasets/base_det_dataset.py", line 51, in init super().init(*args, **kwargs) File "/home/jupyter/til-24-base/vlm/YOLOvenv/lib/python3.10/site-packages/mmengine/dataset/base_dataset.py", line 247, in init self.full_init() File "/home/jupyter/til-24-base/vlm/YOLOvenv/lib/python3.10/site-packages/mmyolo/datasets/yolov5_coco.py", line 27, in full_init self.data_list = self.load_data_list() File "/home/jupyter/til-24-base/vlm/YOLOvenv/lib/python3.10/site-packages/mmdet/datasets/lvis.py", line 600, in load_data_list raise ImportError( ImportError: Package lvis is not installed. Please run "pip install git+https://github.com/lvis-dataset/lvis-api.git".
This happened when I tried to use the code in simple_demo.py. Please advise on how to change it so that this error is resolved (ideally without installing lvis, to keep dependencies down). My code is below:
import numpy as np
import torch
from mmengine.config import Config
from mmengine.dataset import Compose
from mmengine.runner import Runner
from mmengine.runner.amp import autocast
from mmyolo.registry import RUNNERS
from torchvision.ops import nms
import PIL.Image
import cv2
import supervision as sv
import glob
import os
import json

if __name__ == "__main__":
    # load config
    cfg = Config.fromfile(
        "../configs/pretrain/yolo_world_v2_l_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_1280ft_lvis_minival.py"
    )
    cfg.work_dir = "."
    # cfg.load_from = "../training_epochs/total_30_epochs/epoch_20.pth"
    cfg.load_from = "../pretrained_weights/yolo_world_v2_l_obj365v1_goldg_pretrain_1280ft-9babe3f6.pth"

    # build the runner, load the checkpoint, and set up the test pipeline
    runner = Runner.from_cfg(cfg)
    runner.call_hook("before_run")
    runner.load_or_resume()
    pipeline = cfg.test_dataloader.dataset.pipeline
    runner.pipeline = Compose(pipeline)

    # put the model in evaluation mode
    runner.model.eval()

    def colorstr(*input):
        """Helper function for styled logging."""
        *args, string = input if len(input) > 1 else ("bold", input[0])
        colors = {"bold": "\033[1m"}
        return "".join(colors[x] for x in args) + f"{string}"

    bounding_box_annotator = sv.BoxAnnotator()
    label_annotator = sv.LabelAnnotator(text_position=sv.Position.CENTER)
    mask_annotator = sv.MaskAnnotator()

    # # class_names = ("grey camouflage fighter jet . grey and white fighter plane . white and black drone . white and black fighter jet . white missile . black and white commercial aircraft")
    # class_names = ("grey missile . red, white, and blue light aircraft . green and black missile . white and red helicopter .")

    with open("../../augmented_yolo_train_dataset/annotations/yolo_world_train.json", "r") as f:
        dataset = json.load(f)

    def run_image(
        runner,
        input_image,
        class_names,
        output_image,
        max_num_boxes=100,
        score_thr=0.05,
        nms_thr=0.5,
    ):
        print(f"Analysing {input_image}...\n class_names: {class_names}")
        print("=" * 100)
        output_image = "./runs/detect/" + output_image
        # str.strip() returns a new string, so the result must be assigned
        class_names = class_names.strip()
        texts = [[t.strip()] for t in class_names.split(".")] + [[" "]]
        print(texts)

        # run the test pipeline and the model
        data_info = runner.pipeline(dict(img_id=0, img_path=input_image, texts=texts))
        data_batch = dict(
            inputs=data_info["inputs"].unsqueeze(0),
            data_samples=[data_info["data_samples"]],
        )
        with autocast(enabled=False), torch.no_grad():
            output = runner.model.test_step(data_batch)[0]
            runner.model.class_names = texts
            pred_instances = output.pred_instances

        # non-maximum suppression and score filtering
        keep_idxs = nms(pred_instances.bboxes, pred_instances.scores, iou_threshold=nms_thr)
        pred_instances = pred_instances[keep_idxs]
        pred_instances = pred_instances[pred_instances.scores.float() > score_thr]
        if len(pred_instances.scores) > max_num_boxes:
            indices = pred_instances.scores.float().topk(max_num_boxes)[1]
            pred_instances = pred_instances[indices]
        output.pred_instances = pred_instances

        # predictions
        pred_instances = pred_instances.cpu().numpy()
        print(pred_instances)
        if 'masks' in pred_instances:
            masks = pred_instances['masks']
        else:
            masks = None
        detections = sv.Detections(
            xyxy=pred_instances['bboxes'],
            class_id=pred_instances['labels'],
            confidence=pred_instances['scores'],
        )
        # label ids with confidence scores
        labels = [
            f"{class_id} {confidence:0.2f}"
            for class_id, confidence in zip(detections.class_id, detections.confidence)
        ]

        # draw bounding boxes with labels
        image = PIL.Image.open(input_image)
        svimage = np.array(image)
        svimage = bounding_box_annotator.annotate(svimage, detections)
        svimage = label_annotator.annotate(svimage, detections, labels)
        if masks is not None:
            # annotate the already-annotated array, not the original PIL image
            svimage = mask_annotator.annotate(svimage, detections)

        # save output image (RGB -> BGR for cv2.imwrite)
        cv2.imwrite(output_image, svimage[:, :, ::-1])
        print(f"Results saved to {colorstr('bold', output_image)}")
        # return svimage[:, :, ::-1]

    for i in range(21, 63):
        image_path = f"../../unaugmented_images_for_testing/image_{i}.jpeg"
        class_names = dataset["images"][i]["caption"]
        file_name = os.path.basename(image_path)  # e.g. "image_21.jpeg"
        run_image(runner, image_path, class_names, "pred_" + file_name)
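For reference, this is the bytes-based variant of run_image I am ultimately aiming for. It is an untested sketch (reusing the cfg and runner built above): it assumes the first transform in the test pipeline is LoadImageFromFile, and swaps it for mmdet's LoadImageFromNDArray (the same trick mmdet's inference_detector uses for array inputs), so the pipeline accepts a decoded array via img= instead of img_path=.

import cv2
import numpy as np
import torch
from mmengine.dataset import Compose
from mmengine.runner.amp import autocast
from torchvision.ops import nms

# swap the file-loading transform for the ndarray-loading one
pipeline_cfg = cfg.test_dataloader.dataset.pipeline
pipeline_cfg[0].type = "mmdet.LoadImageFromNDArray"
runner.pipeline = Compose(pipeline_cfg)

def run_image_bytes(runner, image_bytes, class_names, score_thr=0.05, nms_thr=0.5):
    # decode the bitstream sent by the API into an HWC uint8 (BGR) array
    image = cv2.imdecode(np.frombuffer(image_bytes, np.uint8), cv2.IMREAD_COLOR)
    texts = [[t.strip()] for t in class_names.strip().split(".")] + [[" "]]
    # pass the decoded array directly instead of a file path
    data_info = runner.pipeline(dict(img_id=0, img=image, texts=texts))
    data_batch = dict(
        inputs=data_info["inputs"].unsqueeze(0),
        data_samples=[data_info["data_samples"]],
    )
    with autocast(enabled=False), torch.no_grad():
        output = runner.model.test_step(data_batch)[0]
    pred_instances = output.pred_instances
    keep_idxs = nms(pred_instances.bboxes, pred_instances.scores, iou_threshold=nms_thr)
    pred_instances = pred_instances[keep_idxs]
    return pred_instances[pred_instances.scores.float() > score_thr]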
@wondervictor please advise on how to edit the config files / deal with this error.
The config I used for Config.fromfile() is taken from this GitHub repo and is unedited.
@wondervictor hi, sorry for troubling you so often, but could I get some advice on this? Do I just have to pip install from the GitHub link?
@wondervictor I have resolved the error by pip installing from the GitHub link. However, I then ran into another error:

  File "/home/jupyter/til-24-base/vlm/YOLOvenv/lib/python3.10/site-packages/lvis/lvis.py", line 35, in _load_json
    with open(path, "r") as f:
FileNotFoundError: [Errno 2] No such file or directory: 'data/coco/lvis/lvis_v1_minival_inserted_image_name.json'
I tried to solve this by creating an empty json file of that name under those directories. But then I was once again faced with another error:
File "/home/jupyter/til-24-base/vlm/YOLOvenv/lib/python3.10/site-packages/mmyolo/datasets/yolov5_coco.py", line 27, in full_init self.data_list = self.load_data_list() File "/home/jupyter/til-24-base/vlm/YOLOvenv/lib/python3.10/site-packages/mmdet/datasets/lvis.py", line 605, in load_data_list self.lvis = LVIS(local_path) File "/home/jupyter/til-24-base/vlm/YOLOvenv/lib/python3.10/site-packages/lvis/lvis.py", line 27, in __init__ self.dataset = self._load_json(annotation_path) File "/home/jupyter/til-24-base/vlm/YOLOvenv/lib/python3.10/site-packages/lvis/lvis.py", line 36, in _load_json return json.load(f) File "/opt/conda/lib/python3.10/json/__init__.py", line 293, in load return loads(fp.read(), File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads return _default_decoder.decode(s) File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode obj, end = self.raw_decode(s, idx=_w(s, 0).end()) File "/opt/conda/lib/python3.10/json/decoder.py", line 355, in raw_decode raise JSONDecodeError("Expecting value", s, err.value) from None json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
Does this mean that my json file must contain data for it to work? Is there a way to bypass all of these errors while still being able to run inference on image bytes? Thank you very much for any help given! If I have to comment out or edit any of the default config code in configs/pretrain/yolo_world_v2_l_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_1280ft_lvis_minival.py, please advise on that, because I am not fully sure how to edit the config files for my own purposes. As a guess, I imagine something like the sketch below might avoid building the LVIS dataset entirely, but I am not sure it is correct.
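(Untested sketch of what I mean, with placeholder paths: the idea is to override the test dataset in Python before building the Runner, so that nothing LVIS-related is ever constructed. Judging from the MultiModalDataset in the traceback, the wrapped dataset may sit one level deeper, at cfg.test_dataloader.dataset.dataset.)

cfg = Config.fromfile(
    "../configs/pretrain/yolo_world_v2_l_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_1280ft_lvis_minival.py"
)
# swap the wrapped LVIS dataset for a plain COCO-format dataset I control
inner = cfg.test_dataloader.dataset.dataset
inner.type = "YOLOv5CocoDataset"
inner.ann_file = "path/to/my_tiny_coco_annotations.json"  # placeholder
inner.data_prefix = dict(img="path/to/images/")           # placeholder

Also, if I keep the annotation-file route instead, I gather json.load cannot parse an empty file, so at minimum the file would need a valid COCO skeleton such as {"images": [], "annotations": [], "categories": []}.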
In demo/simple_demo.py I commented out the line image = image[:, :, [2, 1, 0]] in inference(), and it started giving me better results.
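I believe this is a BGR/RGB channel-order issue: that line swaps the first and last channels, which is only needed when the decoded array is in the opposite channel order from what the preprocessing expects. For example (illustrative only):

import cv2

bgr = cv2.imread("image.jpeg")  # OpenCV decodes in BGR order
rgb = bgr[:, :, [2, 1, 0]]      # this swap converts BGR -> RGB (and vice versa)

So whether the swap helps or hurts depends on how the input array was produced upstream (cv2 gives BGR, PIL gives RGB).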
The reason might be the download. Please refer to https://hf-mirror.com/GLIPModel/GLIP/tree/main and download the file. Remember to check that the content is actually in JSON format. (I used wget to download from hf-mirror, but the file I downloaded was in an XML format.)
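A quick way to verify the downloaded file, for example:

import json

# raises json.decoder.JSONDecodeError if the download is actually XML/HTML
with open("lvis_v1_minival_inserted_image_name.json") as f:
    json.load(f)
print("file is valid JSON")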