EVF-SAM
objects365 dataset annotation file
Dear authors,
Thanks for releasing the updated checkpoints for multi-task segmentation! I tried out the semantic-level segmentation demo, and the results are greatly improved.
I was wondering if you could share which Objects365 dataset version you used to train the checkpoints? And where can I find the annotation files (train & val) that contain segmentation masks? Much appreciated!
We select from the original o365 annotations by excluding categories that have more than one instance in an image. Then we apply SAM-2 to convert the remaining bounding boxes into segmentation masks. The code is straightforward; annotating the full dataset takes a few hours. We will present the details in the next update of the paper and will release the annotation code.
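For concreteness, here is a minimal sketch of that selection rule only (the file path and variable names are illustrative; the full pipeline, including the SAM-2 mask generation, is in the script posted further down this thread):

from collections import Counter
from pycocotools.coco import COCO

# Illustrative only: keep an annotation if its category occurs exactly once in the
# image and it is not a crowd region; these boxes are then prompted into SAM-2.
coco = COCO("objects365_train.json")  # path is an assumption
for img_id in coco.getImgIds():
    anns = coco.loadAnns(coco.getAnnIds(img_id))
    counts = Counter(a["category_id"] for a in anns)
    kept = [a for a in anns if not a["iscrowd"] and counts[a["category_id"]] < 2]
    # `kept` holds the single-instance-per-category boxes for this image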
@CoderZhangYx Hello, we are training EVF-SAM with refer_com and would like to train with o365 as well. Could you release the o365 annotations? Thank you very much.
Here is our pipeline to produce the o365 RES data:
1. git clone sam2
2. Run the following Python script:
from collections import Counter
import json

import cv2
import numpy as np
import torch
from tqdm import tqdm
from pycocotools.coco import COCO
from pycocotools import mask as maskUtils
from sam2.build_sam import build_sam2
from sam2.sam2_image_predictor import SAM2ImagePredictor

DATA_ROOT = "/path/to/objects365/"  # set to your Objects365 root (not defined in the original snippet)
checkpoint = "/sam2_hiera_large.pt"
model_cfg = "sam2_hiera_l.yaml"
predictor = SAM2ImagePredictor(build_sam2(model_cfg, checkpoint))

o365data = COCO(DATA_ROOT + "objects365_train.json")
img_ids = o365data.getImgIds()
collected_info = dict(
    images=[],
    annotations=[],
    categories=list(o365data.cats.values()),
)

with torch.inference_mode(), torch.autocast("cuda", dtype=torch.half):
    pbar = tqdm(img_ids)
    for img_id in pbar:
        ann_ids_per_img = o365data.getAnnIds(img_id)
        anns = o365data.loadAnns(ann_ids_per_img)
        # Count instances per category; keep only categories that appear once in this image.
        category_counts = Counter([ann["category_id"] for ann in anns])
        boxes = []
        category_ids = []
        for ann in anns:
            if not ann["iscrowd"] and category_counts[ann["category_id"]] < 2:
                # Convert the xywh box to xyxy and round to integer pixel coordinates.
                box = ann["bbox"]
                box[2] += box[0]
                box[3] += box[1]
                box = [round(_) for _ in box]
                boxes.append(box)
                category_ids.append(ann["category_id"])
        if boxes:
            img_info = o365data.loadImgs(img_id)[0]
            collected_info["images"].append(img_info)
            img_path = DATA_ROOT + "train/" + img_info["file_name"]
            img = cv2.imread(img_path)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            # Prompt SAM-2 with all kept boxes at once, producing one mask per box.
            predictor.set_image(img)
            masks, _, _ = predictor.predict(box=np.array(boxes), multimask_output=False)
            if len(masks.shape) == 4:  # (N, 1, H, W) when several boxes are passed
                masks = masks[:, 0]
            # Encode as COCO RLE; encode() expects a Fortran-ordered uint8 array of shape (H, W, N).
            masks = np.asfortranarray(masks.transpose(1, 2, 0), np.uint8)
            masks = maskUtils.encode(masks)
            for idx, (box, category_id, mask) in enumerate(zip(boxes, category_ids, masks)):
                mask["counts"] = mask["counts"].decode("utf-8")  # make the RLE JSON-serializable
                collected_info["annotations"].append(
                    dict(
                        category_id=category_id,
                        segmentation=[mask],
                        image_id=img_id,
                        bbox=box,
                        id="{}_{}".format(img_id, idx),
                    )
                )

with open("o365_res_instances.json", "w") as f:
    json.dump(collected_info, f)
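To sanity-check the output, you can load the resulting file and decode one of the RLE masks back to a binary array. A minimal sketch, assuming the o365_res_instances.json produced by the script above (everything else is illustrative):

import json
from pycocotools import mask as maskUtils

# Load the annotation file written by the script above.
with open("o365_res_instances.json") as f:
    data = json.load(f)

print(len(data["images"]), "images,", len(data["annotations"]), "annotations")

# Decode the first RLE mask back to a binary (H, W) array.
ann = data["annotations"][0]
rle = ann["segmentation"][0]          # the script stores a single RLE per annotation
binary_mask = maskUtils.decode(rle)   # uint8 array of shape (H, W)
print("mask shape:", binary_mask.shape, "area:", int(binary_mask.sum()))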