apply to streamed video (webcam)
Hi!
Thank you for this great package! I added a small function to stream from the webcam and add masks on the output. The framerate is a bit slow but might be interesting to develop.
If you want to reproduce:
inference_video.py :
from utils.tools import *
import argparse
import ast
def parse_args(argv=None):
    """Parse command-line options for video/webcam FastSAM inference.

    Args:
        argv: Optional list of argument strings; defaults to ``sys.argv[1:]``
            when ``None`` (standard argparse behavior).

    Returns:
        argparse.Namespace with all inference options.
    """

    def str2bool(value):
        # argparse's ``type=bool`` is a trap: bool("False") is True because
        # any non-empty string is truthy.  Parse common spellings explicitly.
        if isinstance(value, bool):
            return value
        if value.lower() in ("yes", "true", "t", "y", "1"):
            return True
        if value.lower() in ("no", "false", "f", "n", "0"):
            return False
        raise argparse.ArgumentTypeError(f"boolean value expected, got {value!r}")

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model_path", type=str, default="./weights/FastSAM.pt", help="model"
    )
    parser.add_argument(
        "--img_path", type=str, default="./images/dogs.jpg", help="path to image file"
    )
    parser.add_argument("--imgsz", type=int, default=1024, help="image size")
    parser.add_argument(
        "--iou",
        type=float,
        default=0.9,
        help="iou threshold for filtering the annotations",
    )
    parser.add_argument(
        "--text_prompt", type=str, default=None, help='use text prompt eg: "a dog"'
    )
    parser.add_argument(
        "--conf", type=float, default=0.4, help="object confidence threshold"
    )
    parser.add_argument(
        "--output", type=str, default="./output/", help="image save path"
    )
    parser.add_argument(
        "--randomcolor", type=str2bool, default=True, help="mask random color"
    )
    parser.add_argument(
        "--point_prompt", type=str, default="[[0,0]]", help="[[x1,y1],[x2,y2]]"
    )
    parser.add_argument(
        "--point_label",
        type=str,
        default="[0]",
        help="[1,0] 0:background, 1:foreground",
    )
    parser.add_argument("--box_prompt", type=str,
                        default="[0,0,0,0]", help="[x,y,w,h]")
    parser.add_argument(
        "--better_quality",
        type=str2bool,
        default=False,
        help="better quality using morphologyEx",
    )
    # Pick GPU when available; the user can still override with --device.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    parser.add_argument(
        "--device", type=str, default=device, help="cuda:[0,1,2,3,4] or cpu"
    )
    parser.add_argument(
        "--retina",
        type=str2bool,
        default=True,
        help="draw high-resolution segmentation masks",
    )
    parser.add_argument(
        "--withContours", type=str2bool, default=False, help="draw the edges of the masks"
    )
    parser.add_argument(
        "--video_path", type=str, default=0, help="path to video file or integer for webcam"
    )
    return parser.parse_args(argv)
def overlay_transparent(frame, mask, alpha=0.5):
    """Blend a single-channel mask over *frame* with the given opacity.

    The mask is promoted to 3-channel BGR and cast to the frame's dtype so
    that ``cv2.addWeighted`` can combine the two images; the frame keeps
    weight ``alpha`` and the mask gets ``1 - alpha``.
    """
    colored_mask = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR).astype(frame.dtype)
    return cv2.addWeighted(frame, alpha, colored_mask, 1 - alpha, 0)
def main(args):
    """Run FastSAM on a video stream and display segmented frames.

    Each frame is written to ``args.img_path`` because the prompt/plotting
    helpers re-read the image from disk; annotated output is written under
    ``args.output`` by ``fast_process``.  Press ``q`` in the preview window
    to stop.
    """
    # load model
    model = YOLO(args.model_path)
    # Prompts arrive as strings on the CLI; turn them into Python literals.
    args.point_prompt = ast.literal_eval(args.point_prompt)
    args.box_prompt = ast.literal_eval(args.box_prompt)
    args.point_label = ast.literal_eval(args.point_label)
    # Open video stream (file path, or integer index for a webcam).
    cap = cv2.VideoCapture(args.video_path)
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # End of stream or read failure; frame would be None here,
                # so nothing below may touch it.
                break
            # Persist the frame: prompt() and fast_process() reload it
            # from args.img_path.
            cv2.imwrite(args.img_path, frame)
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # Apply model to frame
            results = model(
                frame_rgb,
                imgsz=args.imgsz,
                device=args.device,
                retina_masks=args.retina,
                iou=args.iou,
                conf=args.conf,
                max_det=100,
            )
            if args.box_prompt[2] != 0 and args.box_prompt[3] != 0:
                annotations = prompt(results, args, box=True)
                annotations = np.array([annotations])
                fast_process(
                    annotations=annotations,
                    args=args,
                    mask_random_color=args.randomcolor,
                    bbox=convert_box_xywh_to_xyxy(args.box_prompt),
                )
            elif args.text_prompt is not None:
                results = format_results(results[0], 0)
                annotations = prompt(results, args, text=True)
                annotations = np.array([annotations])
                fast_process(
                    annotations=annotations,
                    args=args,
                    mask_random_color=args.randomcolor,
                )
            elif args.point_prompt[0] != [0, 0]:
                results = format_results(results[0], 0)
                annotations = prompt(results, args, point=True)
                # list to numpy
                annotations = np.array([annotations])
                fast_process(
                    annotations=annotations,
                    args=args,
                    mask_random_color=args.randomcolor,
                    points=args.point_prompt,
                )
            else:
                fast_process(
                    annotations=results[0].masks.data,
                    args=args,
                    mask_random_color=args.randomcolor,
                )
            # Show the raw frame inside the loop; waitKey is required for
            # the window to actually refresh, and 'q' exits cleanly.
            cv2.imshow("frame", frame)
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
    finally:
        # Release capture and destroy windows even if inference raised.
        cap.release()
        cv2.destroyAllWindows()
def prompt(results, args, box=None, point=None, text=None):
    """Dispatch to the box/point/text prompt helper and return its mask.

    Exactly one of *box*, *point*, *text* is expected to be truthy; when
    none is set the function returns ``None``.  The image at
    ``args.img_path`` is reloaded to recover the original frame size.
    """
    original = cv2.imread(args.img_path)
    height, width = original.shape[0], original.shape[1]
    if box:
        mask, _ = box_prompt(
            results[0].masks.data,
            convert_box_xywh_to_xyxy(args.box_prompt),
            height,
            width,
        )
        return mask
    if point:
        mask, _ = point_prompt(
            results, args.point_prompt, args.point_label, height, width
        )
        return mask
    if text:
        mask, _ = text_prompt(results, args)
        return mask
    return None
if __name__ == "__main__":
args = parse_args()
main(args)
tools.py:
def fast_process(
    annotations, args, mask_random_color, bbox=None, points=None, edges=False
):
    """Composite segmentation masks over the image at ``args.img_path``.

    Renders masks via the CPU or GPU helper, optionally overlays contours,
    saves the result under ``args.output`` and shows it in an OpenCV window.

    Args:
        annotations: masks as a tensor/array, or a list of dicts with a
            ``"segmentation"`` key.
        args: parsed CLI namespace (reads img_path, output, device, retina,
            better_quality, withContours, point_label, randomcolor).
        mask_random_color: randomize mask colors when truthy.
        bbox: optional ``[x1, y1, x2, y2]`` box to draw.
        points: optional prompt points to draw.
        edges: unused; kept for interface compatibility.
    """
    if isinstance(annotations[0], dict):
        annotations = [annotation["segmentation"]
                       for annotation in annotations]
    result_name = os.path.basename(args.img_path)
    image = cv2.imread(args.img_path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    original_h = image.shape[0]
    original_w = image.shape[1]
    plt.figure(figsize=(original_w / 100, original_h / 100))
    plt.imshow(image)
    if args.better_quality:
        if isinstance(annotations[0], torch.Tensor):
            annotations = np.array(annotations.cpu())
        for i, mask in enumerate(annotations):
            # Close small holes first, then open to remove small speckles.
            mask = cv2.morphologyEx(
                mask.astype(np.uint8), cv2.MORPH_CLOSE, np.ones(
                    (3, 3), np.uint8)
            )
            annotations[i] = cv2.morphologyEx(
                mask.astype(np.uint8), cv2.MORPH_OPEN, np.ones(
                    (8, 8), np.uint8)
            )
    if args.device == "cpu":
        annotations = np.array(annotations)
        fast_show_mask(
            annotations,
            plt.gca(),
            random_color=mask_random_color,
            bbox=bbox,
            points=points,
            pointlabel=args.point_label,
            retinamask=args.retina,
            target_height=original_h,
            target_width=original_w,
        )
    else:
        if isinstance(annotations[0], np.ndarray):
            annotations = torch.from_numpy(annotations)
        fast_show_mask_gpu(
            annotations,
            plt.gca(),
            random_color=args.randomcolor,
            bbox=bbox,
            points=points,
            pointlabel=args.point_label,
            retinamask=args.retina,
            target_height=original_h,
            target_width=original_w,
        )
    if isinstance(annotations, torch.Tensor):
        annotations = annotations.cpu().numpy()
    if args.withContours:
        contour_all = []
        temp = np.zeros((original_h, original_w, 1))
        for i, mask in enumerate(annotations):
            if type(mask) == dict:
                mask = mask["segmentation"]
            annotation = mask.astype(np.uint8)
            if not args.retina:
                # Low-res masks must be scaled up to the original frame.
                annotation = cv2.resize(
                    annotation,
                    (original_w, original_h),
                    interpolation=cv2.INTER_NEAREST,
                )
            contours, hierarchy = cv2.findContours(
                annotation, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
            )
            for contour in contours:
                contour_all.append(contour)
        cv2.drawContours(temp, contour_all, -1, (255, 255, 255), 2)
        color = np.array([0 / 255, 0 / 255, 255 / 255, 0.8])
        contour_mask = temp / 255 * color.reshape(1, 1, -1)
        plt.imshow(contour_mask)
    save_path = args.output
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    plt.axis("off")
    fig = plt.gcf()
    plt.draw()
    try:
        buf = fig.canvas.tostring_rgb()
    except AttributeError:
        # The canvas has not been rendered yet; draw it and retry.
        fig.canvas.draw()
        buf = fig.canvas.tostring_rgb()
    cols, rows = fig.canvas.get_width_height()
    # np.fromstring is deprecated for binary data; frombuffer is the
    # supported zero-copy equivalent.
    img_array = np.frombuffer(buf, dtype=np.uint8).reshape(rows, cols, 3)
    cv2.imwrite(os.path.join(save_path, result_name),
                cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR))
    cv2.imshow("result", cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR))
    # keep 0.1s
    # cv2.waitKey(100)
    plt.close()
Hey @Ashoka74 ,
Thank you for your contribution! You can submit your changes in a pull request and we'll be happy to add you to the contributors.
@Ashoka74 tools.py doesn't seem to work — is it possible to share a clean, installable version?
@Ashoka74 could you please provide the working code for this?
Hi, thanks for sharing. I wonder how to keep the same color for a given class throughout a video. Currently a class gets a different color in each frame; the color isn't kept consistent for the same class across different inputs.
Hi @crankler, have you found a solution to the problem of colors differing between frames?