apply to streamed video (webcam)
Hi!
Thank you for this great package! I added a small function to stream from the webcam and add masks on the output. The framerate is a bit slow but might be interesting to develop.
If you want to reproduce:
inference_video.py :
from utils.tools import *
import argparse
import ast
def parse_args(argv=None):
    """Parse command-line options for video/webcam FastSAM inference.

    Args:
        argv: Optional list of argument strings; defaults to ``sys.argv[1:]``
            when ``None`` (standard argparse behavior).

    Returns:
        argparse.Namespace with all inference options.
    """

    def str2bool(value):
        # argparse's ``type=bool`` is a trap: bool("False") is True because
        # any non-empty string is truthy.  Parse common spellings explicitly.
        if isinstance(value, bool):
            return value
        if value.lower() in ("yes", "true", "t", "y", "1"):
            return True
        if value.lower() in ("no", "false", "f", "n", "0"):
            return False
        raise argparse.ArgumentTypeError(f"boolean value expected, got {value!r}")

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model_path", type=str, default="./weights/FastSAM.pt", help="model"
    )
    parser.add_argument(
        "--img_path", type=str, default="./images/dogs.jpg", help="path to image file"
    )
    parser.add_argument("--imgsz", type=int, default=1024, help="image size")
    parser.add_argument(
        "--iou",
        type=float,
        default=0.9,
        help="iou threshold for filtering the annotations",
    )
    parser.add_argument(
        "--text_prompt", type=str, default=None, help='use text prompt eg: "a dog"'
    )
    parser.add_argument(
        "--conf", type=float, default=0.4, help="object confidence threshold"
    )
    parser.add_argument(
        "--output", type=str, default="./output/", help="image save path"
    )
    parser.add_argument(
        "--randomcolor", type=str2bool, default=True, help="mask random color"
    )
    parser.add_argument(
        "--point_prompt", type=str, default="[[0,0]]", help="[[x1,y1],[x2,y2]]"
    )
    parser.add_argument(
        "--point_label",
        type=str,
        default="[0]",
        help="[1,0] 0:background, 1:foreground",
    )
    parser.add_argument("--box_prompt", type=str,
                        default="[0,0,0,0]", help="[x,y,w,h]")
    parser.add_argument(
        "--better_quality",
        type=str2bool,
        default=False,
        help="better quality using morphologyEx",
    )
    # Pick GPU when available; the user can still override with --device.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    parser.add_argument(
        "--device", type=str, default=device, help="cuda:[0,1,2,3,4] or cpu"
    )
    parser.add_argument(
        "--retina",
        type=str2bool,
        default=True,
        help="draw high-resolution segmentation masks",
    )
    parser.add_argument(
        "--withContours", type=str2bool, default=False, help="draw the edges of the masks"
    )
    parser.add_argument(
        "--video_path", type=str, default=0, help="path to video file or integer for webcam"
    )
    return parser.parse_args(argv)
def overlay_transparent(frame, mask, alpha=0.5):
    """Blend a single-channel mask over *frame* with the given opacity.

    The mask is promoted to 3-channel BGR and cast to the frame's dtype so
    that ``cv2.addWeighted`` can combine the two images; the frame keeps
    weight ``alpha`` and the mask gets ``1 - alpha``.
    """
    colored_mask = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR).astype(frame.dtype)
    return cv2.addWeighted(frame, alpha, colored_mask, 1 - alpha, 0)
def main(args):
    """Run FastSAM on a video stream and display segmented frames.

    Each frame is written to ``args.img_path`` because the prompt/plotting
    helpers re-read the image from disk; annotated output is written under
    ``args.output`` by ``fast_process``.  Press ``q`` in the preview window
    to stop.
    """
    # load model
    model = YOLO(args.model_path)
    # Prompts arrive as strings on the CLI; turn them into Python literals.
    args.point_prompt = ast.literal_eval(args.point_prompt)
    args.box_prompt = ast.literal_eval(args.box_prompt)
    args.point_label = ast.literal_eval(args.point_label)
    # Open video stream (file path, or integer index for a webcam).
    cap = cv2.VideoCapture(args.video_path)
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # End of stream or read failure; frame would be None here,
                # so nothing below may touch it.
                break
            # Persist the frame: prompt() and fast_process() reload it
            # from args.img_path.
            cv2.imwrite(args.img_path, frame)
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # Apply model to frame
            results = model(
                frame_rgb,
                imgsz=args.imgsz,
                device=args.device,
                retina_masks=args.retina,
                iou=args.iou,
                conf=args.conf,
                max_det=100,
            )
            if args.box_prompt[2] != 0 and args.box_prompt[3] != 0:
                annotations = prompt(results, args, box=True)
                annotations = np.array([annotations])
                fast_process(
                    annotations=annotations,
                    args=args,
                    mask_random_color=args.randomcolor,
                    bbox=convert_box_xywh_to_xyxy(args.box_prompt),
                )
            elif args.text_prompt is not None:
                results = format_results(results[0], 0)
                annotations = prompt(results, args, text=True)
                annotations = np.array([annotations])
                fast_process(
                    annotations=annotations,
                    args=args,
                    mask_random_color=args.randomcolor,
                )
            elif args.point_prompt[0] != [0, 0]:
                results = format_results(results[0], 0)
                annotations = prompt(results, args, point=True)
                # list to numpy
                annotations = np.array([annotations])
                fast_process(
                    annotations=annotations,
                    args=args,
                    mask_random_color=args.randomcolor,
                    points=args.point_prompt,
                )
            else:
                fast_process(
                    annotations=results[0].masks.data,
                    args=args,
                    mask_random_color=args.randomcolor,
                )
            # Show the raw frame inside the loop; waitKey is required for
            # the window to actually refresh, and 'q' exits cleanly.
            cv2.imshow("frame", frame)
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
    finally:
        # Release capture and destroy windows even if inference raised.
        cap.release()
        cv2.destroyAllWindows()
def prompt(results, args, box=None, point=None, text=None):
    """Dispatch to the box/point/text prompt helper and return its mask.

    Exactly one of *box*, *point*, *text* is expected to be truthy; when
    none is set the function returns ``None``.  The image at
    ``args.img_path`` is reloaded to recover the original frame size.
    """
    original = cv2.imread(args.img_path)
    height, width = original.shape[0], original.shape[1]
    if box:
        mask, _ = box_prompt(
            results[0].masks.data,
            convert_box_xywh_to_xyxy(args.box_prompt),
            height,
            width,
        )
        return mask
    if point:
        mask, _ = point_prompt(
            results, args.point_prompt, args.point_label, height, width
        )
        return mask
    if text:
        mask, _ = text_prompt(results, args)
        return mask
    return None
if __name__ == "__main__":
args = parse_args()
main(args)
tools.py:
def fast_process(
    annotations, args, mask_random_color, bbox=None, points=None, edges=False
):
    """Composite segmentation masks over the image at ``args.img_path``.

    Renders masks via the CPU or GPU helper, optionally overlays contours,
    saves the result under ``args.output`` and shows it in an OpenCV window.

    Args:
        annotations: masks as a tensor/array, or a list of dicts with a
            ``"segmentation"`` key.
        args: parsed CLI namespace (reads img_path, output, device, retina,
            better_quality, withContours, point_label, randomcolor).
        mask_random_color: randomize mask colors when truthy.
        bbox: optional ``[x1, y1, x2, y2]`` box to draw.
        points: optional prompt points to draw.
        edges: unused; kept for interface compatibility.
    """
    if isinstance(annotations[0], dict):
        annotations = [annotation["segmentation"]
                       for annotation in annotations]
    result_name = os.path.basename(args.img_path)
    image = cv2.imread(args.img_path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    original_h = image.shape[0]
    original_w = image.shape[1]
    plt.figure(figsize=(original_w / 100, original_h / 100))
    plt.imshow(image)
    if args.better_quality:
        if isinstance(annotations[0], torch.Tensor):
            annotations = np.array(annotations.cpu())
        for i, mask in enumerate(annotations):
            # Close small holes first, then open to remove small speckles.
            mask = cv2.morphologyEx(
                mask.astype(np.uint8), cv2.MORPH_CLOSE, np.ones(
                    (3, 3), np.uint8)
            )
            annotations[i] = cv2.morphologyEx(
                mask.astype(np.uint8), cv2.MORPH_OPEN, np.ones(
                    (8, 8), np.uint8)
            )
    if args.device == "cpu":
        annotations = np.array(annotations)
        fast_show_mask(
            annotations,
            plt.gca(),
            random_color=mask_random_color,
            bbox=bbox,
            points=points,
            pointlabel=args.point_label,
            retinamask=args.retina,
            target_height=original_h,
            target_width=original_w,
        )
    else:
        if isinstance(annotations[0], np.ndarray):
            annotations = torch.from_numpy(annotations)
        fast_show_mask_gpu(
            annotations,
            plt.gca(),
            random_color=args.randomcolor,
            bbox=bbox,
            points=points,
            pointlabel=args.point_label,
            retinamask=args.retina,
            target_height=original_h,
            target_width=original_w,
        )
    if isinstance(annotations, torch.Tensor):
        annotations = annotations.cpu().numpy()
    if args.withContours:
        contour_all = []
        temp = np.zeros((original_h, original_w, 1))
        for i, mask in enumerate(annotations):
            if type(mask) == dict:
                mask = mask["segmentation"]
            annotation = mask.astype(np.uint8)
            if not args.retina:
                # Low-res masks must be scaled up to the original frame.
                annotation = cv2.resize(
                    annotation,
                    (original_w, original_h),
                    interpolation=cv2.INTER_NEAREST,
                )
            contours, hierarchy = cv2.findContours(
                annotation, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
            )
            for contour in contours:
                contour_all.append(contour)
        cv2.drawContours(temp, contour_all, -1, (255, 255, 255), 2)
        color = np.array([0 / 255, 0 / 255, 255 / 255, 0.8])
        contour_mask = temp / 255 * color.reshape(1, 1, -1)
        plt.imshow(contour_mask)
    save_path = args.output
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    plt.axis("off")
    fig = plt.gcf()
    plt.draw()
    try:
        buf = fig.canvas.tostring_rgb()
    except AttributeError:
        # The canvas has not been rendered yet; draw it and retry.
        fig.canvas.draw()
        buf = fig.canvas.tostring_rgb()
    cols, rows = fig.canvas.get_width_height()
    # np.fromstring is deprecated for binary data; frombuffer is the
    # supported zero-copy equivalent.
    img_array = np.frombuffer(buf, dtype=np.uint8).reshape(rows, cols, 3)
    cv2.imwrite(os.path.join(save_path, result_name),
                cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR))
    cv2.imshow("result", cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR))
    # keep 0.1s
    # cv2.waitKey(100)
    plt.close()
Hey @Ashoka74 ,
Thank you for your contribution! You can submit your changes in a pull request and we'll be happy to add you to the contributors.
@Ashoka74 tools.py doesn't seem to work — is it possible to share a clean, installable version?
@Ashoka74 could you please provide the working code for this?
Hi, thanks for sharing. I wonder how to keep the same color for a given class throughout a video. Currently a class gets a different color in each frame; the color isn't kept consistent for the same class across different inputs.
Hi @crankler, have you found a solution to the problem of colors differing between frames?