Can tensorflow-ssd be tested on videos, and what is the speed? Thank you.

May 25 '17 11:05 zxq309

Yes, the same way as for pictures.

May 27 '17 08:05 jet309

Thanks very much for your answer. Can you provide your video test code?

May 31 '17 10:05 zxq309

You need to load a video, then run detection on it frame by frame.

for example： ssd_test_image.py

test on some demo image and ........（modify from this）

cap = cv2.VideoCapture('1.avi')  # firstly, you should import cv2
while cap.isOpened():
    # get a frame
    ret, img = cap.read()
    if not ret:
        # No frame was returned (e.g. the video has ended), so img is not
        # usable; stop instead of passing it on to the detector.
        break
    # exchange channels 1 and 3, because a cv2 image (BGR) uses a different
    # channel order from a plt image (RGB)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # ... then use this img for detection
cap.release()

Jun 12 '17 09:06 123chengbo

@123chengbo Thanks very much for your answer. Another question: when testing on videos, the SSD algorithm fails to detect anything in some frames. Why does this happen?

Jun 15 '17 09:06 zxq309

You should give more details: pictures, your code, ...

Jun 19 '17 08:06 123chengbo

Jun 27 '17 02:06 mingfengwuye

@123chengbo When testing SSD on videos, an object is detected in one frame, but sometimes it is not detected in another frame. Why does this happen?

Jul 03 '17 05:07 zxq309

请问你解决这个问题了吗，我现在可以检测视频，但是出来的结果却是将视频分割成图片一帧帧的显示，我想要的是播放视频的同时在视频上实时显示检测结果啊，请问可以实现吗

Aug 16 '17 08:08 lfxx

You should change a few things.

In visualization.py, change the Matplotlib show code as follows:

def plt_bboxes(img, classes, scores, bboxes, figsize=(10,10), linewidth=1.5):
    """Draw detection boxes and labels on ``img`` and show it with OpenCV.

    Largely inspired by SSD-MXNET!

    Args:
        img: image array; ``img.shape[0]`` and ``img.shape[1]`` are used as
            its height and width. It is drawn on in place.
        classes: array of class ids, one per detection; negative ids are
            skipped.
        scores: detection scores, aligned with ``classes``.
        bboxes: one row per detection, read as (ymin, xmin, ymax, xmax) and
            multiplied by the image height/width to get pixel coordinates.
        figsize: unused; kept so existing callers keep working.
        linewidth: unused; kept so existing callers keep working.
    """
    height = img.shape[0]
    width = img.shape[1]
    colors = dict()
    for cls, score, box in zip(classes, scores, bboxes):
        cls_id = int(cls)
        if cls_id < 0:
            continue
        if cls_id not in colors:
            # Channel values are drawn in 0..255: values in [0, 1) would be
            # almost black on an 8-bit frame (assumes img is uint8, as frames
            # read by cv2.VideoCapture are). Seeding with the class id keeps a
            # class's colour stable from one video frame to the next.
            rng = random.Random(cls_id)
            colors[cls_id] = tuple(rng.randint(0, 255) for _ in range(3))
        ymin = int(box[0] * height)
        xmin = int(box[1] * width)
        ymax = int(box[2] * height)
        xmax = int(box[3] * width)
        tl = (xmin, ymin)
        br = (xmax, ymax)
        # The drawing calls modify img in place; their return value is not
        # reassigned because older OpenCV bindings return None.
        cv2.rectangle(img, tl, br, colors[cls_id], 2)
        class_name = str(cls_id)
        cv2.putText(img, '{:s} | {:.3f}'.format(class_name, score), tl,
                    cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2)

    # Shown once per call, after all boxes are drawn, so frames without any
    # detection are displayed as well.
    cv2.imshow('img', img)

Then create a new Python file like this:

import os
import math
import random
import sys
import time

import cv2
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

# The project packages are resolved relative to the parent directory, so the
# path must be extended before they are imported.
sys.path.append('../')
from nets import ssd_vgg_300, ssd_common, np_methods
from preprocessing import ssd_vgg_preprocessing
from notebooks import visualization

slim = tf.contrib.slim

# TensorFlow session shared by every call to process_image().
gpu_options = tf.GPUOptions(allow_growth=False)
config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)
isess = tf.InteractiveSession(config=config)

# Input placeholder and evaluation pre-processing (warp-resize to net_shape).
net_shape = (300, 300)
data_format = 'NHWC'
img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
    img_input, None, None, net_shape, data_format,
    resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
image_4d = tf.expand_dims(image_pre, 0)

# Build the SSD network. reuse is True only when `ssd_net` already exists in
# this scope (presumably a leftover from re-running the code in a notebook).
reuse = True if 'ssd_net' in locals() else None
ssd_net = ssd_vgg_300.SSDNet()
with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
    predictions, localisations, _, _ = ssd_net.net(image_4d, is_training=False, reuse=reuse)

# Restore the trained weights from the checkpoint.
ckpt_filename = '../checkpoints/ssd_300_vgg.ckpt'
isess.run(tf.global_variables_initializer())
saver = tf.train.Saver()
saver.restore(isess, ckpt_filename)

# Anchor boxes for the network input shape, used to decode the predictions.
ssd_anchors = ssd_net.anchors(net_shape)


def process_image(img, select_threshold=0.5, nms_threshold=.45, net_shape=(300, 300)):
    """Run the SSD network on one image and return the filtered detections.

    Args:
        img: image fed to the ``img_input`` placeholder (uint8, three
            channels in the last dimension).
        select_threshold: passed to ``np_methods.ssd_bboxes_select``.
        nms_threshold: passed to ``np_methods.bboxes_nms``.
        net_shape: passed as ``img_shape`` to ``np_methods.ssd_bboxes_select``;
            the graph itself was built with the module-level ``net_shape``.

    Returns:
        Tuple ``(rclasses, rscores, rbboxes)`` after selection, clipping,
        sorting (top 400), non-maximum suppression and resizing.
    """
    # image_4d is not fetched: the original stored it in a local that was
    # never used.
    rpredictions, rlocalisations, rbbox_img = isess.run(
        [predictions, localisations, bbox_img],
        feed_dict={img_input: img})

    rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
        rpredictions, rlocalisations, ssd_anchors,
        select_threshold=select_threshold, img_shape=net_shape,
        num_classes=21, decode=True)
    rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
    rclasses, rscores, rbboxes = np_methods.bboxes_sort(rclasses, rscores, rbboxes, top_k=400)
    rclasses, rscores, rbboxes = np_methods.bboxes_nms(rclasses, rscores, rbboxes,
                                                       nms_threshold=nms_threshold)

    rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
    return rclasses, rscores, rbboxes

capture = cv2.VideoCapture('video.avi')
ii = 0
frame_rate_divider = 12  # for skipping frames; formula = 60/fps
try:
    while capture.isOpened():
        ret, frame = capture.read()  # ret is False when no frame could be read
        if not ret:
            # End of the video (or a read error): frame is not usable, so stop
            # instead of passing it to process_image().
            break
        if ii % frame_rate_divider == 0:
            # OpenCV decodes frames as BGR; swap to RGB for the network.
            # NOTE(review): assumes the model expects RGB input, as with
            # images loaded through matplotlib - confirm against the demo.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            rclasses, rscores, rbboxes = process_image(rgb_frame)
            # Draw on the original BGR frame, which is what cv2.imshow expects.
            visualization.plt_bboxes(frame, rclasses, rscores, rbboxes)
        ii += 1
        if cv2.waitKey(1) & 0xFF == ord('q'):  # hitting the "Q" key leaves the loop
            break
finally:
    # Release the video and close the window even if detection raised.
    capture.release()
    cv2.destroyAllWindows()

Feb 27 '18 14:02 Sarwar1000

Here is the code that worked for me based on the above posts

webcam.py:

import os
import math
import random
import sys
import time

import cv2
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

# The project packages are resolved relative to the parent directory, so the
# path must be extended before they are imported.
sys.path.append('../')
from nets import ssd_vgg_300, ssd_common, np_methods
from preprocessing import ssd_vgg_preprocessing
from notebooks import visualization

slim = tf.contrib.slim

# TensorFlow session used for inference.
gpu_options = tf.GPUOptions(allow_growth=False)
config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)
isess = tf.InteractiveSession(config=config)

# Input placeholder and evaluation pre-processing (warp-resize to net_shape).
net_shape = (300, 300)
data_format = 'NHWC'
img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
    img_input, None, None, net_shape, data_format,
    resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
image_4d = tf.expand_dims(image_pre, 0)

# Build the SSD network. reuse is True only when `ssd_net` already exists in
# this scope (presumably a leftover from re-running the code in a notebook).
reuse = True if 'ssd_net' in locals() else None
ssd_net = ssd_vgg_300.SSDNet()

with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
    predictions, localisations, _, _ = ssd_net.net(image_4d, is_training=False, reuse=reuse)

# Restore the trained weights from the checkpoint.
ckpt_filename = '../checkpoints/ssd_300_vgg.ckpt'
isess.run(tf.global_variables_initializer())
saver = tf.train.Saver()
saver.restore(isess, ckpt_filename)

# Anchor boxes for the network input shape, used to decode the predictions.
ssd_anchors = ssd_net.anchors(net_shape)

def process_image(img, select_threshold=0.5, nms_threshold=.45, net_shape=(300, 300)):
    """Run the SSD network on one image and return the filtered detections.

    Args:
        img: image fed to the ``img_input`` placeholder.
        select_threshold: passed to ``np_methods.ssd_bboxes_select``.
        nms_threshold: passed to ``np_methods.bboxes_nms``.
        net_shape: passed as ``img_shape`` to ``np_methods.ssd_bboxes_select``.

    Returns:
        Tuple ``(rclasses, rscores, rbboxes)`` after selection, clipping,
        sorting (top 400), non-maximum suppression and resizing.
    """
    # image_4d is not fetched: the original stored it in a local that was
    # never used.
    rpredictions, rlocalisations, rbbox_img = isess.run(
        [predictions, localisations, bbox_img],
        feed_dict={img_input: img})
    rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
        rpredictions, rlocalisations, ssd_anchors,
        select_threshold=select_threshold, img_shape=net_shape,
        num_classes=21, decode=True)
    rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
    rclasses, rscores, rbboxes = np_methods.bboxes_sort(rclasses, rscores, rbboxes, top_k=400)
    rclasses, rscores, rbboxes = np_methods.bboxes_nms(rclasses, rscores, rbboxes,
                                                       nms_threshold=nms_threshold)
    rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
    return rclasses, rscores, rbboxes

capture = cv2.VideoCapture(0)
try:
    while capture.isOpened():
        ret, img = capture.read()  # ret is False when no frame could be read
        if not ret:
            # No frame was delivered, so img is not usable; stop instead of
            # passing it to process_image().
            break
        # OpenCV delivers frames as BGR; swap to RGB for the network.
        # NOTE(review): assumes the model expects RGB input, as with images
        # loaded through matplotlib - confirm against the demo.
        rgb_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        rclasses, rscores, rbboxes = process_image(rgb_img)
        # Draw on the original BGR frame, which is what cv2.imshow expects.
        visualization.plt_bboxes(img, rclasses, rscores, rbboxes)
        if cv2.waitKey(1) & 0xFF == ord('q'):  # hitting the "Q" key leaves the loop
            break
finally:
    # Release the camera and close the window even if detection raised.
    capture.release()
    cv2.destroyAllWindows()

visualization.py (imported above as `from notebooks import visualization`):

def plt_bboxes(img, classes, scores, bboxes, figsize=(10,10), linewidth=1.5):
    """Draw labelled detection boxes on ``img`` and show it with OpenCV.

    Largely inspired by SSD-MXNET!

    Args:
        img: image array; ``img.shape[0]`` and ``img.shape[1]`` are used as
            its height and width. It is drawn on in place.
        classes: array of class ids, one per detection; negative ids are
            skipped. Ids index into the class-name list below.
        scores: detection scores, aligned with ``classes``.
        bboxes: one row per detection, read as (ymin, xmin, ymax, xmax) and
            multiplied by the image height/width to get pixel coordinates.
        figsize: unused; kept so existing callers keep working.
        linewidth: unused; kept so existing callers keep working.
    """
    cnames = ['background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
              'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',
              'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa',
              'train', 'tvmonitor']

    height = img.shape[0]
    width = img.shape[1]
    colors = dict()
    for cls, score, box in zip(classes, scores, bboxes):
        cls_id = int(cls)
        if cls_id < 0:
            continue
        if cls_id not in colors:
            # Channel values are drawn in 0..255: values in [0, 1) would be
            # almost black on an 8-bit frame (assumes img is uint8, as frames
            # read by cv2.VideoCapture are). Seeding with the class id keeps a
            # class's colour stable from one video frame to the next.
            rng = random.Random(cls_id)
            colors[cls_id] = tuple(rng.randint(0, 255) for _ in range(3))
        ymin = int(box[0] * height)
        xmin = int(box[1] * width)
        ymax = int(box[2] * height)
        xmax = int(box[3] * width)
        tl = (xmin, ymin)
        br = (xmax, ymax)
        # The drawing calls modify img in place; their return value is not
        # reassigned because older OpenCV bindings return None.
        cv2.rectangle(img, tl, br, colors[cls_id], 2)
        # Look the name up only after the bounds check (the original indexed
        # cnames first, so an unknown id raised IndexError); unknown ids are
        # labelled with their number instead.
        class_name = cnames[cls_id] if cls_id < len(cnames) else str(cls_id)
        cv2.putText(img, '{:s} | {:.3f}'.format(class_name, score), tl,
                    cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2)

    # Shown once per call, after all boxes are drawn, so frames without any
    # detection are displayed too and the video does not appear to stall.
    cv2.imshow('img', img)

May 01 '18 20:05 cborelc

@cborelc Hi! Thank you for your code. I'm a student in Taiwan who is just learning about image recognition. I have tried your code several times, but there is one error which I can't understand. Could you please help me solve this problem? Sorry for asking you about this; thank you with all my heart. [image: default]

Jul 03 '18 05:07 EdwinChien

请问你解决这个问题了吗，我现在可以检测视频，但是出来的结果却是将视频分割成图片一帧帧的显示，我想要的是播放视频的同时在视频上实时显示检测结果啊，请问可以实现吗

请问你现在解决这个问题了吗？可以参考下你的代码吗？

May 18 '20 09:05 china56321

SSD-Tensorflow
SSD-Tensorflow copied to clipboard

tensorflow ssd video

test on some demo image and ........（modify from this）

you should change few things

in visualization.py change in Matplotlib show...

and then make a new python file like this

Here is the code that worked for me based on the above posts

visualize.py:

SSD-Tensorflow SSD-Tensorflow copied to clipboard

tensorflow ssd video

test on some demo image and ........（modify from this）

you should change few things

in visualization.py change in Matplotlib show...

and then make a new python file like this

Here is the code that worked for me based on the above posts

visualize.py:

SSD-Tensorflow
SSD-Tensorflow copied to clipboard