SSD-Tensorflow copied to clipboard
tensorflow ssd video
Can tensorflow-ssd be tested on videos, and how fast is it? Thank you.
Yes, it works the same way as with pictures.
Thanks very much for your answer. Can you provide your video test code?
You need to load a video, then run detection on it frame by frame.
for example:
Start from the code that tests on the demo images and modify it like this:
# Read a video file frame by frame and hand each frame to the detector.
import cv2  # OpenCV is needed to decode the video

cap = cv2.VideoCapture('1.avi')
while cap.isOpened():
    # Get a frame; ret is False once no more frames can be read.
    ret, img = cap.read()
    if not ret:
        break
    # cv2 delivers channels in (b, g, r) order, which differs from the
    # (r, g, b) order used by plt, so swap the first and third channels.
    b, g, r = cv2.split(img)
    img = cv2.merge([r, g, b])
    # ... then use this img for detection, exactly as for a single picture.
cap.release()
@123chengbo Thanks very much for your answer. Another question: when testing on videos, the SSD algorithm fails to detect anything in some frames. Why does this happen?
You should give more details: pictures, your code, and so on.
@123chengbo When testing SSD on videos, an object is detected in one frame but sometimes not in another frame. Why does this happen?
You should change a few things.
First, change the Matplotlib-based display function to the following:
def plt_bboxes(img, classes, scores, bboxes, figsize=(10,10), linewidth=1.5):
    """Draw detection boxes on ``img`` with OpenCV and show it in a window.

    Args:
        img: Image array; ``img.shape[0]`` and ``img.shape[1]`` are read as
            height and width.
        classes: Array of per-detection class ids; entries with a negative
            id are skipped.
        scores: Per-detection scores, printed next to the class id.
        bboxes: Array indexed as ``bboxes[i, 0..3]`` = (ymin, xmin, ymax,
            xmax). The values are multiplied by the image height/width, so
            they are expected to be fractions of the image size.
        figsize: Unused; kept so existing callers keep working.
        linewidth: Unused; kept so existing callers keep working.
    """
    height = img.shape[0]
    width = img.shape[1]
    colors = dict()
    for cls, score, box in zip(classes, scores, bboxes):
        cls_id = int(cls)
        if cls_id < 0:
            continue
        if cls_id not in colors:
            # OpenCV colours for 8-bit images are 0-255 per channel; the
            # previous 0-1 floats rendered every box as (almost) black.
            colors[cls_id] = (random.randint(0, 255),
                              random.randint(0, 255),
                              random.randint(0, 255))
        # Scale the fractional box corners to pixel coordinates.
        ymin = int(box[0] * height)
        xmin = int(box[1] * width)
        ymax = int(box[2] * height)
        xmax = int(box[3] * width)
        top_left = (xmin, ymin)
        bottom_right = (xmax, ymax)
        img = cv2.rectangle(img, top_left, bottom_right, colors[cls_id], 2)
        class_name = str(cls_id)
        img = cv2.putText(img, '{:s} | {:.3f}'.format(class_name, score),
                          top_left, cv2.FONT_HERSHEY_COMPLEX, 1,
                          (255, 255, 255), 2)
    # Show the annotated frame; the caller is expected to pump the GUI
    # event loop (e.g. with cv2.waitKey) so the window actually refreshes.
    cv2.imshow('img', img)
Then create a new Python file like this:
# Build the SSD-300 VGG evaluation graph, restore the checkpoint and define
# process_image(), which runs the network on a single image.
import os
import math
import random

import numpy as np
import tensorflow as tf
import cv2
import time

slim = tf.contrib.slim

import matplotlib.pyplot as plt
import matplotlib.image as mpimg

import sys
sys.path.append('../')  # make the repository packages importable

from nets import ssd_vgg_300, ssd_common, np_methods
from preprocessing import ssd_vgg_preprocessing
from notebooks import visualization

# One interactive session shared by every process_image() call.
gpu_options = tf.GPUOptions(allow_growth=False)
config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)
isess = tf.InteractiveSession(config=config)

# Input placeholder and evaluation pre-processing.
net_shape = (300, 300)
data_format = 'NHWC'
img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
    img_input, None, None, net_shape, data_format,
    resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
image_4d = tf.expand_dims(image_pre, 0)

# Define the SSD model; reuse variables if the net was already built
# (e.g. when this code is re-run in the same interpreter).
reuse = True if 'ssd_net' in locals() else None
ssd_net = ssd_vgg_300.SSDNet()
with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
    predictions, localisations, _, _ = ssd_net.net(
        image_4d, is_training=False, reuse=reuse)

# Restore the pre-trained weights.
ckpt_filename = '../checkpoints/ssd_300_vgg.ckpt'
saver = tf.train.Saver()
saver.restore(isess, ckpt_filename)

ssd_anchors = ssd_net.anchors(net_shape)


def process_image(img, select_threshold=0.5, nms_threshold=.45, net_shape=(300, 300)):
    """Run the SSD network on one image and post-process the detections.

    Args:
        img: Image fed to the ``img_input`` placeholder, which is declared
            as uint8 with shape (None, None, 3).
        select_threshold: Forwarded to ``np_methods.ssd_bboxes_select``.
        nms_threshold: Forwarded to ``np_methods.bboxes_nms``.
        net_shape: Passed as ``img_shape`` to ``ssd_bboxes_select``.

    Returns:
        The tuple ``(rclasses, rscores, rbboxes)`` after selection,
        clipping, sorting (``top_k=400``), NMS and resizing.
    """
    # Run the network on the image.
    rimg, rpredictions, rlocalisations, rbbox_img = isess.run(
        [image_4d, predictions, localisations, bbox_img],
        feed_dict={img_input: img})

    # Select, clip, sort and de-duplicate the candidate boxes.
    rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
        rpredictions, rlocalisations, ssd_anchors,
        select_threshold=select_threshold, img_shape=net_shape,
        num_classes=21, decode=True)
    rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
    rclasses, rscores, rbboxes = np_methods.bboxes_sort(
        rclasses, rscores, rbboxes, top_k=400)
    rclasses, rscores, rbboxes = np_methods.bboxes_nms(
        rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
    # Resize the boxes relative to rbbox_img
    # (presumably back to the original image frame -- confirm in np_methods).
    rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
    return rclasses, rscores, rbboxes
# Run detection on a video file, processing only every Nth frame.
capture = cv2.VideoCapture('video.avi')
ii = 0
# Only every frame_rate_divider-th frame is sent through the detector
# (the original author suggests 60 / fps as a starting value).
frame_rate_divider = 12
while capture.isOpened():
    # ret is False when no frame could be read (e.g. the video has ended).
    ret, frame = capture.read()
    if not ret:
        break
    if ii % frame_rate_divider == 0:
        rclasses, rscores, rbboxes = process_image(frame)
        visualization.plt_bboxes(frame, rclasses, rscores, rbboxes)
    ii += 1
    # Pressing "q" leaves the loop so the cleanup below runs.
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
capture.release()
cv2.destroyAllWindows()
Here is the code that worked for me, based on the above posts:
# Set up the SSD-300 VGG network for inference and define process_image(),
# the per-frame detection entry point used by the capture loop.
import os
import math
import random

import numpy as np
import tensorflow as tf
import cv2
import time

slim = tf.contrib.slim

import matplotlib.pyplot as plt
import matplotlib.image as mpimg

import sys
sys.path.append('../')  # the repository root holds nets/, preprocessing/, notebooks/

from nets import ssd_vgg_300, ssd_common, np_methods
from preprocessing import ssd_vgg_preprocessing
from notebooks import visualization

# Session used for all inference calls.
gpu_options = tf.GPUOptions(allow_growth=False)
config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)
isess = tf.InteractiveSession(config=config)

# Placeholder for the raw image plus the evaluation pre-processing graph.
net_shape = (300, 300)
data_format = 'NHWC'
img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
    img_input, None, None, net_shape, data_format,
    resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
image_4d = tf.expand_dims(image_pre, 0)

# Build the model, reusing variables when it already exists in this scope.
reuse = True if 'ssd_net' in locals() else None
ssd_net = ssd_vgg_300.SSDNet()
with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
    predictions, localisations, _, _ = ssd_net.net(
        image_4d, is_training=False, reuse=reuse)

# Load the checkpoint into the session.
ckpt_filename = '../checkpoints/ssd_300_vgg.ckpt'
saver = tf.train.Saver()
saver.restore(isess, ckpt_filename)

ssd_anchors = ssd_net.anchors(net_shape)


def process_image(img, select_threshold=0.5, nms_threshold=.45, net_shape=(300, 300)):
    """Detect objects in a single image with the restored SSD network.

    Args:
        img: Image fed to the ``img_input`` placeholder (declared as uint8,
            shape (None, None, 3)).
        select_threshold: Passed through to ``np_methods.ssd_bboxes_select``.
        nms_threshold: Passed through to ``np_methods.bboxes_nms``.
        net_shape: Used as ``img_shape`` in ``ssd_bboxes_select``.

    Returns:
        ``(rclasses, rscores, rbboxes)`` as produced by the select, clip,
        sort (``top_k=400``), NMS and resize steps below.
    """
    # Forward pass.
    rimg, rpredictions, rlocalisations, rbbox_img = isess.run(
        [image_4d, predictions, localisations, bbox_img],
        feed_dict={img_input: img})

    # Post-processing chain on the raw network output.
    rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
        rpredictions, rlocalisations, ssd_anchors,
        select_threshold=select_threshold, img_shape=net_shape,
        num_classes=21, decode=True)
    rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
    rclasses, rscores, rbboxes = np_methods.bboxes_sort(
        rclasses, rscores, rbboxes, top_k=400)
    rclasses, rscores, rbboxes = np_methods.bboxes_nms(
        rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
    rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
    return rclasses, rscores, rbboxes
# Run detection on every frame from capture device 0 until "q" is pressed.
capture = cv2.VideoCapture(0)
while capture.isOpened():
    # ret is False when no frame could be grabbed from the device.
    ret, img = capture.read()
    if not ret:
        break
    rclasses, rscores, rbboxes = process_image(img)
    visualization.plt_bboxes(img, rclasses, rscores, rbboxes)
    # waitKey also lets the display window refresh; "q" ends the loop.
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
capture.release()
cv2.destroyAllWindows()
def plt_bboxes(img, classes, scores, bboxes, figsize=(10,10), linewidth=1.5): """Visualize bounding boxes. Largely inspired by SSD-MXNET! """ cnames=['background','aeroplane','bicycle','bird','boat','bottle','bus','car','cat','chair','cow','diningtable','dog','horse','motorbike','person','pottedplant','sheep','sofa','train','tvmonitor']
height = img.shape[0]
width = img.shape[1]
colors = dict()
for i in range(classes.shape[0]):
cls_id = int(classes[i])
if cls_id >= 0:
score = scores[i]
if cls_id not in colors:
colors[cls_id] = (random.random(), random.random(), random.random())
ymin = int(bboxes[i, 0] * height)
xmin = int(bboxes[i, 1] * width)
ymax = int(bboxes[i, 2] * height)
xmax = int(bboxes[i, 3] * width)
if cls_id<len(cnames):
@cborelc Hi! Thank you for your code.
I'm a student in Taiwan who is just learning about image recognition.
I have tried your code several times, but there is one error that I can't understand.
Could you please help me solve this problem?
Sorry for bothering you with this, and thank you with all my heart.