SSD-Tensorflow
SSD-Tensorflow copied to clipboard
tensorflow ssd video
Can tensorflow-ssd be tested on videos, and how fast is it? Thank you.
yes,the same as pictures
Thanks very much for your answer. Can you provide your video test code?
You need to load a video, then run detection on it frame by frame.
for example: ssd_test_image.py
test on some demo image and ........(modify from this)
import cv2  # OpenCV must be imported before any of the calls below

cap = cv2.VideoCapture('1.avi')
while cap.isOpened():
    # Get a frame. `ret` is False when no frame could be read (for example
    # at the end of the file); `img` must not be used in that case.
    ret, img = cap.read()
    if not ret:
        break
    # Exchange channels 1 and 3: the channel order of a cv2 image differs
    # from the one matplotlib uses, so convert BGR -> RGB.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # ... then use this img for detection
cap.release()
@123chengbo Thanks very much for your answer. Another question: when testing on videos, the SSD algorithm fails to detect anything in some frames. Why does this happen?
You should give more details: pictures, your code, and so on.
@123chengbo When testing SSD on videos, the object is detected in one frame, but sometimes it is not detected in another frame. Why does this happen?
请问你解决这个问题了吗,我现在可以检测视频,但是出来的结果却是将视频分割成图片一帧帧的显示,我想要的是播放视频的同时在视频上实时显示检测结果啊,请问可以实现吗
You should change a few things.
In visualization.py, replace the Matplotlib display code in plt_bboxes with OpenCV drawing calls:
def plt_bboxes(img, classes, scores, bboxes, figsize=(10,10), linewidth=1.5):
    """Draw detection boxes and labels on `img` with OpenCV and display it.

    Largely inspired by SSD-MXNET!

    The boxes are drawn directly onto `img` and the result is shown in the
    OpenCV window named 'img'. This function does not call `cv2.waitKey`;
    the caller has to do that for the window to refresh.

    Args:
        img: image array; `img.shape[0]` and `img.shape[1]` are used as the
            height and width in pixels.
        classes: sequence of class ids, one per detection. Entries with a
            negative id are skipped.
        scores: sequence of detection scores, parallel to `classes`.
        bboxes: sequence of boxes, parallel to `classes`; each box is
            indexed as (ymin, xmin, ymax, xmax) and is scaled by the image
            height/width, i.e. it is expected in relative coordinates.
        figsize: unused; kept so existing Matplotlib-style callers still work.
        linewidth: unused; kept for the same reason.
    """
    height = img.shape[0]
    width = img.shape[1]
    colors = dict()
    for cls, score, box in zip(classes, scores, bboxes):
        cls_id = int(cls)
        if cls_id < 0:
            continue
        if cls_id not in colors:
            # OpenCV colour components are on a 0-255 scale, so the floats in
            # [0, 1) used for Matplotlib would render almost black. Seeding
            # with the class id keeps a class's colour stable across frames.
            rng = random.Random(cls_id)
            colors[cls_id] = tuple(rng.randint(0, 255) for _ in range(3))
        ymin = int(box[0] * height)
        xmin = int(box[1] * width)
        ymax = int(box[2] * height)
        xmax = int(box[3] * width)
        tl = (xmin, ymin)
        br = (xmax, ymax)
        # Drawing calls modify `img` in place; the return value is not
        # relied on because it differs between OpenCV versions.
        cv2.rectangle(img, tl, br, colors[cls_id], 2)
        class_name = str(cls_id)
        cv2.putText(img, '{:s} | {:.3f}'.format(class_name, score), tl,
                    cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2)
    cv2.imshow('img', img)
and then make a new python file like this
import os
import math
import random
import sys
import time

import cv2
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

# The project packages live one directory up, so extend the search path
# before importing them.
sys.path.append('../')
from nets import ssd_vgg_300, ssd_common, np_methods
from preprocessing import ssd_vgg_preprocessing
from notebooks import visualization

slim = tf.contrib.slim

# One interactive session is created at import time and shared by
# process_image() below.
gpu_options = tf.GPUOptions(allow_growth=False)
config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)
isess = tf.InteractiveSession(config=config)

# Input placeholder and evaluation pre-processing (warp-resize to net_shape).
net_shape = (300, 300)
data_format = 'NHWC'
img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
    img_input, None, None, net_shape, data_format,
    resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
image_4d = tf.expand_dims(image_pre, 0)  # add a leading batch dimension

# Build the SSD network. `reuse` is only True when `ssd_net` already exists
# in this scope -- presumably a leftover from re-running notebook cells.
reuse = True if 'ssd_net' in locals() else None
ssd_net = ssd_vgg_300.SSDNet()
with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
    predictions, localisations, _, _ = ssd_net.net(image_4d, is_training=False, reuse=reuse)

# Initialise every variable, then overwrite them from the checkpoint.
ckpt_filename = '../checkpoints/ssd_300_vgg.ckpt'
isess.run(tf.global_variables_initializer())
saver = tf.train.Saver()
saver.restore(isess, ckpt_filename)

ssd_anchors = ssd_net.anchors(net_shape)


def process_image(img, select_threshold=0.5, nms_threshold=.45, net_shape=(300, 300)):
    """Run the SSD network on one image and post-process its detections.

    Args:
        img: image fed to the `img_input` placeholder (uint8, three
            channels in the last dimension).
        select_threshold: passed to `np_methods.ssd_bboxes_select` as its
            `select_threshold`.
        nms_threshold: passed to `np_methods.bboxes_nms` as its
            `nms_threshold`.
        net_shape: passed to `np_methods.ssd_bboxes_select` as `img_shape`.

    Returns:
        Tuple `(rclasses, rscores, rbboxes)` as returned by the np_methods
        post-processing chain (select, clip, sort, NMS, resize).
    """
    # Only the tensors that are actually used are fetched; the pre-processed
    # image itself was fetched before but never read.
    rpredictions, rlocalisations, rbbox_img = isess.run(
        [predictions, localisations, bbox_img],
        feed_dict={img_input: img})

    rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
        rpredictions, rlocalisations, ssd_anchors,
        select_threshold=select_threshold, img_shape=net_shape,
        num_classes=21, decode=True)
    rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
    rclasses, rscores, rbboxes = np_methods.bboxes_sort(
        rclasses, rscores, rbboxes, top_k=400)
    rclasses, rscores, rbboxes = np_methods.bboxes_nms(
        rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
    rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
    return rclasses, rscores, rbboxes
capture = cv2.VideoCapture('video.avi')
ii = 0  # number of frames read so far
frame_rate_divider = 12  # for skipping frames; formula = 60 / fps
try:
    while capture.isOpened():
        # `ret` is False when no frame could be read (e.g. end of the video);
        # `frame` is unusable then, so stop instead of feeding it to the net.
        ret, frame = capture.read()
        if not ret:
            break
        # Only every `frame_rate_divider`-th frame is run through the
        # detector and displayed; the frames in between are skipped.
        if ii % frame_rate_divider == 0:
            # NOTE(review): OpenCV frames are BGR; confirm whether the
            # network expects RGB input before relying on the scores.
            rclasses, rscores, rbboxes = process_image(frame)
            visualization.plt_bboxes(frame, rclasses, rscores, rbboxes)
        ii += 1
        # Pressing the "q" key leaves the loop.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the video and close the window even if detection raised.
    capture.release()
    cv2.destroyAllWindows()
Here is the code that worked for me based on the above posts
webcam.py:
import os
import math
import random
import sys
import time

import cv2
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

# The project packages live one directory up, so extend the search path
# before importing them.
sys.path.append('../')
from nets import ssd_vgg_300, ssd_common, np_methods
from preprocessing import ssd_vgg_preprocessing
from notebooks import visualization

slim = tf.contrib.slim

# One interactive session is created at import time and shared by the rest
# of the script.
gpu_options = tf.GPUOptions(allow_growth=False)
config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)
isess = tf.InteractiveSession(config=config)

# Input placeholder and evaluation pre-processing (warp-resize to net_shape).
net_shape = (300, 300)
data_format = 'NHWC'
img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
    img_input, None, None, net_shape, data_format,
    resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
image_4d = tf.expand_dims(image_pre, 0)  # add a leading batch dimension

# Build the SSD network. `reuse` is only True when `ssd_net` already exists
# in this scope -- presumably a leftover from re-running notebook cells.
reuse = True if 'ssd_net' in locals() else None
ssd_net = ssd_vgg_300.SSDNet()
with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
    predictions, localisations, _, _ = ssd_net.net(image_4d, is_training=False, reuse=reuse)

# Initialise every variable, then overwrite them from the checkpoint.
ckpt_filename = '../checkpoints/ssd_300_vgg.ckpt'
isess.run(tf.global_variables_initializer())
saver = tf.train.Saver()
saver.restore(isess, ckpt_filename)

ssd_anchors = ssd_net.anchors(net_shape)
def process_image(img, select_threshold=0.5, nms_threshold=.45, net_shape=(300, 300)):
    """Run the SSD network on one image and post-process its detections.

    Args:
        img: image fed to the `img_input` placeholder.
        select_threshold: passed to `np_methods.ssd_bboxes_select` as its
            `select_threshold`.
        nms_threshold: passed to `np_methods.bboxes_nms` as its
            `nms_threshold`.
        net_shape: passed to `np_methods.ssd_bboxes_select` as `img_shape`.

    Returns:
        Tuple `(rclasses, rscores, rbboxes)` as returned by the np_methods
        post-processing chain (select, clip, sort, NMS, resize).
    """
    # Only the tensors that are actually used are fetched; the pre-processed
    # image itself was fetched before but never read.
    rpredictions, rlocalisations, rbbox_img = isess.run(
        [predictions, localisations, bbox_img],
        feed_dict={img_input: img})

    rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
        rpredictions, rlocalisations, ssd_anchors,
        select_threshold=select_threshold, img_shape=net_shape,
        num_classes=21, decode=True)
    rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
    rclasses, rscores, rbboxes = np_methods.bboxes_sort(
        rclasses, rscores, rbboxes, top_k=400)
    rclasses, rscores, rbboxes = np_methods.bboxes_nms(
        rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
    rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
    return rclasses, rscores, rbboxes
capture = cv2.VideoCapture(0)
try:
    while capture.isOpened():
        # `ret` is False when no frame could be grabbed; `img` is unusable
        # then, so stop instead of feeding it to the network.
        ret, img = capture.read()
        if not ret:
            break
        # NOTE(review): OpenCV frames are BGR; confirm whether the network
        # expects RGB input before relying on the scores.
        rclasses, rscores, rbboxes = process_image(img)
        visualization.plt_bboxes(img, rclasses, rscores, rbboxes)
        # Pressing the "q" key leaves the loop.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the camera and close the window even if detection raised.
    capture.release()
    cv2.destroyAllWindows()
visualize.py:
def plt_bboxes(img, classes, scores, bboxes, figsize=(10,10), linewidth=1.5):
    """Draw labelled detection boxes on `img` with OpenCV and display it.

    Largely inspired by SSD-MXNET!

    The boxes are drawn directly onto `img` and the result is shown in the
    OpenCV window named 'img'. This function does not call `cv2.waitKey`;
    the caller has to do that for the window to refresh.

    Args:
        img: image array; `img.shape[0]` and `img.shape[1]` are used as the
            height and width in pixels.
        classes: sequence of class ids, one per detection. Entries with a
            negative id are skipped.
        scores: sequence of detection scores, parallel to `classes`.
        bboxes: sequence of boxes, parallel to `classes`; each box is
            indexed as (ymin, xmin, ymax, xmax) and is scaled by the image
            height/width, i.e. it is expected in relative coordinates.
        figsize: unused; kept so existing Matplotlib-style callers still work.
        linewidth: unused; kept for the same reason.
    """
    cnames = ['background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
              'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',
              'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa',
              'train', 'tvmonitor']
    height = img.shape[0]
    width = img.shape[1]
    colors = dict()
    for cls, score, box in zip(classes, scores, bboxes):
        cls_id = int(cls)
        if cls_id < 0:
            continue
        if cls_id not in colors:
            # OpenCV colour components are on a 0-255 scale, so the floats in
            # [0, 1) used for Matplotlib would render almost black. Seeding
            # with the class id keeps a class's colour stable across frames.
            rng = random.Random(cls_id)
            colors[cls_id] = tuple(rng.randint(0, 255) for _ in range(3))
        ymin = int(box[0] * height)
        xmin = int(box[1] * width)
        ymax = int(box[2] * height)
        xmax = int(box[3] * width)
        tl = (xmin, ymin)
        br = (xmax, ymax)
        # Drawing calls modify `img` in place; the return value is not
        # relied on because it differs between OpenCV versions.
        cv2.rectangle(img, tl, br, colors[cls_id], 2)
        # Check the range BEFORE indexing: ids beyond the name table fall
        # back to the numeric id instead of raising IndexError.
        if cls_id < len(cnames):
            class_name = cnames[cls_id]
        else:
            class_name = str(cls_id)
        cv2.putText(img, '{:s} | {:.3f}'.format(class_name, score), tl,
                    cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2)
    cv2.imshow('img', img)
@cborelc Hi! Thank you for your code.
I'm a student in Taiwan who is just learning about image recognition.
I have tried your code several times, but there is one error which I can't understand...
Could you please help me solve this problem?
Sorry for asking you about this, and thank you with all my heart.
请问你解决这个问题了吗,我现在可以检测视频,但是出来的结果却是将视频分割成图片一帧帧的显示,我想要的是播放视频的同时在视频上实时显示检测结果啊,请问可以实现吗
请问你现在解决这个问题了吗?可以参考下你的代码吗?