I3D_Finetune
I3D_Finetune copied to clipboard
I fine-tuned the model for 3 output classes, but at prediction time I get an error saying the input shape does not match:
input to reshape is a tensor with 409600 values, but the requested shape is 3072
` import argparse import numpy as np import tensorflow as tf import i3d import cv2
def main(IMAGE_SIZE=224, TOP_RESULT_CLASSES=3, SAMPLE_VIDEO_FRAMES=250,
         SAMPLE_PATH_RGB='../input/v_CricketShot_g04_c01_rgb.npy',
         SAMPLE_PATH_VIDEO='../../finetune_i3d/input/test_vid/Abuse/Abuse001_x264.mp4',
         SAMPLE_PATH_FLOW='../input/v_CricketShot_g04_c01_flow.npy',
         EVAL_TYPE='rgb', imagenet_pretrained=True, NUM_CLASSES=3):
    """Build the I3D graph, restore a checkpoint, and print a class ranking.

    This is the main function; it can be called from other files.

    Args:
        IMAGE_SIZE: Height and width of each input frame; also the size
            video frames are resized to.
        TOP_RESULT_CLASSES: Currently unused (the per-class report that
            consumed it is disabled); kept for interface compatibility.
        SAMPLE_VIDEO_FRAMES: Number of frames in the fixed-shape input
            placeholder, and the maximum number of frames read from a video.
        SAMPLE_PATH_RGB: Path to a ``.npy`` RGB sample. Pass ``None`` to
            decode ``SAMPLE_PATH_VIDEO`` instead.
        SAMPLE_PATH_VIDEO: Video file decoded when ``SAMPLE_PATH_RGB`` is
            ``None``.
        SAMPLE_PATH_FLOW: Path to a ``.npy`` optical-flow sample (used for
            ``'flow'`` and ``'joint'``).
        EVAL_TYPE: One of ``'rgb'``, ``'rgb600'``, ``'flow'``, ``'joint'``.
        imagenet_pretrained: Selects the ``*_imagenet`` checkpoint entries
            when true.
        NUM_CLASSES: Number of output classes; forced to 600 for
            ``'rgb600'``.

    Raises:
        ValueError: If ``EVAL_TYPE`` is not a supported value, or the video
            does not yield exactly ``SAMPLE_VIDEO_FRAMES`` frames.
        KeyError: If ``CHECKPOINT_PATHS`` has no entry for a required
            checkpoint.
    """
    #############
    # Constants #
    #############
    CHECKPOINT_PATHS = {
        'rgb_imagenet': '../../finetune_i3d/output/finetune-siddata1-rgb-1/siddata1_rgb_0.306_models1-145'
    }
    LABEL_MAP_PATH = '../models/Label_map.txt'
    # NOTE(review): the original read LABEL_MAP_PATH_600 without ever defining
    # it (NameError for 'rgb600'). This path is a placeholder -- confirm the
    # real location of the 600-class label map.
    LABEL_MAP_PATH_600 = '../models/Label_map_600.txt'

    tf.reset_default_graph()
    eval_type = EVAL_TYPE
    if eval_type == 'rgb600':
        NUM_CLASSES = 600
    if eval_type not in ['rgb', 'rgb600', 'flow', 'joint']:
        raise ValueError('Bad `eval_type`, must be one of rgb, flow, joint')

    uses_rgb = eval_type in ['rgb', 'rgb600', 'joint']
    uses_flow = eval_type in ['flow', 'joint']

    # The label map is read (and must exist), although the report printed at
    # the end of this function lists class indices only.
    label_map_path = LABEL_MAP_PATH_600 if eval_type == 'rgb600' else LABEL_MAP_PATH
    with open(label_map_path) as label_file:
        kinetics_classes = [line.strip() for line in label_file]

    # Resolve checkpoint keys up front so a missing entry fails before the
    # graph is built instead of halfway through the session.
    rgb_checkpoint_key = 'rgb_imagenet' if imagenet_pretrained else eval_type
    flow_checkpoint_key = 'flow_imagenet' if imagenet_pretrained else 'flow'
    required_keys = []
    if uses_rgb:
        required_keys.append(rgb_checkpoint_key)
    if uses_flow:
        required_keys.append(flow_checkpoint_key)
    missing_keys = [key for key in required_keys if key not in CHECKPOINT_PATHS]
    if missing_keys:
        raise KeyError(
            'CHECKPOINT_PATHS has no entry for: %s' % ', '.join(missing_keys))

    ##############
    # Model Load #
    ##############
    # NOTE(review): both savers use reshape=True, so restore tries to reshape
    # each checkpoint tensor to the graph variable's shape. If the checkpoint's
    # logits layer was saved with a different class count than NUM_CLASSES,
    # restore fails with "Input to reshape is a tensor with X values, but the
    # requested shape has Y". Confirm the checkpoint's variable names and
    # shapes match the graph built here.
    if uses_rgb:
        rgb_input = tf.placeholder(
            tf.float32,
            shape=(1, SAMPLE_VIDEO_FRAMES, IMAGE_SIZE, IMAGE_SIZE, 3))
        with tf.variable_scope('RGB'):
            rgb_model = i3d.InceptionI3d(
                NUM_CLASSES, spatial_squeeze=True, final_endpoint='Logits')
            rgb_logits, _ = rgb_model(
                rgb_input, is_training=False, dropout_keep_prob=1.0)
        rgb_variable_map = {}
        for variable in tf.global_variables():
            if variable.name.split('/')[0] != 'RGB':
                continue
            checkpoint_name = variable.name.replace(':0', '')
            if eval_type == 'rgb600':
                # For rgb600 the 'RGB/inception_i3d/' prefix is stripped from
                # the names used to look variables up in the checkpoint.
                checkpoint_name = checkpoint_name[len('RGB/inception_i3d/'):]
            rgb_variable_map[checkpoint_name] = variable
        rgb_saver = tf.train.Saver(var_list=rgb_variable_map, reshape=True)

    if uses_flow:
        # Flow input has only 2 channels.
        flow_input = tf.placeholder(
            tf.float32,
            shape=(1, SAMPLE_VIDEO_FRAMES, IMAGE_SIZE, IMAGE_SIZE, 2))
        with tf.variable_scope('Flow'):
            flow_model = i3d.InceptionI3d(
                NUM_CLASSES, spatial_squeeze=True, final_endpoint='Logits')
            flow_logits, _ = flow_model(
                flow_input, is_training=False, dropout_keep_prob=1.0)
        flow_variable_map = {}
        for variable in tf.global_variables():
            if variable.name.split('/')[0] == 'Flow':
                flow_variable_map[variable.name.replace(':0', '')] = variable
        flow_saver = tf.train.Saver(var_list=flow_variable_map, reshape=True)

    if eval_type == 'rgb' or eval_type == 'rgb600':
        model_logits = rgb_logits
    elif eval_type == 'flow':
        model_logits = flow_logits
    else:
        model_logits = rgb_logits + flow_logits
    model_predictions = tf.nn.softmax(model_logits)

    ##############
    # Input Load #
    ##############
    def crop_center_square(frame):
        """Return the largest centered square crop of `frame`."""
        height, width = frame.shape[0:2]
        side = min(height, width)
        start_x = (width // 2) - (side // 2)
        start_y = (height // 2) - (side // 2)
        return frame[start_y:start_y + side, start_x:start_x + side]

    def load_video(path, max_frames=SAMPLE_VIDEO_FRAMES,
                   resize=(IMAGE_SIZE, IMAGE_SIZE)):
        """Decode up to `max_frames` frames from `path`.

        Each frame is center-cropped to a square, resized to `resize`, and has
        its channel order reversed (index [2, 1, 0]). The result is a
        one-element list holding the stacked frames divided by 255.0.
        """
        cap = cv2.VideoCapture(path)
        frames = []
        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                frame = crop_center_square(frame)
                frame = cv2.resize(frame, resize)
                frame = frame[:, :, [2, 1, 0]]
                frames.append(frame)
                if len(frames) == max_frames:
                    break
        finally:
            cap.release()
        return [np.array(frames) / 255.0]

    with tf.Session() as sess:
        feed_dict = {}
        if uses_rgb:
            rgb_saver.restore(sess, CHECKPOINT_PATHS[rgb_checkpoint_key])
            print("video shape")
            if SAMPLE_PATH_RGB is None:
                rgb_sample = load_video(SAMPLE_PATH_VIDEO)
                # The placeholder has a fixed frame count, so a short or
                # unreadable video is reported here rather than surfacing as
                # an opaque feed-shape error inside sess.run.
                decoded_frames = len(rgb_sample[0])
                if decoded_frames != SAMPLE_VIDEO_FRAMES:
                    raise ValueError(
                        'Decoded %d frames from %s but the model input expects %d'
                        % (decoded_frames, SAMPLE_PATH_VIDEO, SAMPLE_VIDEO_FRAMES))
            else:
                # Fixed: the original called `no.load`, an undefined name.
                rgb_sample = np.load(SAMPLE_PATH_RGB)
            feed_dict[rgb_input] = rgb_sample

        if uses_flow:
            flow_saver.restore(sess, CHECKPOINT_PATHS[flow_checkpoint_key])
            flow_sample = np.load(SAMPLE_PATH_FLOW)
            feed_dict[flow_input] = flow_sample

        ##############
        # Prediction #
        ##############
        out_logits, out_predictions = sess.run(
            [model_logits, model_predictions],
            feed_dict=feed_dict)
        out_logits = out_logits[0]
        out_predictions = out_predictions[0]
        # Class indices ordered from highest to lowest softmax probability.
        sorted_indices = np.argsort(out_predictions)[::-1]
        print('Norm of logits: %f' % np.linalg.norm(out_logits))
        print('\nTop classes and probabilities')
        print(sorted_indices)
main(SAMPLE_PATH_RGB=None) `
This is my code for testing.