I3D_Finetune I finetuned the model for 3 output classes but now at the time of prediction i got the error of input Shape does not match

I finetuned the model for 3 output classes but now at the time of prediction i got the error of input Shape does not match

Open siddharth2022 opened this issue 6 years ago • 1 comments

Screenshot (232) input to reshape is a tensor with 409600 values, but the requested shape is 3072

Jun 21 '19 14:06 siddharth2022

` import argparse import numpy as np import tensorflow as tf import i3d import cv2

def main(IMAGE_SIZE=224,TOP_RESULT_CLASSES=3,SAMPLE_VIDEO_FRAMES = 250,
SAMPLE_PATH_RGB = '../input/v_CricketShot_g04_c01_rgb.npy',
SAMPLE_PATH_VIDEO= '../../finetune_i3d/input/test_vid/Abuse/Abuse001_x264.mp4',
SAMPLE_PATH_FLOW = '../input/v_CricketShot_g04_c01_flow.npy',
EVAL_TYPE = 'rgb',imagenet_pretrained = True,NUM_CLASSES = 3): """ This is main fuction which can be called from outher files. """

#############
# Constants #
#############


# IMAGE_SIZE = 224
# TOP_RESULT_CLASSES=20
# SAMPLE_VIDEO_FRAMES = 79

# SAMPLE_PATH_RGB = '../input/v_CricketShot_g04_c01_rgb.npy'
# SAMPLE_PATH_FLOW = '../input/v_CricketShot_g04_c01_flow.npy'
CHECKPOINT_PATHS = {
		'rgb_imagenet': '../../finetune_i3d/output/finetune-siddata1-rgb-1/siddata1_rgb_0.306_models1-145'
		
}
LABEL_MAP_PATH = '../models/Label_map.txt'



tf.reset_default_graph()
eval_type = EVAL_TYPE
# imagenet_pretrained = True

# NUM_CLASSES = 400

if eval_type == 'rgb600':
	NUM_CLASSES = 600

if eval_type not in ['rgb', 'rgb600', 'flow', 'joint']:
	raise ValueError('Bad `eval_type`, must be one of rgb, flow, joint')

if eval_type == 'rgb600':
	kinetics_classes = [x.strip() for x in open(LABEL_MAP_PATH_600)]
else:
	kinetics_classes = [x.strip() for x in open(LABEL_MAP_PATH)]


##############
# Model Load #
##############
'''
This will load the model from the saved model directory.
'''

if eval_type in ['rgb', 'rgb600', 'joint']:
	
	rgb_input = tf.placeholder(
			tf.float32,
			shape=(1, SAMPLE_VIDEO_FRAMES, IMAGE_SIZE, IMAGE_SIZE, 3))


	with tf.variable_scope('RGB'):
		rgb_model = i3d.InceptionI3d(
				NUM_CLASSES, spatial_squeeze=True, final_endpoint='Logits')
		rgb_logits, _ = rgb_model(
				rgb_input, is_training=False, dropout_keep_prob=1.0)


	rgb_variable_map = {}
	for variable in tf.global_variables():

		if variable.name.split('/')[0] == 'RGB':
			if eval_type == 'rgb600':
				rgb_variable_map[variable.name.replace(':0', '')[len('RGB/inception_i3d/'):]] = variable
			else:
				rgb_variable_map[variable.name.replace(':0', '')] = variable

	rgb_saver = tf.train.Saver(var_list=rgb_variable_map, reshape=True)

if eval_type in ['flow', 'joint']:
	# Flow input has only 2 channels.
	flow_input = tf.placeholder(
			tf.float32,
			shape=(1, SAMPLE_VIDEO_FRAMES, IMAGE_SIZE, IMAGE_SIZE, 2))
	with tf.variable_scope('Flow'):
		flow_model = i3d.InceptionI3d(
				NUM_CLASSES, spatial_squeeze=True, final_endpoint='Logits')
		flow_logits, _ = flow_model(
				flow_input, is_training=False, dropout_keep_prob=1.0)
	flow_variable_map = {}
	for variable in tf.global_variables():
		if variable.name.split('/')[0] == 'Flow':
			flow_variable_map[variable.name.replace(':0', '')] = variable
	flow_saver = tf.train.Saver(var_list=flow_variable_map, reshape=True)

if eval_type == 'rgb' or eval_type == 'rgb600':
	model_logits = rgb_logits
elif eval_type == 'flow':
	model_logits = flow_logits
else:
	model_logits = rgb_logits + flow_logits
model_predictions = tf.nn.softmax(model_logits)

##############
# Input Load #
##############
'''
This will load the input files,
if you have choose rgb then it will read only rgb ,
if you have choose flow then it will load rgb and flow input
'''
def crop_center_square(frame):
	y,x = frame.shape[0:2]
	min_dim = min(y,x)
	start_x = (x // 2) - (min_dim // 2)
	start_y = (y // 2) - (min_dim // 2)
	return frame[start_y:start_y+min_dim,start_x:start_x+min_dim]

def load_video(path,max_frames=SAMPLE_VIDEO_FRAMES,resize=(224,224)):
	cap = cv2.VideoCapture(path)
	frames=[]
	try:
		while True:
			ret,frame = cap.read()
			if not ret:
				break
			frame = crop_center_square(frame)
			frame = cv2.resize(frame,resize)
			frame = frame[:,:,[2,1,0]]
			frames.append(frame)

			if len(frames) == max_frames:
				break
	finally:
		cap.release()
	# print(len(frames))
	return [np.array(frames)/255.0]

with tf.Session() as sess:
	feed_dict = {}
	if eval_type in ['rgb', 'rgb600', 'joint']:
		if imagenet_pretrained:
			rgb_saver.restore(sess, CHECKPOINT_PATHS['rgb_imagenet'])
		else:
			rgb_saver.restore(sess, CHECKPOINT_PATHS[eval_type])
		print("video shape")
		# print(load_video(SAMPLE_PATH_VIDEO).shape)	

		if SAMPLE_PATH_RGB is None:
			rgb_sample = load_video(SAMPLE_PATH_VIDEO)
		else:
			rgb_sample = no.load(SAMPLE_PATH_RGB)


		
		feed_dict[rgb_input] = rgb_sample

	if eval_type in ['flow', 'joint']:
		if imagenet_pretrained:
			flow_saver.restore(sess, CHECKPOINT_PATHS['flow_imagenet'])
		else:
			flow_saver.restore(sess, CHECKPOINT_PATHS['flow'])
		
		flow_sample = np.load(SAMPLE_PATH_FLOW)
		
		feed_dict[flow_input] = flow_sample


	##############
	# Prediction #
	##############		

	out_logits, out_predictions = sess.run(
			[model_logits, model_predictions],
			feed_dict=feed_dict)

	out_logits = out_logits[0]
	out_predictions = out_predictions[0]
	sorted_indices = np.argsort(out_predictions)[::-1]

	print('Norm of logits: %f' % np.linalg.norm(out_logits))
	print('\nTop classes and probabilities')
	# for index in sorted_indices[:TOP_RESULT_CLASSES]:
		# print(out_predictions[index], out_logits[index], kinetics_classes[index])
	print(sorted_indices)
	# return kinetics_classes[sorted_indices[0]]

main(SAMPLE_PATH_RGB=None) `

This is my code for testing.

Jun 21 '19 14:06 siddharth2022

I3D_Finetune I3D_Finetune copied to clipboard

I finetuned the model for 3 output classes but now at the time of prediction i got the error of input Shape does not match

I3D_Finetune
I3D_Finetune copied to clipboard