LipReading icon indicating copy to clipboard operation
LipReading copied to clipboard

prepare_crop_files.py unclear

Open bckenstler opened this issue 8 years ago • 2 comments

Hello. As it stands, prepare_crop_files.py clearly will not work as intended, because the download commands are commented out. However, this makes it unclear which other lines, if any, should also be uncommented to achieve the intended preparation. It would be helpful if you updated the script so that it works as intended without further modification. Thanks!

bckenstler avatar Dec 28 '17 04:12 bckenstler

I came across the same problem. Here is my version, which makes it work. Note that I use OpenCV 3, so I modified the code slightly. In addition, I could not find any variable named `format_num2` in the source code; judging from the context, I believe it should be `count`.

#pylint: skip-file
import os
import fnmatch
import cv2
import numpy as np
import sys

## Usage: python prepare_crop_files.py START END
## START and END select the range of speakers in the dataset, e.g.
## python prepare_crop_files.py 3 6
## The loops below iterate over range(start, end), so END itself is excluded.
if len(sys.argv) < 3:
	print('Insufficient arguments')
	# sys.exit works even when the site module (which provides quit()) is not
	# loaded, and a non-zero status tells the calling shell that the run failed.
	sys.exit(1)

start = int(sys.argv[1])
end = int(sys.argv[2])


# Root directory under which the Audio/ and Video/ trees are created.
path = '/home/lht/data/GRID'
# Create the directory tree without shelling out: this also creates missing
# parent directories, stays silent when the directories already exist, and is
# not affected by spaces in the path.
for directory in (path, path+'/Audio', path+'/Video'):
	if not os.path.isdir(directory):
		os.makedirs(directory)

cascade_path = '/usr/local/share/OpenCV/haarcascades/haarcascade_frontalface_default.xml'
face_cascade = cv2.CascadeClassifier(cascade_path)
# A missing or unreadable cascade file yields an empty classifier; stop here
# with a clear message instead of failing later inside the frame loop.
if face_cascade.empty():
	print('Could not load face cascade from : '+cascade_path)
	sys.exit(1)

ds_factor = 0.5

# Audio download / renaming step.
# It is switched off by default (the previous revision disabled it by looping
# over the empty range(0, 0)). Set download_audio to True to fetch, unpack and
# renumber the audio of every speaker in range(start, end).
download_audio = False

if download_audio:
	for i in range(start, end):

		# Speaker 21 is skipped here, exactly as in the video loop.
		if i==21:
			continue

		os.chdir(path+'/Audio')
		archive='s'+str(i)+'.tar'
		link='http://spandh.dcs.shef.ac.uk/gridcorpus/s'+str(i)+'/audio/'+archive

		# Download the speaker's audio archive, unpack it, then delete the archive.
		os.system('wget '+link)
		os.system('tar -xf '+archive)
		os.system('rm -f -r '+archive)

		# Rename the .wav files to zero-padded sequence numbers (000001.wav, ...)
		# in the order produced by `ls`.
		print(path+'/Audio/s'+str(i))
		os.chdir(path+'/Audio/s'+str(i))
		os.system('ls *.wav | cat -n | while read n f; do mv "$f" "$(printf %06d $n).wav"; done')



# Video step: for every selected speaker, renumber the .mpg clips, crop a fixed
# region out of each frame, write the crops as .jpg frames and as a grayscale
# .avi, append a record to log.txt and run ffmpeg's two-pass stabilisation.

# Fixed crop box: the base rectangle 136,210,49,29 enlarged by a margin of
# `inc` pixels. It does not depend on the frame, so it is computed once.
inc=30
x=136-inc
y=210-int(inc/2)
w=49+(2*inc)
h=29+(inc)

for i in range(start,end):

	# Speaker 21 is skipped.
	if i==21:
		continue

	#print(path+'/Video')
	os.chdir(path+'/Video')

	file='s'+str(i)+'.mpg_vcd.zip'
	link='http://spandh.dcs.shef.ac.uk/gridcorpus/s'+str(i)+'/video/'+file

	# Downloading and unzipping the speaker's video archive is disabled;
	# uncomment the three commands below if the clips are not yet present in
	# path/Video/s<i>.
	#os.system('wget '+link)
	#os.system('unzip '+file)
	#os.system('rm -f -r '+file)

	# Rename the clips to zero-padded sequence numbers (000001.mpg, ...).
	# The working directory stays at the speaker directory from here on; the
	# ffmpeg commands at the end of the loop rely on that.
	#print(path+'/Video/s'+str(i))
	os.chdir(path+'/Video/s'+str(i))
	os.system('ls *.mpg | cat -n | while read n f; do mv "$f" "$(printf %06d $n).mpg"; done')

	# Cropping faces, creating new video, stabilizing new video.
	source_path=path+'/Video/s'+str(i)+'/'
	if not os.path.exists(source_path+'face'):
		os.mkdir(source_path+'face')
	# os.chdir(source_path+'face')

	# Counted once, before any stabilised s_*.mpg output is written.
	numfiles=len(fnmatch.filter(os.listdir(path+'/Video/s'+str(i)), '*.mpg'))
	for j in range(1,numfiles+1):
	# for j in range(1,2):

		format_num1="{number:06}".format(number=j)

		os.system('mkdir -p '+source_path+'frames/'+str(format_num1))
		print('Reading video from : '+source_path+str(format_num1)+'.mpg')

		cap = cv2.VideoCapture(source_path+str(format_num1)+'.mpg')

		print('Writing video : '+source_path+'face/'+str(format_num1)+'.avi')
		out = cv2.VideoWriter()
		if cv2.__version__[0] == '2':
			# opencv 2
			fourcc = cv2.cv.CV_FOURCC('m','p','4','v')
		else:
			# opencv 3
			fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
		# 25 fps, 128x128, last argument False = single-channel (grayscale) frames.
		success = out.open(source_path+'face/'+str(format_num1)+'.avi', fourcc, 25.0, (128,128),False)
		print('Success: '+str(success))

		print('Writing frames to : '+source_path+'frames/'+str(format_num1)+'/')
		# NOTE: count is incremented before each read, so after a clip has been
		# read to the end it equals (number of frames written) + 1.
		count=0

		while(cap.isOpened()):
			count=count+1
			ret, frame = cap.read()
			if ret==False:
				break
			gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

			# Face detection runs on the first frame only and serves purely as
			# a gate: a clip without a detected face is abandoned. The crop
			# itself uses the fixed box computed above, not the detection.
			if count==1:
				face_rects = face_cascade.detectMultiScale(gray,1.05,3,minSize=(128,128))
			# detectMultiScale returns an empty tuple when nothing is found and
			# a NumPy array otherwise. Comparing an array with () is an
			# element-wise comparison that cannot broadcast, so test the length.
			if len(face_rects)==0:
				#count-=1
				#continue;
				break

			roi=gray[y:y+h,x:x+w]
			print(str(x)+','+str(y)+','+str(w)+','+str(h))
			# Resize the crop to the 128x128 size the VideoWriter was opened with.
			roi=cv2.resize(roi,(128,128))

			# cv2.imwrite(source_path+'frames/'+str(format_num1)+'/'+str(format_num2)+'.jpg',roi,)
			cv2.imwrite(source_path+'frames/'+str(format_num1)+'/'+str(count)+'.jpg',roi,)
			# writing video (unstabilized)
			out.write(roi)
		cap.release()
		out.release()

		# One record per clip. The trailing newline keeps records on separate
		# lines; the handle gets its own name so it does not overwrite `file`.
		with open(path+'/log.txt', 'a') as log_file:
			log_file.write(source_path+'frames/'+str(format_num1)+'.mpg '+str(count)+' '+str(success)+'\n')

		# Stabilizing video: both ffmpeg commands use paths relative to the
		# current directory (the speaker directory). The first pass analyses
		# the clip; the second reads "transforms.trf" and writes s_<clip>.mpg.
		# os.chdir("..")
		os.system('ffmpeg -i '+ str(format_num1)+'.mpg' +' -vf vidstabdetect -f null -')
		os.system('ffmpeg -i '+ str(format_num1)+'.mpg' +' -vf vidstabtransform=smoothing=5:input="transforms.trf" '+ 's_'+str(format_num1)+'.mpg' )

liuhantang avatar Dec 29 '17 02:12 liuhantang

@bckenstler Were you eventually able to reproduce the results? What recognition accuracy did you get?

chen-ASR avatar Apr 15 '19 08:04 chen-ASR