raccoon_dataset
raccoon_dataset copied to clipboard
Empty tfrecord
I have the file train_labels.csv with the following structure
filename,width,height,class,xmin,ymin,xmax,ymax ASG000bdy0.jpg,2048,1536,Zebra,312,679,662,871 ...
and 3700 train images in train/
I used the following code.
from future import division from future import print_function from future import absolute_import
import os import io import pandas as pd import tensorflow as tf
from PIL import Image from object_detection.utils import dataset_util from collections import namedtuple, OrderedDict
flags = tf.app.flags flags.DEFINE_string('csv_input', '', 'Path to the CSV input') flags.DEFINE_string('output_path', '', 'Path to output TFRecord') flags.DEFINE_string('image_dir', '', 'Path to images') FLAGS = flags.FLAGS
def class_text_to_int(row_label): if row_label == 'Impala': return 1 elif row_label == 'Wildebeest': return 2 elif row_label == 'Zebra': return 3 elif row_label == 'Elephant': return 4 elif row_label == 'Hartebeest': return 5 elif row_label == 'Buffalo': return 6 elif row_label == 'GazelleGrants': return 7 elif row_label == 'Giraffe': return 8 elif row_label == 'Reedbuck': return 9 elif row_label == 'Human': return 10 elif row_label == 'GazelleThomsons': return 11 elif row_label == 'Topi': return 12 elif row_label == 'Eland': return 13 elif row_label == 'Lion': return 14 elif row_label == 'HyenaSpotted': return 15 elif row_label == 'Bushbuck': return 16 elif row_label == 'Cheetah': return 17 elif row_label == 'Leopard': return 18 elif row_label == 'Ostrich': return 19 elif row_label == 'Rhinoceros': return 20 elif row_label == 'Waterbuck': return 21 else: None def split(df, group): data = namedtuple('data', ['filename', 'object']) gb = df.groupby(group) return [data(filename, gb.get_group(x)) for filename, x in zip(gb.groups.keys(), gb.groups)]
def create_tf_example(group, path): with tf.gfile.GFile(os.path.join(path, '{}'.format(group.filename)), 'rb') as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = Image.open(encoded_jpg_io) width, height = image.size
filename = group.filename.encode('utf8')
image_format = b'jpg'
xmins = []
xmaxs = []
ymins = []
ymaxs = []
classes_text = []
classes = []
for index, row in group.object.iterrows():
xmins.append(row['xmin'] / width)
xmaxs.append(row['xmax'] / width)
ymins.append(row['ymin'] / height)
ymaxs.append(row['ymax'] / height)
classes_text.append(row['class'].encode('utf8'))
classes.append(class_text_to_int(row['class']))
tf_example = tf.train.Example(features=tf.train.Features(feature={
'image/height': dataset_util.int64_feature(height),
'image/width': dataset_util.int64_feature(width),
'image/filename': dataset_util.bytes_feature(filename),
'image/source_id': dataset_util.bytes_feature(filename),
'image/encoded': dataset_util.bytes_feature(encoded_jpg),
'image/format': dataset_util.bytes_feature(image_format),
'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
'image/object/class/label': dataset_util.int64_list_feature(classes),
}))
return tf_example
def main(_): writer = tf.python_io.TFRecordWriter(FLAGS.output_path) path = os.path.join(FLAGS.image_dir) examples = pd.read_csv(FLAGS.csv_input) grouped = split(examples, 'filename') output_path = os.path.join(os.getcwd(), FLAGS.output_path) print('Successfully created the TFRecords: {}'.format(output_path))
if name == 'main': tf.app.run()
When i run the command: python generatetfrecord.py --csv_input=train_labels.csv --image_dir=train --output_path=train.record
I get the following message
Successfully created the TFRecords: C:\Users\XXX\Desktop\tensor\models-master\research\train.record
-> But the created train.record is empty/0 byte big