A common situation: you have built your TFRecord file (your dataset) and want to verify that the data in it is correct. How do you do that? I assume you had no problem building the TFRecord file itself. The easiest way to verify its contents is to iterate over the records with the API tf.python_io.tf_record_iterator().
Check the following points carefully:
1. Use XXX.value without an index (i.e. without [0]) if the restored feature is a list of values
2. For the image part, the data type should be consistent between writing and reading in TFRecord.
The example code:
#!/usr/bin/python
# -*- coding: utf-8 -*-
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
def read_ftrecord(tffile):
    """Yield the fields of every record stored in a TFRecord file.

    Each serialized record is parsed as a ``tf.train.Example`` and the
    image-related features are extracted into a plain dictionary.

    Args:
        tffile: Path of the TFRecord file to read.

    Yields:
        dict with the keys ``"width"`` and ``"height"`` (int),
        ``"filename"``, ``"encoded"`` and ``"text"`` (bytes, first value of
        the respective feature) and ``"label"`` (list with all values of the
        ``image/class/label`` feature).

    Raises:
        IndexError: presumably when a single-valued feature is missing or
            empty in a record, because ``value[0]`` is then out of range.
    """
    for record in tf.python_io.tf_record_iterator(tffile):
        # Parse into a fresh message for every record so that nothing handed
        # out for an earlier record is tied to a message that gets re-parsed.
        example = tf.train.Example()
        example.ParseFromString(record)
        feature = example.features.feature
        yield {
            "width": int(feature['image/width'].int64_list.value[0]),
            "height": int(feature['image/height'].int64_list.value[0]),
            "filename": feature['image/filename'].bytes_list.value[0],
            "encoded": feature['image/encoded'].bytes_list.value[0],
            # The label may hold several values, so it is not indexed; copy
            # it into a plain list to detach it from the protobuf message.
            "label": list(feature['image/class/label'].int64_list.value),
            "text": feature['image/class/text'].bytes_list.value[0],
        }
# Show the first few records of the dataset to check them by eye.
tfrecords_filename = 'my_dataset.tfrecords'
gen_data = read_ftrecord(tfrecords_filename)
for i in range(5):
    # next() with a default works on Python 2 and 3 and lets the loop stop
    # cleanly when the file holds fewer than five records.
    my_data = next(gen_data, None)
    if my_data is None:
        break
    # The dtype must be the same one that was used when the image bytes were
    # written into the TFRecord file.
    image_1d = np.frombuffer(my_data["encoded"], dtype=np.int8)
    # NOTE(review): reshaped as (width, height) exactly as before; if the
    # writer stored rows first this should be (height, width) - confirm.
    image = image_1d.reshape((my_data["width"], my_data["height"]))
    print(my_data["width"],
          my_data["height"],
          my_data["filename"],
          image.shape,
          my_data["label"],
          my_data["text"])
    plt.title('image:' + str(i))
    plt.imshow(image, plt.cm.gray)
    plt.show()
No comments:
Post a Comment