Julian Carpenter - 1 year ago 77
Python Question

Reading in a tfrecord file never finishes

I use this script to download and convert the CIFAR-10 data into a TFRecord file. It finishes without a problem and I have a proper binary file.

I then try to import my file with this script:

"""Provides data for the Cifar10 dataset.
The dataset scripts used to create the dataset can be found at:
tensorflow/models/slim/datasets/download_and_convert_cifar10.py
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import tensorflow as tf

import dataset_utils

slim = tf.contrib.slim

_FILE_PATTERN = 'cifar10_%s.tfrecord'

SPLITS_TO_SIZES = {'train': 50000, 'test': 10000}

_NUM_CLASSES = 10

_ITEMS_TO_DESCRIPTIONS = {
'image': 'A [32 x 32 x 3] color image.',
'label': 'A single integer between 0 and 9',
}


def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
"""Gets a dataset tuple with instructions for reading cifar10.
Args:
split_name: A train/test split name.
dataset_dir: The base directory of the dataset sources.
file_pattern: The file pattern to use when matching the dataset sources.
It is assumed that the pattern contains a '%s' string so that the split
name can be inserted.
reader: The TensorFlow reader type.
Returns:
A `Dataset` namedtuple.
Raises:
ValueError: if `split_name` is not a valid train/test split.
"""
if split_name not in SPLITS_TO_SIZES:
raise ValueError('split name %s was not recognized.' % split_name)

if not file_pattern:
file_pattern = _FILE_PATTERN
file_pattern = os.path.join(dataset_dir, file_pattern % split_name)

# Allowing None in the signature so that dataset_factory can use the default.
if not reader:
reader = tf.TFRecordReader

keys_to_features = {
'image/encoded':
tf.FixedLenFeature((), tf.string, default_value=''),
'image/format':
tf.FixedLenFeature((), tf.string, default_value='png'),
'image/class/label':
tf.FixedLenFeature(
[], tf.int64, default_value=tf.zeros([], dtype=tf.int64)),
}

items_to_handlers = {
'image': slim.tfexample_decoder.Image(shape=[32, 32, 3]),
'label': slim.tfexample_decoder.Tensor('image/class/label'),
}

decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features,
items_to_handlers)

labels_to_names = None
if dataset_utils.has_labels(dataset_dir):
labels_to_names = dataset_utils.read_label_file(dataset_dir)

return slim.dataset.Dataset(
data_sources=file_pattern,
reader=reader,
decoder=decoder,
num_samples=SPLITS_TO_SIZES[split_name],
items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
num_classes=_NUM_CLASSES,
labels_to_names=labels_to_names)


data = get_split('test', '/path/to/cifar10_dir')

sess = tf.Session(config=tf.ConfigProto(
allow_soft_placement=True, log_device_placement=False))

coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord, sess=sess)

data_provider = slim.dataset_data_provider.DatasetDataProvider(
data, num_readers=10, shuffle=True)
img, lbl = data_provider.get(['image', 'label'])


it also gives me no error, but when i try:

sess.run(img)


the process never stops. TensorFlow starts the session but never reads the file; it won't release the prompt and is basically doing nothing. I'm seriously going mad here because I don't know where to start. Does anyone have any advice?

Answer Source

You should start your queue runners only after constructing the DatasetDataProvider, because the provider itself registers queue runners that feed its internal queue. So change the code to the following order:

data_provider = slim.dataset_data_provider.DatasetDataProvider(
data, num_readers=10, shuffle=True)
img, lbl = data_provider.get(['image', 'label'])

sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=False))

coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord, sess=sess)
Recommended from our users: Dynamic Network Monitoring from WhatsUp Gold from IPSwitch. Free Download