Link Link - 1 month ago 34
Python Question

How to *correctly* read data from csv's into TensorFlow

I came across this so posting showing us how to setup the code to read in csv files using a queue. However, each time I run it, I run into an error. I've tried debugging it, but can't figure out what the error means. Can anyone help me out?

The code I'm using is almost verbatim what was posted in the above post:

import tensorflow as tf

dataset = '/Users/hdadmin/Data/actions/testing.csv'

def file_len(fname):
with open(fname) as f:
for i, l in enumerate(f):
pass
return i + 1

def read_from_csv(filename_queue):
reader = tf.TextLineReader(skip_header_lines=1)
_, csv_row = reader.read(filename_queue)
record_defaults = [[0],[0],[0],[0],[0]]
colHour,colQuarter,colAction,colUser,colLabel = tf.decode_csv(csv_row, record_defaults=record_defaults)
features = tf.pack([colHour,colQuarter,colAction,colUser])
label = tf.pack([colLabel])
return features, label

def input_pipeline(batch_size, num_epochs=None):
filename_queue = tf.train.string_input_producer([dataset], num_epochs=num_epochs, shuffle=True)
example, label = read_from_csv(filename_queue)
min_after_dequeue = 1000
capacity = min_after_dequeue + 3 * batch_size
example_batch, label_batch = tf.train.shuffle_batch(
[example, label], batch_size=batch_size, capacity=capacity,
min_after_dequeue=min_after_dequeue)
return example_batch, label_batch

file_length = file_len(dataset) - 1
examples, labels = input_pipeline(file_length, 1)

with tf.Session() as sess:
tf.initialize_all_variables().run()

# start populating filename queue
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)

try:
while not coord.should_stop():
example_batch, label_batch = sess.run([examples, labels])
print(example_batch)
except tf.errors.OutOfRangeError:
print('Done training, epoch reached')
finally:
coord.request_stop()

coord.join(threads)


The error I'm getting is:

E tensorflow/core/client/tensor_c_api.cc:485] Attempting to use uninitialized value input_producer/limit_epochs/epochs
[[Node: input_producer/limit_epochs/CountUpTo = CountUpTo[T=DT_INT64, _class=["loc:@input_producer/limit_epochs/epochs"], limit=1, _device="/job:localhost/replica:0/task:0/cpu:0"](input_producer/limit_epochs/epochs)]]
E tensorflow/core/client/tensor_c_api.cc:485] RandomShuffleQueue '_2_shuffle_batch/random_shuffle_queue' is closed and has insufficient elements (requested 10000, current size 0)
[[Node: shuffle_batch = QueueDequeueMany[_class=["loc:@shuffle_batch/random_shuffle_queue"], component_types=[DT_INT32, DT_INT32], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/cpu:0"](shuffle_batch/random_shuffle_queue, shuffle_batch/n)]]
Done training, epoch reached
E tensorflow/core/client/tensor_c_api.cc:485] FIFOQueue '_0_input_producer' is closed and has insufficient elements (requested 1, current size 0)
[[Node: ReaderRead = ReaderRead[_class=["loc:@TextLineReader", "loc:@input_producer"], _device="/job:localhost/replica:0/task:0/cpu:0"](TextLineReader, input_producer)]]
E tensorflow/core/client/tensor_c_api.cc:485] Queue '_2_shuffle_batch/random_shuffle_queue' is already closed.
[[Node: shuffle_batch/random_shuffle_queue_Close = QueueClose[_class=["loc:@shuffle_batch/random_shuffle_queue"], cancel_pending_enqueues=false, _device="/job:localhost/replica:0/task:0/cpu:0"](shuffle_batch/random_shuffle_queue)]]
Traceback (most recent call last):
File "csv_test.py", line 49, in <module>
coord.join(threads)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/training/coordinator.py", line 357, in join
six.reraise(*self._exc_info_to_raise)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/training/queue_runner.py", line 185, in _run
sess.run(enqueue_op)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 382, in run
run_metadata_ptr)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 655, in _run
feed_dict_string, options, run_metadata)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 723, in _do_run
target_list, options, run_metadata)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 743, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors.FailedPreconditionError: Attempting to use uninitialized value input_producer/limit_epochs/epochs
[[Node: input_producer/limit_epochs/CountUpTo = CountUpTo[T=DT_INT64, _class=["loc:@input_producer/limit_epochs/epochs"], limit=1, _device="/job:localhost/replica:0/task:0/cpu:0"](input_producer/limit_epochs/epochs)]]
Caused by op u'input_producer/limit_epochs/CountUpTo', defined at:
File "csv_test.py", line 31, in <module>
examples, labels = input_pipeline(file_length, 1)
File "csv_test.py", line 21, in input_pipeline
filename_queue = tf.train.string_input_producer([dataset], num_epochs=num_epochs, shuffle=True)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/training/input.py", line 194, in string_input_producer
summary_name="fraction_of_%d_full" % capacity)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/training/input.py", line 133, in input_producer
input_tensor = limit_epochs(input_tensor, num_epochs)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/training/input.py", line 84, in limit_epochs
counter = epochs.count_up_to(num_epochs)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/variables.py", line 577, in count_up_to
return state_ops.count_up_to(self._variable, limit=limit)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/gen_state_ops.py", line 127, in count_up_to
result = _op_def_lib.apply_op("CountUpTo", ref=ref, limit=limit, name=name)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 703, in apply_op
op_def=op_def)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2310, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1232, in __init__
self._traceback = _extract_stack()


I made up data comprised of five columns to match with the example. It's something along the lines of:

"v1","v2","v3","v4","v5"
1,1,1,3,10
4,2,1,10,8
1,4,1,9,3
3,3,1,1,5
3,4,1,4,3
3,2,1,5,8
1,1,1,9,7
4,1,1,4,9
2,3,1,8,4


Thanks ahead of time.

And And
Answer

I think what you are missing is initialization of local variables (e.g. input_producer/limit_epochs/epochs). Local variables are not initialized during initialization of all variables, which btw is quite confusing.

You can add the following initialization operation that will initialize everything at once:

init_op = tf.group(tf.initialize_all_variables(),
                   tf.initialize_local_variables())

and then:

sess.run(init_op)