
Tensorflow: Global step must be from the same graph as loss

I'm trying to use Tensorflow to do some classification with the tf.contrib.layers package, and I've run into a problem I can't quite figure out. As far as I can tell from examples (e.g. this one and its tutorial), all of the graph handling is done by the API. I can download and run the same code in my environment perfectly well.

However, when I run my code, I get an error saying that my global step is not from the same graph as my loss, which seems bizarre:

ValueError: Tensor("global_step:0", shape=(), dtype=int64_ref) must be from the same graph as Tensor("softmax_cross_entropy_loss/value:0", shape=(), dtype=float32).
The error occurs during the construction of the train_op.
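From what I can tell, the error means the global step tensor and the loss tensor live in two different tf.Graph objects. Here's a minimal sketch (my own reduction, nothing from my actual model) that triggers the same ValueError:

import tensorflow as tf

# Build one tensor in each of two separate graphs
graph_one = tf.Graph()
with graph_one.as_default():
    a = tf.constant(1.0)

graph_two = tf.Graph()
with graph_two.as_default():
    b = tf.constant(2.0)
    total = a + b  # ValueError: ... must be from the same graph as ...

But I never create a second graph explicitly, so I can't see where the mismatch comes from.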


Here's my Tensorflow code (I do have some other code for loading the data, but it doesn't use anything from Tensorflow). Sorry that the code is a bit messy right now: I've been tearing it apart trying to figure out this error.

import numpy as np
import tensorflow as tf
from tensorflow.contrib.learn.python.learn.estimators import model_fn as model_fn_lib

import data  # my data loading module


def train(training_file, vocab_path, hidden_units=[10, 20, 10], estimator=tf.contrib.learn.DNNClassifier):
    """
    Given a training CSV file, train a Tensorflow neural network
    """

    training_set = data.load(training_file)

    vocab = tf.contrib.learn.preprocessing.VocabularyProcessor(data.DOC_LENGTH)
    vocab = vocab.restore(vocab_path)
    training_data = tf.one_hot(training_set.data, len(vocab.vocabulary_._mapping), dtype=tf.float32)
    training_targets = tf.constant(np.array(training_set.targets, dtype=np.int32))

    classifier = tf.contrib.learn.Estimator(model_fn=lambda features, targets, mode, params: model_fn(features, targets, mode, params, hidden_units))

    classifier.fit(input_fn=lambda: (training_data, training_targets), steps=2000)

    return classifier


def model_fn(features, targets, mode, params, hidden_units):
    if len(hidden_units) <= 0:
        raise ValueError("Hidden units must be an iterable of ints of length >= 1")

    # Define the network
    network = tf.contrib.layers.relu(features, hidden_units[0])
    for i in range(1, len(hidden_units)):
        network = tf.contrib.layers.relu(network, hidden_units[i])

    # Flatten the network
    network = tf.reshape(network, [-1, hidden_units[-1] * data.DOC_LENGTH])

    # Add dropout to enhance feature use
    network = tf.layers.dropout(inputs=network, rate=0.5, training=mode == tf.contrib.learn.ModeKeys.TRAIN)

    # Calculate the logits
    logits = tf.contrib.layers.fully_connected(network, 15)

    loss = None
    train_op = None

    if mode != tf.contrib.learn.ModeKeys.INFER:
        targets = tf.cast(tf.one_hot(targets, 15, 1, 0), dtype=tf.float32)
        loss = tf.losses.softmax_cross_entropy(logits=logits, onehot_labels=targets)

    if mode == tf.contrib.learn.ModeKeys.TRAIN:
        # This train_op causes the error
        train_op = tf.contrib.layers.optimize_loss(
            loss=loss,
            global_step=tf.train.get_global_step(),
            optimizer='Adam',
            learning_rate=0.01)

    predictions = {
        "classes": tf.argmax(input=logits, axis=1),
        "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
    }

    return model_fn_lib.ModelFnOps(mode=mode, predictions=predictions, loss=loss, train_op=train_op)


def main(unusedargv):

    # ... parses arguments

    classifier = train(args.train_data, args.vocab)
    print(evaluate(classifier, args.train_data))
    print(evaluate(classifier, args.test_data))


if __name__ == "__main__":
    tf.app.run()


Here's the full stack trace:

File "categorize.py", line 126, in main
classifier = train(args.train_data, args.vocab)
File "categorize.py", line 39, in train
classifier.fit(input_fn=lambda: (training_data, training_targets), steps=2000)
File "/usr/local/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 280, in new_func
return func(*args, **kwargs)
File "/usr/local/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 426, in fit
loss = self._train_model(input_fn=input_fn, hooks=hooks)
File "/usr/local/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 934, in _train_model
model_fn_ops = self._call_legacy_get_train_ops(features, labels)
File "/usr/local/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 1003, in _call_legacy_get_train_ops
train_ops = self._get_train_ops(features, labels)
File "/usr/local/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 1162, in _get_train_ops
return self._call_model_fn(features, labels, model_fn_lib.ModeKeys.TRAIN)
File "/usr/local/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 1133, in _call_model_fn
model_fn_results = self._model_fn(features, labels, **kwargs)
File "categorize.py", line 37, in <lambda>
classifier = tf.contrib.learn.Estimator(model_fn=lambda features, targets, mode, params: model_fn(features, targets, mode, params, hidden_units))
File "categorize.py", line 73, in model_fn
learning_rate=0.01)
File "/usr/local/lib/python3.6/site-packages/tensorflow/contrib/layers/python/layers/optimizers.py", line 152, in optimize_loss
with vs.variable_scope(name, "OptimizeLoss", [loss, global_step]):
File "/usr/local/Cellar/python3/3.6.0_1/Frameworks/Python.framework/Versions/3.6/lib/python3.6/contextlib.py", line 82, in __enter__
return next(self.gen)
File "/usr/local/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py", line 1410, in variable_scope
g = ops._get_graph_from_inputs(values) # pylint: disable=protected-access
File "/usr/local/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3968, in _get_graph_from_inputs
_assert_same_graph(original_graph_element, graph_element)
File "/usr/local/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3907, in _assert_same_graph
"%s must be from the same graph as %s." % (item, original_item))
ValueError: Tensor("global_step:0", shape=(), dtype=int64_ref) must be from the same graph as Tensor("softmax_cross_entropy_loss/value:0", shape=(), dtype=float32).



Answer

I figured out the problem! This may be specific to the Estimator interface, but basically I needed to move my Tensorflow variable definitions into the Estimator. I ended up making a method to do this, but it also worked when I defined the variables in the lambda:

def train(training_file, vocab_path, hidden_units=[10, 20, 10]):
    """
    Given a training CSV file, train a Tensorflow neural network
    """

    training_set = data.load(training_file)

    vocab = tf.contrib.learn.preprocessing.VocabularyProcessor(data.DOC_LENGTH)
    vocab = vocab.restore(vocab_path)
    # Note not defining the variables here
    training_data = training_set.data
    training_targets = np.array(training_set.targets, dtype=np.int32)

    classifier = tf.contrib.learn.Estimator(model_fn=lambda features, targets, mode, params: model_fn(features, targets, mode, params, hidden_units))

    # Note the variable definition here
    classifier.fit(
        input_fn=lambda:
            (tf.one_hot(training_data, len(vocab.vocabulary_._mapping), dtype=tf.float32),
             tf.constant(training_targets)),
        steps=2000)

    return classifier
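
For reference, the method version looks roughly like this (make_input_fn is just my name for the helper, so adapt as needed). The key point is that the tf.one_hot and tf.constant ops are only created when the Estimator calls input_fn, inside the graph that fit() constructs, so they end up in the same graph as the loss and the global step:

def make_input_fn(raw_data, raw_targets, vocab):
    """Build the input tensors lazily, inside the Estimator's own graph."""
    def input_fn():
        features = tf.one_hot(raw_data, len(vocab.vocabulary_._mapping), dtype=tf.float32)
        targets = tf.constant(np.array(raw_targets, dtype=np.int32))
        return features, targets
    return input_fn

# Then, in train():
# classifier.fit(input_fn=make_input_fn(training_data, training_targets, vocab), steps=2000)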