user3436204 user3436204 - 5 months ago 35
Python Question

Tensorflow doesn't like pandas dataframe?

I was trying out tensorflow with the Titanic data from Kaggle:

Here's the code I tried to implement from Sendex:

import tensorflow as tf
import cleanData
import numpy as np

train, test = cleanData.read_and_clean()
train = train[['Pclass', 'Sex', 'Age', 'Fare', 'Child', 'Fam_size', 'Title', 'Mother', 'Survived']]

# one hot
train['Died'] = int('0')
train["Died"][train["Survived"] == 0] = 1


n_nodes_hl1 = 500
n_classes = 2
batch_size = 100

# tf graph input
x = tf.placeholder("float", [None, 8])
y = tf.placeholder("float")

def neural_network_model(data):

hidden_layer_1 = {'weights':tf.Variable(tf.random_normal([8, n_nodes_hl1])),

output_layer = {'weights':tf.Variable(tf.random_normal([n_nodes_hl1, n_classes])),

l1 = tf.add(tf.matmul(data, hidden_layer_1['weights']), hidden_layer_1['biases'])
l1 = tf.nn.relu(l1)

output = tf.matmul(l1, output_layer['weights']) + output_layer['biases']

return output

def train_neural_network(x):
prediction = neural_network_model(x)
cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(prediction,y))
optimizer = tf.train.AdamOptimizer().minimize(cost)

desired_epochs = 10

with tf.Session() as sess:

for epoch in range(desired_epochs):
epoch_loss = 0
for _ in range(int(train.shape[0])/batch_size):
x_epoch, y_epoch = train.next_batch(batch_size)
_, c =[optimizer, cost], feed_dict= {x:x, y:y})
epoch_loss += c
print('Epoch', epoch, 'completed out of', desired_epochs, 'loss:', epoch_loss)

correct = tf.equal(tf.argmax(prediction,1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
print('Training accuracy:', accuracy.eval({x:x, y:y}))


When I ran the code I got an error that said: "W tensorflow/core/framework/] Invalid argument: shape must be a vector of {int32,int64}, got shape []"

Is there a way around this? I saw a post on Github for tensorflow's code and apparently the library doesn't take pandas dataframe as an input..


I think the error is on this line:

    hidden_layer_1 = {'weights': tf.Variable(tf.random_normal([8, n_nodes_hl1])),
                      'biases': tf.Variable(tf.random_normal(n_nodes_hl1))}

The shape argument to tf.random_normal() must be a 1-D vector (or list, or array) of integers. For the 'biases' variable, you're passing a single integer, n_nodes_hl1. The fix is simple, just wrap that argument in a list:

    hidden_layer_1 = {...,
                      'biases': tf.Variable(tf.random_normal([n_nodes_hl1]))}