
Training TensorFlow to predict a sum

The examples provided with TensorFlow are a little complicated for getting started, so I am trying to train a neural network in TensorFlow to predict the sum of three binary digits. The network gets two of them as inputs; the third one is unknown. An "optimal" network would therefore guess that the sum is the sum of the two known bits, plus 1/2 for the unknown bit. Let's say that the "loss" function is the square of the difference between the value predicted by the network and the actual value.
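
To make the target concrete, here is a quick standalone check (separate from the TensorFlow code below) that guessing the known sum plus 1/2 gives an expected squared loss of 1/4:

# Standalone sanity check: guessing known_sum + 0.5 for the unknown bit
# gives an average squared error of 0.25 over all eight bit combinations.
import itertools

errors = []
for b1, b2, unknown in itertools.product([0, 1], repeat=3):
    prediction = b1 + b2 + 0.5   # the "optimal" guess
    actual = b1 + b2 + unknown   # the true sum
    errors.append((prediction - actual) ** 2)

print(sum(errors) / len(errors))   # prints 0.25

So a well-trained network should approach a mean squared error of about 0.25 on this task.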

I have written code to generate the trials:

import tensorflow as tf
import numpy as np
from random import randint

flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_integer('batch_size', 5, 'Batch size. ')
flags.DEFINE_float('learning_rate', 0.01, 'Initial learning rate.')
flags.DEFINE_integer('dim1', 3, 'layer size')
flags.DEFINE_integer('training_epochs', 10, 'Number of passes through the main training loop')


def ezString(list):
    #debugging code so I can see what is going on
    listLength = len(list)
    r = ''
    for i in range(listLength):
        value = list[i]
        valueString = str(value)
        r = r + ' '
        r = r + valueString
    return r

def generateTrial():
    inputs = np.zeros(2, dtype=np.int)
    for i in range(2):
        inputs[i] = randint(0,1)
    unknownInput = randint(0,1)
    sum = 0
    for j in range(2):
        sum = sum + inputs[j]
    sum = sum + unknownInput
    inputTensor = tf.pack(inputs)
    print 'inputs' + ezString(inputs)
    print 'unknown ' + str(unknownInput)
    print 'sum ' + str(sum)
    print ''
    return inputTensor, sum

def printTensor(tensor):
    sh = tensor.get_shape()
    print(sh)

def placeholder_inputs(size):
    output_placeholder = tf.placeholder(tf.int32, shape=(size))
    input_placeholder = tf.placeholder(tf.int32, shape=(size, 2))
    return input_placeholder, output_placeholder

def fill_feed_dict(inputs_pl, output_pl):
    print ('Filling feed dict')
    inputs_placeholder, output_placeholder = placeholder_inputs(FLAGS.batch_size)
    inputs = []
    outputs = []
    for i in range(FLAGS.batch_size):
        input, output = generateTrial()
        inputTensor = tf.pack(input)
        inputs.append(input)
        outputs.append(output)
    inputs_placeholder = tf.pack(inputs)
    outputs_placeholder = tf.pack(outputs)

def run_training():
    input_placeholder, output_placeholder = placeholder_inputs(FLAGS.batch_size)
    fill_feed_dict(input_placeholder, output_placeholder)
    printTensor(input_placeholder)
    printTensor(output_placeholder)




run_training()


The output suggests that this much is working:

Filling feed dict
inputs 1 0
unknown 0
sum 1

inputs 1 0
unknown 1
sum 2

inputs 0 1
unknown 1
sum 2

inputs 0 1
unknown 0
sum 1

inputs 0 0
unknown 0
sum 0

(5, 2)
(5,)


But I'm unclear on how I would finish it up. In particular, I need to define a loss function, and I also need to hook things up so that the outputs from my network get used to generate guesses for further training steps. Can anyone help?

Answer

I'm not sure whether this code is exactly what you wanted, but I hope you find it useful anyway. The mean squared error does decrease over the iterations, though I haven't tested it for making predictions, so that part is up to you!

import tensorflow as tf
import numpy as np
from random import randint

flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_integer('batch_size', 50, 'Batch size.')
flags.DEFINE_float('learning_rate', 0.01, 'Initial learning rate.')
flags.DEFINE_integer('dim1', 3, 'layer size')
flags.DEFINE_integer('training_epochs', 10, 'Number of passes through the main training loop')
flags.DEFINE_integer('num_iters', 100, 'Number of iterations')


def ezString(list):
    #debugging code so I can see what is going on
    listLength = len(list)
    r = ''
    for i in range(listLength):
        value = list[i]
        valueString = str(value)
        r = r + ' '
        r = r + valueString
    return r

def generateTrial():
    inputs = np.zeros(2, dtype = np.float)
    for i in range(2):
        inputs[i] = randint(0, 1)
    unknownInput = randint(0, 1)
    sum = 0
    for j in range(2):
        sum = sum + inputs[j]
    sum = sum + unknownInput
    inputTensor = np.asarray(inputs)
    return inputTensor, sum

def printTensor(tensor):
    sh = tensor.get_shape()
    print(sh)

def placeholder_inputs(size):
    output_placeholder = tf.placeholder(tf.float32, shape=(size))
    input_placeholder = tf.placeholder(tf.float32, shape=(size, 2))
    return input_placeholder, output_placeholder

def fill_feed_dict(inputs_pl, output_pl):
    inputs = []
    outputs = []
    for i in range(FLAGS.batch_size):
        input, output = generateTrial()
        inputs.append(input)
        outputs.append(output)

    return {inputs_pl: inputs, output_pl: outputs}

def loss(y, pred):
    return tf.reduce_mean(tf.pow(y - pred, 2))

def NN(x, y, W1, b1, W2, b2):
    # one hidden ReLU layer, then a single linear output unit
    layer1 = tf.add(tf.matmul(x, W1), b1)
    layer1 = tf.nn.relu(layer1)
    output = tf.add(tf.matmul(layer1, W2), b2)
    # squeeze the (batch_size, 1) output down to (batch_size,) so it matches y;
    # otherwise y - output broadcasts to a (batch_size, batch_size) matrix
    output = tf.squeeze(output, [1])
    return output, loss(y, output)

def get_params(dim_hidden):
    with tf.variable_scope('nn_params'):
        # weights get small random values, biases start at zero
        return tf.Variable(tf.truncated_normal([2, dim_hidden], stddev=0.05)), tf.Variable(tf.zeros([dim_hidden])),\
               tf.Variable(tf.truncated_normal([dim_hidden, 1], stddev=0.05)), tf.Variable(tf.zeros([1]))

def run_training():
    input_placeholder, output_placeholder = placeholder_inputs(FLAGS.batch_size)
    W1, b1, W2, b2 = get_params(FLAGS.dim1)
    pred, loss = NN(input_placeholder, output_placeholder, W1, b1, W2, b2)
    optm = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize(loss)
    init = tf.initialize_all_variables()
    sess = tf.Session()
    sess.run(init)

    for iters in range(FLAGS.num_iters):
        l, _ = sess.run([loss, optm], feed_dict = fill_feed_dict(input_placeholder, output_placeholder))
        print l, iters + 1
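
If you do want to see the predictions, a rough, untested sketch would be to reuse the same session and placeholders at the end of run_training and run the pred op on one more generated batch:

    # Rough sketch (untested): after the training loop, generate one more
    # batch and compare the network's guesses against the actual sums.
    test_feed = fill_feed_dict(input_placeholder, output_placeholder)
    guesses = sess.run(pred, feed_dict=test_feed)
    for x, guess, actual in zip(test_feed[input_placeholder], guesses, test_feed[output_placeholder]):
        print x, guess, actual

And, as in the question's script, run_training() still needs to be called at the bottom of the file to kick everything off.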