
Tensorflow: Error rate doesn't improve even with more iterations or changing the learning rate

I'm using TensorFlow to build my own simple CNN for the MNIST dataset.
I've finally got my convnet running, and the code executes without any errors or warnings. However, the accuracy printed in the terminal is always 0.098 and doesn't improve no matter how many iterations I run or what learning rate I use. Where did I go wrong? Can anyone help me?

My code (MyConvNet.py):

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
from MyNet import weight_variable,bias_variable,conv_layer,pooling_layer,relu_layer,fully_connecd,softmax_layer


def compute_accuracy(v_xs,v_ys):
    global prediction
    y_pre = sess.run(prediction,feed_dict={xs:v_xs})
    correct_prediction = tf.equal(tf.argmax(y_pre,1),tf.argmax(v_ys,1))
    acc = tf.reduce_mean(tf.cast(correct_prediction,tf.float32))
    result = sess.run(acc,feed_dict={xs:v_xs,ys:v_ys})
    return result


xs = tf.placeholder(tf.float32,[None,784])
ys = tf.placeholder(tf.float32,[None,10])
x_img = tf.reshape(xs,[-1,28,28,1])

########## LAYER DEFINITION START ##########
# layer 1
conv1_w = weight_variable([5,5,1,6]) # [cols,rows,channels,n]
conv1_b = bias_variable([6])
# [28*28*1]->[24*24*6]
conv1 = conv_layer(x_img, conv1_w, name='conv1') + conv1_b
# [24*24*6]->[12*12*6]
pool1 = pooling_layer(conv1, name='pool1')
relu1 = relu_layer(pool1,name='relu1')

# layer 2
conv2_w = weight_variable([5,5,6,16]) # [cols,rows,channels,n]
conv2_b = bias_variable([16])
# [12*12*6]->[8*8*16]
conv2 = conv_layer(relu1, conv2_w, name='conv2') + conv2_b
# [8*8*16]->[4*4*16]
pool2 = pooling_layer(conv2, name='pool2')
relu2 = relu_layer(pool2, name='relu2')

# layer 3 (fc)
fc_in_size = (relu2.get_shape()[1]*relu2.get_shape()[2]*relu2.get_shape()[3]).value
fc3_w = weight_variable([fc_in_size,120])
fc3_b = bias_variable([120])
relu2_col = tf.reshape(relu2,[-1,fc_in_size])
fc3 = fully_connecd(relu2_col,fc3_w, name='fc3')+fc3_b
relu3 = relu_layer(fc3, name='relu3')

# layer 4 (fc)
fc4_w = weight_variable([120,10])
fc4_b = bias_variable([10])
fc4 = fully_connecd(relu3,fc4_w, name='fc4')+fc4_b
relu4 = relu_layer(fc4, name='relu4')

# layer 5 (prediction)
prediction = softmax_layer(relu4)

# training solver
cross_entropy = tf.reduce_mean(-tf.reduce_sum(ys*tf.log(prediction),
                                              reduction_indices=[1]))
train_step = tf.train.GradientDescentOptimizer(1e-4).minimize(cross_entropy)
########## LAYER DEFINITION END ##########



# start training
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
sess = tf.Session()
sess.run(tf.initialize_all_variables())
for step in range(500):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    sess.run(train_step,feed_dict={xs:batch_xs, ys:batch_ys})
    if step % 50 == 0:
        print( compute_accuracy(mnist.test.images, mnist.test.labels) )

sess.close()


And here is MyNet.py:

import tensorflow as tf

def weight_variable(shape,stddev=0.1):
    init = tf.truncated_normal(shape,stddev)
    return tf.Variable(init)

def bias_variable(shape):
    init = tf.constant(0.1,shape=shape)
    return tf.Variable(init)

def conv_layer(bottom,Weights,name='conv_layer'):
    with tf.name_scope(name):
        # stride=[1,x_dir,y_dir,1]
        return tf.nn.conv2d(bottom,Weights,strides=[1,1,1,1], padding='VALID')

def pooling_layer(bottom,name='pooling_layer'):
    with tf.name_scope(name):
        return tf.nn.max_pool(bottom,ksize=[1,2,2,1],strides=[1,2,2,1],padding='VALID')

def relu_layer(bottom,name='relu_layer'):
    with tf.name_scope(name):
        return tf.nn.relu(bottom)

def fully_connecd(bottom,Weights,name='fc'):
    with tf.name_scope(name):
        return tf.matmul(bottom,Weights)

def softmax_layer(bottom,name='softmax'):
    with tf.name_scope(name):
        return tf.nn.softmax(bottom)


Here is the output from the terminal:

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
0.098
0.098
0.098
0.098
0.098
0.098
0.098
0.098
0.098
0.098
>>>

Answer

In your weight initialization, replace tf.truncated_normal(shape, stddev) with:

def weight_variable(shape,stddev=0.1):
    init = tf.truncated_normal(shape,stddev = stddev)
    return tf.Variable(init)
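
For reference, this is the relevant signature (as documented for TF 1.x; check your version's docs for the exact defaults):

    tf.truncated_normal(shape, mean=0.0, stddev=1.0, dtype=tf.float32, seed=None, name=None)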

The second positional argument is mean, not stddev, so the original call draws weights with mean 0.1 and the default standard deviation of 1.0. With initial weights that large the softmax saturates, the gradients stop being informative, and the network stays at chance level: 0.098 is roughly the accuracy you get by always predicting the same class on the MNIST test set. To illustrate:

    import matplotlib.pyplot as plt
    sess = tf.Session()
    # positional argument lands on mean: samples centered at 0.1, stddev 1.0
    test = tf.truncated_normal([1000000], 0.1)
    plt.hist(test.eval(session=sess), 50)
    # keyword argument: samples centered at 0.0 with stddev 0.1, as intended
    test2 = tf.truncated_normal([1000000], stddev=.1)
    plt.hist(test2.eval(session=sess), 50)

[Image: the two histograms. The first is centered at 0.1 with standard deviation 1.0; the second is centered at 0.0 with standard deviation 0.1.]
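
As an aside, unrelated to the bug above: the question's loss applies tf.log directly to the softmax output, which produces NaNs as soon as prediction contains an exact 0. A more robust pattern (a sketch on my part, not part of the original code) is to feed the pre-softmax activations, e.g. fc4, to TensorFlow's fused op, dropping the final ReLU, which a softmax doesn't need:

    # sketch: the fused op applies softmax and log together, numerically stably
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=ys, logits=fc4))
    train_step = tf.train.GradientDescentOptimizer(1e-4).minimize(cross_entropy)

softmax_layer is then only needed where you want actual probabilities, e.g. in compute_accuracy (and even there tf.argmax gives the same answer on the raw logits).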