Tensorflow: Error rate doesn't improve even with more iterations or changing learning rate
I am implementing my own simple CNN on the MNIST dataset using TensorFlow. I finally got my convnet deployed, and the code runs without any errors or warnings. However, the error rate shown in the terminal is always 0.098, and it doesn't improve no matter how I change the number of iterations or the learning rate. Where did I go wrong? Can anyone help me?
My code (MyConvNet.py):
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
from MyNet import weight_variable,bias_variable,conv_layer,pooling_layer,relu_layer,fully_connecd,softmax_layer

def compute_accuracy(v_xs,v_ys):
    global prediction
    y_pre = sess.run(prediction,feed_dict={xs:v_xs})
    correct_prediction = tf.equal(tf.argmax(y_pre,1),tf.argmax(v_ys,1))
    acc = tf.reduce_mean(tf.cast(correct_prediction,tf.float32))
    result = sess.run(acc,feed_dict={xs:v_xs,ys:v_ys})
    return result

xs = tf.placeholder(tf.float32,[None,784])
ys = tf.placeholder(tf.float32,[None,10])
x_img = tf.reshape(xs,[-1,28,28,1])

########## LAYER DEFINITION START ##########
# layer 1
conv1_w = weight_variable([5,5,1,6]) # [cols,rows,channels,n]
conv1_b = bias_variable([6])
# [28*28*1]->[24*24*6]
conv1 = conv_layer(x_img, conv1_w, name='conv1') + conv1_b
# [24*24*6]->[12*12*6]
pool1 = pooling_layer(conv1, name='pool1')
relu1 = relu_layer(pool1, name='relu1')
# layer 2
conv2_w = weight_variable([5,5,6,16]) # [cols,rows,channels,n]
conv2_b = bias_variable([16])
# [12*12*6]->[8*8*16]
conv2 = conv_layer(relu1, conv2_w, name='conv2') + conv2_b
# [8*8*16]->[4*4*16]
pool2 = pooling_layer(conv2, name='pool2')
relu2 = relu_layer(pool2, name='relu2')
# layer 3 (fc)
fc_in_size = (relu2.get_shape()[1]*relu2.get_shape()[2]*relu2.get_shape()[3]).value
fc3_w = weight_variable([fc_in_size,120])
fc3_b = bias_variable([120])
relu2_col = tf.reshape(relu2,[-1,fc_in_size])
fc3 = fully_connecd(relu2_col,fc3_w, name='fc3') + fc3_b
relu3 = relu_layer(fc3, name='relu3')
# layer 4 (fc)
fc4_w = weight_variable([120,10])
fc4_b = bias_variable([10])
fc4 = fully_connecd(relu3,fc4_w, name='fc4') + fc4_b
relu4 = relu_layer(fc4, name='relu4')
# layer 5 (prediction)
prediction = softmax_layer(relu4)
# training solver
cross_entropy = tf.reduce_mean(-tf.reduce_sum(ys*tf.log(prediction),
                                              reduction_indices=[1]))
train_step = tf.train.GradientDescentOptimizer(1e-4).minimize(cross_entropy)
########## LAYER DEFINITION END ##########

# start training
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
sess = tf.Session()
sess.run(tf.initialize_all_variables())
for step in range(500):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    sess.run(train_step,feed_dict={xs:batch_xs, ys:batch_ys})
    if step % 50 == 0:
        print( compute_accuracy(mnist.test.images, mnist.test.labels) )
sess.close()
Here is MyNet.py:
import tensorflow as tf

def weight_variable(shape,stddev=0.1):
    init = tf.truncated_normal(shape,stddev)
    return tf.Variable(init)

def bias_variable(shape):
    init = tf.constant(0.1,shape=shape)
    return tf.Variable(init)

def conv_layer(bottom,Weights,name='conv_layer'):
    with tf.name_scope(name):
        # stride=[1,x_dir,y_dir,1]
        return tf.nn.conv2d(bottom,Weights,strides=[1,1,1,1], padding='VALID')

def pooling_layer(bottom,name='pooling_layer'):
    with tf.name_scope(name):
        return tf.nn.max_pool(bottom,ksize=[1,2,2,1],strides=[1,2,2,1],padding='VALID')

def relu_layer(bottom,name='relu_layer'):
    with tf.name_scope(name):
        return tf.nn.relu(bottom)

def fully_connecd(bottom,Weights,name='fc'):
    with tf.name_scope(name):
        return tf.matmul(bottom,Weights)

def softmax_layer(bottom,name='softmax'):
    with tf.name_scope(name):
        return tf.nn.softmax(bottom)
Here is the terminal output:
Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
0.098
0.098
0.098
0.098
0.098
0.098
0.098
0.098
0.098
0.098
>>>
Your code looks correct, but you are training a 5-layer neural network with a plain gradient descent optimizer for only 500 iterations, which is not enough. I suggest that you:
- print the cross entropy during training (if it does not decrease, there is probably a bug in your code); a sketch of this follows the list
- increase the number of iterations (e.g. to 10,000)
- switch to an optimizer that accelerates learning (momentum, or an adaptive method like Adam)
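Here is a minimal sketch of those three changes, meant to replace the training-solver line and the "start training" block of MyConvNet.py above (it reuses the xs, ys, cross_entropy and compute_accuracy names from that script); the Adam learning rate of 1e-3 is just an illustrative choice:

# swap plain gradient descent for Adam
train_step = tf.train.AdamOptimizer(1e-3).minimize(cross_entropy)

mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
sess = tf.Session()
sess.run(tf.initialize_all_variables())
for step in range(10000):  # more iterations than the original 500
    batch_xs, batch_ys = mnist.train.next_batch(100)
    # fetch the loss together with the train op so its trend is visible
    _, loss = sess.run([train_step, cross_entropy],
                       feed_dict={xs: batch_xs, ys: batch_ys})
    if step % 500 == 0:
        print('step %d, cross entropy %g' % (step, loss))
        print(compute_accuracy(mnist.test.images, mnist.test.labels))
sess.close()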
In your weight initialization, replace tf.truncated_normal(shape,stddev) with:

def weight_variable(shape,stddev=0.1):
    init = tf.truncated_normal(shape,stddev=stddev)
    return tf.Variable(init)
See the explanation in my comment: the second positional argument of tf.truncated_normal is the mean, not the standard deviation, so tf.truncated_normal(shape,stddev) draws weights centered at 0.1 with the default stddev of 1.0. To illustrate (the snippet adds the matplotlib import it needs and reuses the session from above):

import matplotlib.pyplot as plt

# positional second argument is the mean: centered at 0.1, stddev 1.0
test = tf.truncated_normal([1000000], 0.1)
plt.hist(test.eval(session=sess), 50)
plt.show()

# keyword argument: centered at 0, stddev 0.1
test2 = tf.truncated_normal([1000000], stddev=.1)
plt.hist(test2.eval(session=sess), 50)
plt.show()
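This is most likely why training is stuck: with a stddev of 1.0 the initial weights are large enough to saturate the softmax, so tf.log(prediction) hits log(0) for many entries, the gradients vanish or become NaN, and the network stays at 0.098 accuracy, i.e. random guessing over ten classes.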