在时间序列 RNN 初学者块中重塑数组
Reshaping arrays in Time-Series RNN Beginners Block
python 和深度学习的新手。我试图用一些数据构建一个 RNN,但我不知道哪里出错了。
这是我的代码:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
raw = pd.read_excel('Online Retail.xlsx',index_col='InvoiceDate')
sales = raw.drop(['InvoiceNo','StockCode','Country','Description'],axis=1)
sales.head()
sales.index = pd.to_datetime(sales.index)
sales.info()
train_set = sales.head(50000)
test_set = sales.tail(41909)
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
training = np.nan_to_num(train_set)
testing = np.nan_to_num(test_set)
train_scaled = scaler.fit_transform(training)
test_scaled = scaler.fit_transform(testing)
def next_batch(training_data,batch_size,steps):
rand_start = np.random.randint(0,len(training_data)-steps)
y_batch =
np.array(training_data[rand_start:rand_start+steps+1].reshape(26,steps+1))
return
y_batch[:,:-1].reshape(-1,steps,1),y_batch[:,1:].reshape(-1,steps,1)
import tensorflow as tf
num_inputs = 1
num_time_steps = 10
num_neurons = 100
num_outputs = 1
learning_rate = 0.03
num_train_iterations = 4000
batch_size = 1
X = tf.placeholder(tf.float32,[None,num_time_steps,num_inputs])
y = tf.placeholder(tf.float32,[None,num_time_steps,num_outputs])
cell = tf.contrib.rnn.OutputProjectionWrapper(
tf.contrib.rnn.BasicLSTMCell(num_units=num_neurons,activation=tf.nn.relu),output_size=num_outputs)
outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)
loss = tf.reduce_mean(tf.square(outputs - y)) # MSE
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train = optimizer.minimize(loss)
init = tf.global_variables_initializer()
saver = tf.train.Saver()
with tf.Session(config=tf.ConfigProto()) as sess:
sess.run(init)
for iteration in range(num_train_iterations):
X_batch, y_batch = next_batch(train_scaled,batch_size,num_time_steps)
sess.run(train, feed_dict={X: X_batch, y: y_batch})
if iteration % 100 == 0:
mse = loss.eval(feed_dict={X: X_batch, y: y_batch})
print(iteration, "\tMSE:", mse)
# Save Model for Later
saver.save(sess, "./ex_time_series_model")
输出:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-36-f2f7c66a33df> in <module>()
4 for iteration in range(num_train_iterations):
5
----> 6 X_batch, y_batch = next_batch(train_scaled,batch_size,num_time_steps)
7 sess.run(train, feed_dict={X: X_batch, y: y_batch})
8
<ipython-input-26-f673a469c67d> in next_batch(training_data, batch_size, steps)
1 def next_batch(training_data,batch_size,steps):
2 rand_start = np.random.randint(0,len(training_data)-steps)
----> 3 y_batch = np.array(training_data[rand_start:rand_start+steps+1].reshape(26,steps+1))
4 return y_batch[:,:-1].reshape(-1,steps,1),y_batch[:,1:].reshape(-1,steps,1)
ValueError: cannot reshape array of size 33 into shape (26,11)
In [ ]:
错误表明您试图将大小为 33
的张量重塑为大小为 26x11
的张量,但您做不到。您应该将大小为 286
的张量重塑为 26x11
.
尝试通过在每个步骤中使用 print (y_batch.get_shape())
打印 y_batch
形状来调试 next_batch
函数,并检查它是否具有 286
.[=18 形状=]
这一点我没听清楚,为什么每批都随机取?为什么你没有正常读取输入数据?
如果您在发布代码时修复缩进会更好,很难跟踪。
我不确定 26
这个数字是从哪里来的,但它与您的数据维度不匹配。删除四列后,training_data
数组为 (50000, 3)
,其中您需要 (11, 3)
个批次。这个数组显然不能重塑为 (26, 11)
.
你的意思可能是这个(在 next_batch
函数中):
y_batch = np.array(training_data[rand_start:rand_start+steps+1].reshape(3,steps+1))
python 和深度学习的新手。我试图用一些数据构建一个 RNN,但我不知道哪里出错了。
这是我的代码:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
raw = pd.read_excel('Online Retail.xlsx',index_col='InvoiceDate')
sales = raw.drop(['InvoiceNo','StockCode','Country','Description'],axis=1)
sales.head()
sales.index = pd.to_datetime(sales.index)
sales.info()
train_set = sales.head(50000)
test_set = sales.tail(41909)
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
training = np.nan_to_num(train_set)
testing = np.nan_to_num(test_set)
train_scaled = scaler.fit_transform(training)
test_scaled = scaler.fit_transform(testing)
def next_batch(training_data,batch_size,steps):
rand_start = np.random.randint(0,len(training_data)-steps)
y_batch =
np.array(training_data[rand_start:rand_start+steps+1].reshape(26,steps+1))
return
y_batch[:,:-1].reshape(-1,steps,1),y_batch[:,1:].reshape(-1,steps,1)
import tensorflow as tf
num_inputs = 1
num_time_steps = 10
num_neurons = 100
num_outputs = 1
learning_rate = 0.03
num_train_iterations = 4000
batch_size = 1
X = tf.placeholder(tf.float32,[None,num_time_steps,num_inputs])
y = tf.placeholder(tf.float32,[None,num_time_steps,num_outputs])
cell = tf.contrib.rnn.OutputProjectionWrapper(
tf.contrib.rnn.BasicLSTMCell(num_units=num_neurons,activation=tf.nn.relu),output_size=num_outputs)
outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)
loss = tf.reduce_mean(tf.square(outputs - y)) # MSE
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train = optimizer.minimize(loss)
init = tf.global_variables_initializer()
saver = tf.train.Saver()
with tf.Session(config=tf.ConfigProto()) as sess:
sess.run(init)
for iteration in range(num_train_iterations):
X_batch, y_batch = next_batch(train_scaled,batch_size,num_time_steps)
sess.run(train, feed_dict={X: X_batch, y: y_batch})
if iteration % 100 == 0:
mse = loss.eval(feed_dict={X: X_batch, y: y_batch})
print(iteration, "\tMSE:", mse)
# Save Model for Later
saver.save(sess, "./ex_time_series_model")
输出:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-36-f2f7c66a33df> in <module>()
4 for iteration in range(num_train_iterations):
5
----> 6 X_batch, y_batch = next_batch(train_scaled,batch_size,num_time_steps)
7 sess.run(train, feed_dict={X: X_batch, y: y_batch})
8
<ipython-input-26-f673a469c67d> in next_batch(training_data, batch_size, steps)
1 def next_batch(training_data,batch_size,steps):
2 rand_start = np.random.randint(0,len(training_data)-steps)
----> 3 y_batch = np.array(training_data[rand_start:rand_start+steps+1].reshape(26,steps+1))
4 return y_batch[:,:-1].reshape(-1,steps,1),y_batch[:,1:].reshape(-1,steps,1)
ValueError: cannot reshape array of size 33 into shape (26,11)
In [ ]:
错误表明您试图将大小为 33
的张量重塑为大小为 26x11
的张量,但您做不到。您应该将大小为 286
的张量重塑为 26x11
.
尝试通过在每个步骤中使用 print (y_batch.get_shape())
打印 y_batch
形状来调试 next_batch
函数,并检查它是否具有 286
.[=18 形状=]
这一点我没听清楚,为什么每批都随机取?为什么你没有正常读取输入数据?
如果您在发布代码时修复缩进会更好,很难跟踪。
我不确定 26
这个数字是从哪里来的,但它与您的数据维度不匹配。删除四列后,training_data
数组为 (50000, 3)
,其中您需要 (11, 3)
个批次。这个数组显然不能重塑为 (26, 11)
.
你的意思可能是这个(在 next_batch
函数中):
y_batch = np.array(training_data[rand_start:rand_start+steps+1].reshape(3,steps+1))