在 Python 中使用 Pandas 以标准差作为窗口大小计算移动平均
Pandas moving average using a standard deviation in Python
我想在使用 RandomForestRegressor 拟合回归模型之后,
用移动平均滤波器(moving average filter)平滑预测结果中的噪声。
所用的数据集可以在 this link 中找到。
import pandas as pd
import math
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import r2_score, mean_squared_error, make_scorer
from sklearn.model_selection import train_test_split
from math import sqrt
from sklearn.cross_validation import train_test_split
# Build a lagged-feature regression dataset from 'cubic32.csv', fit a random
# forest on it, and plot a moving average of the predictions next to the
# true targets.
#
# NOTE(review): the `sklearn.cross_validation` import at the top of the file
# no longer exists in scikit-learn >= 0.20; `train_test_split` is already
# imported from `sklearn.model_selection`.
n_features = 3000
df = pd.read_csv('cubic32.csv')

# Add lag columns X_t1 .. X_t(n_features - 1). They are built first and
# attached with a single concat, which avoids the DataFrame fragmentation
# caused by inserting thousands of columns one at a time.
lagged = pd.DataFrame(
    {'X_t' + str(i): df['X'].shift(i) for i in range(1, n_features)}
)
df = pd.concat([df, lagged], axis=1)
print(df)

# shift() leaves NaN in the leading rows of every lag column; drop those rows.
df.dropna(inplace=True)

X = df.drop('Y', axis=1)
y = df['Y']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.40)
# 'time' is removed from both splits so it is not used as a feature.
X_train = X_train.drop('time', axis=1)
X_test = X_test.drop('time', axis=1)

parameters = {'n_estimators': [10]}
clf_rf = RandomForestRegressor(random_state=1)
clf = GridSearchCV(clf_rf, parameters, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)
model = clf.fit(X_train, y_train)

#####
print()
# `grid_scores_` was removed from GridSearchCV; the per-candidate parameters
# and mean scores are read from `cv_results_` instead.
print(list(zip(model.cv_results_['params'], model.cv_results_['mean_test_score'])))
print("The best score: ",model.best_score_)
# The scorer is the negated MSE, so flip the sign before taking the root.
print("RMSE:",math.sqrt(model.best_score_*-1))

# NOTE(review): this fits `clf_rf` as constructed above (only random_state
# set), not `model.best_estimator_` -- confirm that is intended.
clf_rf.fit(X_train,y_train)
modelPrediction = clf_rf.predict(X_test)
print(modelPrediction)
print("Number of predictions:",len(modelPrediction))

meanSquaredError=mean_squared_error(y_test, modelPrediction)
print("Mean Square Error (MSE):", meanSquaredError)
rootMeanSquaredError = sqrt(meanSquaredError)
print("Root-Mean-Square Error (RMSE):", rootMeanSquaredError)

fig, ax = plt.subplots()
index_values=range(0,len(y_test))

# Restore the original row order before plotting. Reassigning (instead of
# sorting in place) avoids mutating objects derived from the split.
y_test = y_test.sort_index()
X_test = X_test.sort_index()

modelPred_test = clf_rf.predict(X_test)
ax.plot(pd.Series(index_values), y_test.values)

# `pd.rolling_mean` was removed from pandas; the equivalent is
# Series.rolling(...).mean(). The first 89 values are NaN (min_periods=90).
smoothed = pd.Series(modelPred_test).rolling(window=90, min_periods=90, center=False).mean()

PlotInOne=pd.DataFrame(pd.concat([pd.Series(smoothed), pd.Series(y_test.values)], axis=1))
plt.figure(); PlotInOne.plot(); plt.legend(loc='best')
但是,预测值的绘图(如下所示)似乎非常粗糙(蓝线)。
橙色线是实际值的图表。
我们如何计算上图中预测值(蓝线)的标准差,并将其作为窗口大小(window)参数传给移动平均?目前我把移动窗口的大小手动设置为 50,但希望改为传入标准差的值。
# Current approach: moving average with a hand-picked window of 50 samples.
# (`pd.rolling_mean` was removed from pandas; use Series.rolling(...).mean().)
smoothed = pd.Series(modelPred_test).rolling(window=50, min_periods=50, center=False).mean()
# Rolling standard deviation over the same 50-sample window
# (`pd.rolling` does not exist as a top-level function).
smoothed = pd.Series(modelPred_test).rolling(window=50, min_periods=50, center=False).std()
# Moving average whose window length is the overall standard deviation of the
# predictions. The window must be a positive integer, so the rounded value is
# cast to int and clamped to at least 1.
pd.Series(modelPred_test).rolling(window=max(1, int(round(np.std(modelPred_test))))).mean()
你也可以把标准差同样代入最小窗口参数(min_periods)。
我想在使用 RandomForestRegressor 拟合回归模型之后,
用移动平均滤波器(moving average filter)平滑预测结果中的噪声。
所用的数据集可以在 this link 中找到。
import pandas as pd
import math
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import r2_score, mean_squared_error, make_scorer
from sklearn.model_selection import train_test_split
from math import sqrt
from sklearn.cross_validation import train_test_split
# Build a lagged-feature regression dataset from 'cubic32.csv', fit a random
# forest on it, and plot a moving average of the predictions next to the
# true targets.
#
# NOTE(review): the `sklearn.cross_validation` import at the top of the file
# no longer exists in scikit-learn >= 0.20; `train_test_split` is already
# imported from `sklearn.model_selection`.
n_features = 3000
df = pd.read_csv('cubic32.csv')

# Add lag columns X_t1 .. X_t(n_features - 1). They are built first and
# attached with a single concat, which avoids the DataFrame fragmentation
# caused by inserting thousands of columns one at a time.
lagged = pd.DataFrame(
    {'X_t' + str(i): df['X'].shift(i) for i in range(1, n_features)}
)
df = pd.concat([df, lagged], axis=1)
print(df)

# shift() leaves NaN in the leading rows of every lag column; drop those rows.
df.dropna(inplace=True)

X = df.drop('Y', axis=1)
y = df['Y']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.40)
# 'time' is removed from both splits so it is not used as a feature.
X_train = X_train.drop('time', axis=1)
X_test = X_test.drop('time', axis=1)

parameters = {'n_estimators': [10]}
clf_rf = RandomForestRegressor(random_state=1)
clf = GridSearchCV(clf_rf, parameters, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)
model = clf.fit(X_train, y_train)

#####
print()
# `grid_scores_` was removed from GridSearchCV; the per-candidate parameters
# and mean scores are read from `cv_results_` instead.
print(list(zip(model.cv_results_['params'], model.cv_results_['mean_test_score'])))
print("The best score: ",model.best_score_)
# The scorer is the negated MSE, so flip the sign before taking the root.
print("RMSE:",math.sqrt(model.best_score_*-1))

# NOTE(review): this fits `clf_rf` as constructed above (only random_state
# set), not `model.best_estimator_` -- confirm that is intended.
clf_rf.fit(X_train,y_train)
modelPrediction = clf_rf.predict(X_test)
print(modelPrediction)
print("Number of predictions:",len(modelPrediction))

meanSquaredError=mean_squared_error(y_test, modelPrediction)
print("Mean Square Error (MSE):", meanSquaredError)
rootMeanSquaredError = sqrt(meanSquaredError)
print("Root-Mean-Square Error (RMSE):", rootMeanSquaredError)

fig, ax = plt.subplots()
index_values=range(0,len(y_test))

# Restore the original row order before plotting. Reassigning (instead of
# sorting in place) avoids mutating objects derived from the split.
y_test = y_test.sort_index()
X_test = X_test.sort_index()

modelPred_test = clf_rf.predict(X_test)
ax.plot(pd.Series(index_values), y_test.values)

# `pd.rolling_mean` was removed from pandas; the equivalent is
# Series.rolling(...).mean(). The first 89 values are NaN (min_periods=90).
smoothed = pd.Series(modelPred_test).rolling(window=90, min_periods=90, center=False).mean()

PlotInOne=pd.DataFrame(pd.concat([pd.Series(smoothed), pd.Series(y_test.values)], axis=1))
plt.figure(); PlotInOne.plot(); plt.legend(loc='best')
但是,预测值的绘图(如下所示)似乎非常粗糙(蓝线)。
橙色线是实际值的图表。
我们如何计算上图中预测值(蓝线)的标准差,并将其作为窗口大小(window)参数传给移动平均?目前我把移动窗口的大小手动设置为 50,但希望改为传入标准差的值。
# Current approach: moving average with a hand-picked window of 50 samples.
# (`pd.rolling_mean` was removed from pandas; use Series.rolling(...).mean().)
smoothed = pd.Series(modelPred_test).rolling(window=50, min_periods=50, center=False).mean()
# Rolling standard deviation over the same 50-sample window
# (`pd.rolling` does not exist as a top-level function).
smoothed = pd.Series(modelPred_test).rolling(window=50, min_periods=50, center=False).std()
# Moving average whose window length is the overall standard deviation of the
# predictions. The window must be a positive integer, so the rounded value is
# cast to int and clamped to at least 1.
pd.Series(modelPred_test).rolling(window=max(1, int(round(np.std(modelPred_test))))).mean()
你也可以把标准差同样代入最小窗口参数(min_periods)。