神经网络模型精度超低
Super low accuracy for neural network model
我按照一篇使用交叉验证评估神经网络模型的教程,写了如下代码:
# Multiclass Classification with the Iris Flowers Dataset
import numpy
import pandas
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
# fix random seed for reproducibility
# NOTE(review): only NumPy's generator is seeded here; any separate RNG used by
# the Keras backend is not — confirm if exact repeatability matters
seed = 7
numpy.random.seed(seed)
# load dataset (header=None: the file is read without a header row)
dataframe = pandas.read_csv("/content/drive/My Drive/iris.data", header=None)
dataset = dataframe.values
# the first four columns are the input features, cast to float
X = dataset[:,0:4].astype(float)
# the fifth column holds the class labels
Y = dataset[:,4]
# encode class values as integers
encoder = LabelEncoder()
encoder.fit(Y)
encoded_Y = encoder.transform(Y)
# convert integers to dummy variables (i.e. one hot encoded)
dummy_y = np_utils.to_categorical(encoded_Y)
# define baseline model
def baseline_model():
    """Build and compile the baseline classifier exactly as posted in the question.

    The network has one 4-unit ReLU hidden layer fed by 4 inputs and a
    3-unit output layer. It is compiled with categorical cross-entropy
    loss, the Adam optimizer and an accuracy metric.

    The sigmoid output activation is deliberately left as posted: the
    answers that follow discuss replacing it with softmax.

    Returns:
        The compiled Sequential model.
    """
    # create model
    model = Sequential()
    model.add(Dense(4, input_dim=4, activation="relu", kernel_initializer="normal"))
    model.add(Dense(3, activation="sigmoid", kernel_initializer="normal"))
    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
# wrap the model builder so scikit-learn can cross-validate it
# NOTE(review): `nb_epoch` is the old Keras spelling; the answers below
# recommend `epochs` instead
estimator = KerasClassifier(build_fn=baseline_model, nb_epoch=200, batch_size=5, verbose=0)
# 10-fold cross-validation with shuffling, seeded with the same seed as above
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
# one score per fold
results = cross_val_score(estimator, X, dummy_y, cv=kfold)
# report mean and standard deviation of the fold scores as percentages
print("Accuracy: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))
准确率应该在 95.33% (4.27%) 左右,但我尝试了几次,得到的结果都约为 Accuracy: 34.00% (13.15%)。模型代码看起来与教程完全相同,数据也是按照说明从此处(here)下载的。可能是哪里出了问题?谢谢。
把这一行:
model.add(Dense(4, input_dim=4, activation="relu", kernel_initializer="normal"))
替换为:
model.add(Dense(16, activation="relu"))
model.add(Dense(32, activation="relu"))
然后,把输出层改为:
model.add(Dense(3, activation="softmax", kernel_initializer="normal"))
你的隐藏层太小,而且输出层的激活函数用错了。对于 3 个或更多类别的分类,输出层的激活函数必须是 softmax。
完整的可运行代码:
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler
# fix the NumPy random seed
seed = 7
numpy.random.seed(seed)
from sklearn.datasets import load_iris
# load the features and integer class labels straight from scikit-learn
X, encoded_Y = load_iris(return_X_y=True)
# rescale the features with MinMaxScaler before training
mms = MinMaxScaler()
X = mms.fit_transform(X)
# one-hot encode the integer labels
dummy_y = np_utils.to_categorical(encoded_Y)
def baseline_model():
    """Build and compile the corrected classifier from the first answer.

    The network has two ReLU hidden layers (4 then 8 units) on 4 inputs,
    followed by a 3-unit softmax output layer. It is compiled with
    categorical cross-entropy loss, the Adam optimizer and an accuracy
    metric.

    Returns:
        The compiled Sequential model.
    """
    model = Sequential()
    model.add(Dense(4, input_dim=4, activation="relu", kernel_initializer="normal"))
    model.add(Dense(8, activation="relu", kernel_initializer="normal"))
    model.add(Dense(3, activation="softmax", kernel_initializer="normal"))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
# `epochs` (not `nb_epoch`) is passed here; batch_size is not set explicitly
estimator = KerasClassifier(build_fn=baseline_model, epochs=200, verbose=0)
# 10-fold cross-validation with shuffling, seeded with the same seed as above
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X, dummy_y, cv=kfold)
# print the raw per-fold scores
print(results)
Out[5]:
array([0.60000002, 0.93333334, 1. , 0.66666669, 0.80000001,
1. , 1. , 0.93333334, 0.80000001, 0.86666667])
仅靠随机猜测,准确率也应该有 33% 左右。
如何改进代码:
- 标准化数据。
from sklearn.preprocessing import StandardScaler, MinMaxScaler
# standardize the features before training
# NOTE(review): the scaler is fitted on all of X, so held-out folds influence
# the scaling; putting it in a Pipeline passed to cross_val_score would avoid that
scaler = StandardScaler()
X = scaler.fit_transform(X)
- 增加各层的神经元数量;
- 把输出层的激活函数从 sigmoid 改为 softmax;
- 使用 categorical_crossentropy 作为损失函数:
# define baseline model
def baseline_model():
    """Build and compile the improved classifier from the second answer.

    The network has one 8-unit ReLU hidden layer fed by 4 inputs and a
    3-unit softmax output layer. It is compiled with categorical
    cross-entropy loss, the Adam optimizer and an accuracy metric.

    Returns:
        The compiled Sequential model.
    """
    # create model
    model = Sequential()
    model.add(Dense(8, input_dim=4, activation="relu"))
    model.add(Dense(3, activation="softmax"))
    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
- 将 nb_epoch(旧版 Keras 的参数名)改为 epochs:
# uses `epochs` in place of the old `nb_epoch` argument
estimator = KerasClassifier(build_fn=baseline_model, epochs=50, batch_size=5, verbose=1)
这样准确率可以达到 90% 左右。如果训练超过 50 个 epoch,模型最终会过拟合:准确率甚至可能达到 100%,但模型的泛化能力会变差。
请记住,全连接层并不总是最好的解决方案。
我按照一篇使用交叉验证评估神经网络模型的教程,写了如下代码:
# Multiclass Classification with the Iris Flowers Dataset
import numpy
import pandas
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
# fix random seed for reproducibility
# NOTE(review): only NumPy's generator is seeded here; any separate RNG used by
# the Keras backend is not — confirm if exact repeatability matters
seed = 7
numpy.random.seed(seed)
# load dataset (header=None: the file is read without a header row)
dataframe = pandas.read_csv("/content/drive/My Drive/iris.data", header=None)
dataset = dataframe.values
# the first four columns are the input features, cast to float
X = dataset[:,0:4].astype(float)
# the fifth column holds the class labels
Y = dataset[:,4]
# encode class values as integers
encoder = LabelEncoder()
encoder.fit(Y)
encoded_Y = encoder.transform(Y)
# convert integers to dummy variables (i.e. one hot encoded)
dummy_y = np_utils.to_categorical(encoded_Y)
# define baseline model
def baseline_model():
    """Build and compile the baseline classifier exactly as posted in the question.

    The network has one 4-unit ReLU hidden layer fed by 4 inputs and a
    3-unit output layer. It is compiled with categorical cross-entropy
    loss, the Adam optimizer and an accuracy metric.

    The sigmoid output activation is deliberately left as posted: the
    answers that follow discuss replacing it with softmax.

    Returns:
        The compiled Sequential model.
    """
    # create model
    model = Sequential()
    model.add(Dense(4, input_dim=4, activation="relu", kernel_initializer="normal"))
    model.add(Dense(3, activation="sigmoid", kernel_initializer="normal"))
    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
# wrap the model builder so scikit-learn can cross-validate it
# NOTE(review): `nb_epoch` is the old Keras spelling; the answers below
# recommend `epochs` instead
estimator = KerasClassifier(build_fn=baseline_model, nb_epoch=200, batch_size=5, verbose=0)
# 10-fold cross-validation with shuffling, seeded with the same seed as above
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
# one score per fold
results = cross_val_score(estimator, X, dummy_y, cv=kfold)
# report mean and standard deviation of the fold scores as percentages
print("Accuracy: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))
准确率应该在 95.33% (4.27%) 左右,但我尝试了几次,得到的结果都约为 Accuracy: 34.00% (13.15%)。模型代码看起来与教程完全相同,数据也是按照说明从此处(here)下载的。可能是哪里出了问题?谢谢。
把这一行:
model.add(Dense(4, input_dim=4, activation="relu", kernel_initializer="normal"))
替换为:
model.add(Dense(16, activation="relu"))
model.add(Dense(32, activation="relu"))
然后,把输出层改为:
model.add(Dense(3, activation="softmax", kernel_initializer="normal"))
你的隐藏层太小,而且输出层的激活函数用错了。对于 3 个或更多类别的分类,输出层的激活函数必须是 softmax。
完整的可运行代码:
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler
# fix the NumPy random seed
seed = 7
numpy.random.seed(seed)
from sklearn.datasets import load_iris
# load the features and integer class labels straight from scikit-learn
X, encoded_Y = load_iris(return_X_y=True)
# rescale the features with MinMaxScaler before training
mms = MinMaxScaler()
X = mms.fit_transform(X)
# one-hot encode the integer labels
dummy_y = np_utils.to_categorical(encoded_Y)
def baseline_model():
    """Build and compile the corrected classifier from the first answer.

    The network has two ReLU hidden layers (4 then 8 units) on 4 inputs,
    followed by a 3-unit softmax output layer. It is compiled with
    categorical cross-entropy loss, the Adam optimizer and an accuracy
    metric.

    Returns:
        The compiled Sequential model.
    """
    model = Sequential()
    model.add(Dense(4, input_dim=4, activation="relu", kernel_initializer="normal"))
    model.add(Dense(8, activation="relu", kernel_initializer="normal"))
    model.add(Dense(3, activation="softmax", kernel_initializer="normal"))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
# `epochs` (not `nb_epoch`) is passed here; batch_size is not set explicitly
estimator = KerasClassifier(build_fn=baseline_model, epochs=200, verbose=0)
# 10-fold cross-validation with shuffling, seeded with the same seed as above
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X, dummy_y, cv=kfold)
# print the raw per-fold scores
print(results)
Out[5]:
array([0.60000002, 0.93333334, 1. , 0.66666669, 0.80000001,
1. , 1. , 0.93333334, 0.80000001, 0.86666667])
仅靠随机猜测,准确率也应该有 33% 左右。
如何改进代码:
- 标准化数据。
from sklearn.preprocessing import StandardScaler, MinMaxScaler
# standardize the features before training
# NOTE(review): the scaler is fitted on all of X, so held-out folds influence
# the scaling; putting it in a Pipeline passed to cross_val_score would avoid that
scaler = StandardScaler()
X = scaler.fit_transform(X)
- 增加各层的神经元数量;
- 把输出层的激活函数从 sigmoid 改为 softmax;
- 使用 categorical_crossentropy 作为损失函数:
# define baseline model
def baseline_model():
    """Build and compile the improved classifier from the second answer.

    The network has one 8-unit ReLU hidden layer fed by 4 inputs and a
    3-unit softmax output layer. It is compiled with categorical
    cross-entropy loss, the Adam optimizer and an accuracy metric.

    Returns:
        The compiled Sequential model.
    """
    # create model
    model = Sequential()
    model.add(Dense(8, input_dim=4, activation="relu"))
    model.add(Dense(3, activation="softmax"))
    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
- 将 nb_epoch(旧版 Keras 的参数名)改为 epochs:
# uses `epochs` in place of the old `nb_epoch` argument
estimator = KerasClassifier(build_fn=baseline_model, epochs=50, batch_size=5, verbose=1)
这样准确率可以达到 90% 左右。如果训练超过 50 个 epoch,模型最终会过拟合:准确率甚至可能达到 100%,但模型的泛化能力会变差。
请记住,全连接层并不总是最好的解决方案。