使用网格搜索 CV 时如何在 Keras 模型中使用自定义指标?
How to use custom metrics in Keras model while using Grid Search CV?
我想在我的 Keras 模型中使用 R2(决定系数)作为指标。为此,我定义了一个函数 coeff_determination。在不使用网格搜索 CV 时,这个函数作为指标工作正常;但在使用网格搜索 CV 时,会报出如下错误:"The model is not configured to compute the accuracy. You should pass metrics=["accuracy"] to the model.compile() method"。代码如下。
def create_model():
    """Build and compile the 1-D CNN regression network ("Model 7").

    The network is six Conv1D -> MaxPooling1D -> BatchNormalization stages,
    followed by Flatten, two 130-unit ReLU dense layers and a single linear
    output unit. It is compiled with mean-squared-error loss, the Adam
    optimizer and ``coeff_determination`` as the reported metric.

    The input length is read from the module-level ``X_train``
    (``X_train.shape[1]``); each sample has one channel.

    Returns:
        The compiled ``Sequential`` model.
    """
    # (filters, pool_size) of each convolutional stage, in order.
    conv_stages = [(10, 4), (16, 3), (22, 3), (28, 4), (34, 3), (40, 3)]

    model = Sequential()
    for stage_index, (filters, pool_size) in enumerate(conv_stages):
        # Only the first layer of a Sequential model declares the input shape.
        extra = {"input_shape": (X_train.shape[1], 1)} if stage_index == 0 else {}
        model.add(
            Convolution1D(
                filters=filters,
                kernel_size=12,
                activation="relu",
                kernel_initializer="glorot_uniform",
                **extra,
            )
        )
        model.add(MaxPooling1D(pool_size=pool_size, strides=2))
        model.add(BatchNormalization())

    model.add(Flatten())
    model.add(Dense(130, activation="relu"))
    model.add(Dense(130, activation="relu"))
    model.add(Dense(1, activation="linear"))

    # NOTE(review): newer Keras releases spell this argument `learning_rate`;
    # confirm against the installed version before renaming.
    model.compile(
        loss="mean_squared_error",
        optimizer=Adam(lr=0.0001),
        metrics=[coeff_determination],
    )
    return model
def coeff_determination(y_true, y_pred):
    """Return the coefficient of determination (R^2) of the predictions.

    Evaluates ``1 - SS_res / (SS_tot + epsilon)`` with Keras backend ops,
    where ``SS_res`` is the sum of squared residuals and ``SS_tot`` is the
    sum of squared deviations of ``y_true`` from its mean. ``K.epsilon()``
    is added to the denominator to guard the division.
    """
    residual_ss = K.sum(K.square(y_true - y_pred))
    centered_truth = y_true - K.mean(y_true)
    total_ss = K.sum(K.square(centered_truth))
    return 1 - residual_ss / (total_ss + K.epsilon())
# KerasRegressor lives next to KerasClassifier; adjust this path if the file
# imports the scikit-learn wrappers from elsewhere (e.g. tensorflow.keras).
from keras.wrappers.scikit_learn import KerasRegressor

# Fix the NumPy seed so the same results can be reproduced next time.
seed = 7
np.random.seed(seed)

# Wrap the Keras model for scikit-learn. The network has a single linear
# output trained with MSE, so it is a regressor. Wrapping it in
# KerasClassifier makes the default score accuracy, which is what raises
# "The model is not configured to compute the accuracy".
model = KerasRegressor(build_fn=create_model, verbose=0)

# Grid of hyper-parameters to search.
batch_size = [20, 30, 40, 80]
epochs = [100, 200, 300, 400]
param_grid = dict(batch_size=batch_size, epochs=epochs)

# Turn scikit-learn's r2_score metric into a scorer (higher is better).
my_scorer = make_scorer(r2_score, greater_is_better=True)

# Run the 3-fold cross-validated grid search, ranking candidates by R^2.
grid = GridSearchCV(estimator=model, scoring=my_scorer, param_grid=param_grid, n_jobs=1, cv=3)
grid_result = grid.fit(X_train, y_train)

# Summarize the results: best candidate first, then every candidate.
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))
我认为您需要将自定义评分函数通过 scoring 参数传给 GridSearchCV,否则它会使用估计器默认的 score 方法,即准确率(accuracy)。
scoring:
str, callable, list/tuple or dict, default=None
A single str (see The scoring parameter: defining model evaluation rules) or a callable (see Defining your scoring strategy from metric functions) to evaluate the predictions on the test set.
For evaluating multiple metrics, either give a list of (unique) strings or a dict with names as keys and callables as values.
NOTE that when using custom scorers, each scorer should return a single value. Metric functions returning a list/array of values can be wrapped into multiple scorers that return one value each.
See Specifying multiple metrics for evaluation for an example.
If None, the estimator’s score method is used.
我想在我的 Keras 模型中使用 R2(决定系数)作为指标。为此,我定义了一个函数 coeff_determination。在不使用网格搜索 CV 时,这个函数作为指标工作正常;但在使用网格搜索 CV 时,会报出如下错误:"The model is not configured to compute the accuracy. You should pass metrics=["accuracy"] to the model.compile() method"。代码如下。
def create_model():
    """Build and compile the 1-D CNN regression network ("Model 7").

    The network is six Conv1D -> MaxPooling1D -> BatchNormalization stages,
    followed by Flatten, two 130-unit ReLU dense layers and a single linear
    output unit. It is compiled with mean-squared-error loss, the Adam
    optimizer and ``coeff_determination`` as the reported metric.

    The input length is read from the module-level ``X_train``
    (``X_train.shape[1]``); each sample has one channel.

    Returns:
        The compiled ``Sequential`` model.
    """
    # (filters, pool_size) of each convolutional stage, in order.
    conv_stages = [(10, 4), (16, 3), (22, 3), (28, 4), (34, 3), (40, 3)]

    model = Sequential()
    for stage_index, (filters, pool_size) in enumerate(conv_stages):
        # Only the first layer of a Sequential model declares the input shape.
        extra = {"input_shape": (X_train.shape[1], 1)} if stage_index == 0 else {}
        model.add(
            Convolution1D(
                filters=filters,
                kernel_size=12,
                activation="relu",
                kernel_initializer="glorot_uniform",
                **extra,
            )
        )
        model.add(MaxPooling1D(pool_size=pool_size, strides=2))
        model.add(BatchNormalization())

    model.add(Flatten())
    model.add(Dense(130, activation="relu"))
    model.add(Dense(130, activation="relu"))
    model.add(Dense(1, activation="linear"))

    # NOTE(review): newer Keras releases spell this argument `learning_rate`;
    # confirm against the installed version before renaming.
    model.compile(
        loss="mean_squared_error",
        optimizer=Adam(lr=0.0001),
        metrics=[coeff_determination],
    )
    return model
def coeff_determination(y_true, y_pred):
    """Return the coefficient of determination (R^2) of the predictions.

    Evaluates ``1 - SS_res / (SS_tot + epsilon)`` with Keras backend ops,
    where ``SS_res`` is the sum of squared residuals and ``SS_tot`` is the
    sum of squared deviations of ``y_true`` from its mean. ``K.epsilon()``
    is added to the denominator to guard the division.
    """
    residual_ss = K.sum(K.square(y_true - y_pred))
    centered_truth = y_true - K.mean(y_true)
    total_ss = K.sum(K.square(centered_truth))
    return 1 - residual_ss / (total_ss + K.epsilon())
# KerasRegressor lives next to KerasClassifier; adjust this path if the file
# imports the scikit-learn wrappers from elsewhere (e.g. tensorflow.keras).
from keras.wrappers.scikit_learn import KerasRegressor

# Fix the NumPy seed so the same results can be reproduced next time.
seed = 7
np.random.seed(seed)

# Wrap the Keras model for scikit-learn. The network has a single linear
# output trained with MSE, so it is a regressor. Wrapping it in
# KerasClassifier makes the default score accuracy, which is what raises
# "The model is not configured to compute the accuracy".
model = KerasRegressor(build_fn=create_model, verbose=0)

# Grid of hyper-parameters to search.
batch_size = [20, 30, 40, 80]
epochs = [100, 200, 300, 400]
param_grid = dict(batch_size=batch_size, epochs=epochs)

# Turn scikit-learn's r2_score metric into a scorer (higher is better).
my_scorer = make_scorer(r2_score, greater_is_better=True)

# Run the 3-fold cross-validated grid search, ranking candidates by R^2.
grid = GridSearchCV(estimator=model, scoring=my_scorer, param_grid=param_grid, n_jobs=1, cv=3)
grid_result = grid.fit(X_train, y_train)

# Summarize the results: best candidate first, then every candidate.
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))
我认为您需要将自定义评分函数通过 scoring 参数传给 GridSearchCV,否则它会使用估计器默认的 score 方法,即准确率(accuracy)。
scoring:
str, callable, list/tuple or dict, default=None. A single str (see The scoring parameter: defining model evaluation rules) or a callable (see Defining your scoring strategy from metric functions) to evaluate the predictions on the test set. For evaluating multiple metrics, either give a list of (unique) strings or a dict with names as keys and callables as values.
NOTE that when using custom scorers, each scorer should return a single value. Metric functions returning a list/array of values can be wrapped into multiple scorers that return one value each.
See Specifying multiple metrics for evaluation for an example.
If None, the estimator’s score method is used.