如何在使用 Pipeline 和 GridSearchCV 时获取 LinearRegression 模型的系数
How to find coefficients for LinearRegression problem with Pipeline and GridSearchCV
我正在使用 Pipeline 和 GridSearchCV 训练 LinearRegression 模型,但无法获取为 X_train 的每个特征计算出的系数。
# Two-step pipeline: preprocess the features, then grid-search a linear
# regression that is fitted on log(target); np.exp maps predictions back.
mlr_gridsearchcv = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        (
            'gridsearchcv_lr',
            GridSearchCV(
                TransformedTargetRegressor(
                    regressor=LinearRegression(),
                    func=np.log,
                    inverse_func=np.exp,
                ),
                param_grid=parameter_lr,
                cv=nfolds,
                # Two metrics are recorded; `refit` names the one used to
                # choose the best candidate.
                scoring=('r2', 'neg_mean_absolute_error'),
                return_train_score=True,
                refit='neg_mean_absolute_error',
                n_jobs=-1,
            ),
        ),
    ]
)
# Fit on the training features against the 'co2e' target column.
mlr_co2 = mlr_gridsearchcv.fit(X_train, Y_train['co2e'])
我试过先得到 best_estimator_:
mlr_co2.named_steps['gridsearchcv_lr'].cv_results_.best_estimator_
我得到:
AttributeError: 'dict' object has no attribute 'best_estimator_'
如果我这样尝试:
mlr_co2.named_steps['gridsearchcv_lr'].best_estimator_.regressor.coef_
我得到:
AttributeError: 'LinearRegression' object has no attribute 'coef_'
我还尝试了其他组合,但似乎都不起作用。
您可以使用:
results['gridsearchcv'].best_estimator_.regressor_.coef_
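其中 results 是已拟合的管道,'gridsearchcv' 是管道中网格搜索步骤的名称。注意要使用带下划线的 regressor_(拟合后的回归器),而不是 regressor(未拟合的模板,因此没有 coef_);另外,cv_results_ 只是一个字典,best_estimator_ 是 GridSearchCV 对象本身的属性。请参见下面的代码。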
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from sklearn.compose import TransformedTargetRegressor
# Seed NumPy's global generator before drawing the synthetic data below.
np.random.seed(42)

# Synthetic data: 100 samples of 3 log-normal features; the target is the
# row-wise feature mean plus Gaussian noise.
# NOTE(review): the target is later passed through np.log, which needs
# y > 0 -- this construction does not guarantee that in general.
X = np.random.lognormal(mean=0, sigma=1, size=(100, 3))
y = X.mean(axis=1) + np.random.normal(loc=0, scale=0.1, size=100)

# Pipeline pieces: scale every feature to [0, 1], then fit a linear
# regression on log(y), with np.exp as the inverse target transform.
preprocessor = MinMaxScaler(feature_range=(0, 1))
estimator = TransformedTargetRegressor(
    regressor=LinearRegression(),
    func=np.log,
    inverse_func=np.exp,
)

# Search over fitting with / without an intercept using 5-fold CV.
gridsearchcv = GridSearchCV(
    estimator,
    param_grid={'regressor__fit_intercept': [True, False]},
    cv=5,
    scoring=('r2', 'neg_mean_absolute_error'),
    return_train_score=True,
    refit='neg_mean_absolute_error',
    n_jobs=-1,
)
pipeline = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('gridsearchcv', gridsearchcv),
    ]
)

# Fit the whole pipeline (scaler + grid search) on the data.
results = pipeline.fit(X, y)

# Coefficients of the best model. They are read from `regressor_` (trailing
# underscore) and relate the min-max-scaled features to the log-transformed
# target, not the raw X to the raw y.
results['gridsearchcv'].best_estimator_.regressor_.coef_
# [0.89791824 1.11311974 2.99750775]
我正在使用 Pipeline 和 GridSearchCV 训练 LinearRegression 模型,但无法获取为 X_train 的每个特征计算出的系数。
# Two-step pipeline: preprocess the features, then grid-search a linear
# regression that is fitted on log(target); np.exp maps predictions back.
mlr_gridsearchcv = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        (
            'gridsearchcv_lr',
            GridSearchCV(
                TransformedTargetRegressor(
                    regressor=LinearRegression(),
                    func=np.log,
                    inverse_func=np.exp,
                ),
                param_grid=parameter_lr,
                cv=nfolds,
                # Two metrics are recorded; `refit` names the one used to
                # choose the best candidate.
                scoring=('r2', 'neg_mean_absolute_error'),
                return_train_score=True,
                refit='neg_mean_absolute_error',
                n_jobs=-1,
            ),
        ),
    ]
)
# Fit on the training features against the 'co2e' target column.
mlr_co2 = mlr_gridsearchcv.fit(X_train, Y_train['co2e'])
我试过先得到 best_estimator_:
mlr_co2.named_steps['gridsearchcv_lr'].cv_results_.best_estimator_
我得到:
AttributeError: 'dict' object has no attribute 'best_estimator_'
如果我这样尝试:
mlr_co2.named_steps['gridsearchcv_lr'].best_estimator_.regressor.coef_
我得到:
AttributeError: 'LinearRegression' object has no attribute 'coef_'
我还尝试了其他组合,但似乎都不起作用。
您可以使用:
results['gridsearchcv'].best_estimator_.regressor_.coef_
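其中 results 是已拟合的管道,'gridsearchcv' 是管道中网格搜索步骤的名称。注意要使用带下划线的 regressor_(拟合后的回归器),而不是 regressor(未拟合的模板,因此没有 coef_);另外,cv_results_ 只是一个字典,best_estimator_ 是 GridSearchCV 对象本身的属性。请参见下面的代码。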
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from sklearn.compose import TransformedTargetRegressor
# Seed NumPy's global generator before drawing the synthetic data below.
np.random.seed(42)

# Synthetic data: 100 samples of 3 log-normal features; the target is the
# row-wise feature mean plus Gaussian noise.
# NOTE(review): the target is later passed through np.log, which needs
# y > 0 -- this construction does not guarantee that in general.
X = np.random.lognormal(mean=0, sigma=1, size=(100, 3))
y = X.mean(axis=1) + np.random.normal(loc=0, scale=0.1, size=100)

# Pipeline pieces: scale every feature to [0, 1], then fit a linear
# regression on log(y), with np.exp as the inverse target transform.
preprocessor = MinMaxScaler(feature_range=(0, 1))
estimator = TransformedTargetRegressor(
    regressor=LinearRegression(),
    func=np.log,
    inverse_func=np.exp,
)

# Search over fitting with / without an intercept using 5-fold CV.
gridsearchcv = GridSearchCV(
    estimator,
    param_grid={'regressor__fit_intercept': [True, False]},
    cv=5,
    scoring=('r2', 'neg_mean_absolute_error'),
    return_train_score=True,
    refit='neg_mean_absolute_error',
    n_jobs=-1,
)
pipeline = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('gridsearchcv', gridsearchcv),
    ]
)

# Fit the whole pipeline (scaler + grid search) on the data.
results = pipeline.fit(X, y)

# Coefficients of the best model. They are read from `regressor_` (trailing
# underscore) and relate the min-max-scaled features to the log-transformed
# target, not the raw X to the raw y.
results['gridsearchcv'].best_estimator_.regressor_.coef_
# [0.89791824 1.11311974 2.99750775]