ValueError: multiclass format is not supported
ValueError: multiclass format is not supported
当我尝试使用 metrics.roc_auc_score 时,我得到 ValueError: multiclass format is not supported。
import lightgbm as lgb
from sklearn import metrics
def train_model(train, valid):
    """Train a LightGBM booster and print its validation ROC-AUC score.

    Args:
        train: Training features, wrapped in an ``lgb.Dataset``.
        valid: Validation features, used both for early stopping and for
            the final prediction that is scored.

    Returns:
        The booster returned by ``lgb.train``.

    Note:
        The labels are not parameters: they are read from the ``y_train``
        and ``y_valid`` names defined outside this function.
    """
    dtrain = lgb.Dataset(train, label=y_train)
    dvalid = lgb.Dataset(valid, label=y_valid)

    # The 'binary' objective and 'auc' metric assume 0/1 labels.
    param = {'num_leaves': 64, 'objective': 'binary',
             'metric': 'auc', 'seed': 7}

    print("Training model!")
    bst = lgb.train(param, dtrain, num_boost_round=1000, valid_sets=[dvalid],
                    early_stopping_rounds=10, verbose_eval=False)

    valid_pred = bst.predict(valid)
    print('Valid_pred: ')
    print(valid_pred)
    print('y_valid:')
    print(y_valid)

    # NOTE(review): roc_auc_score raises
    # "ValueError: multiclass format is not supported" when y_valid holds
    # more than two distinct values (e.g. a price column). For such a target
    # this is a regression task and needs a regression objective and metric.
    valid_score = metrics.roc_auc_score(y_valid, valid_pred)
    print(f"Validation AUC score: {valid_score:.4f}")
    return bst
# Run training on the train/validation feature sets and keep the returned booster.
bst = train_model(X_train_final, X_valid_final)
valid_pred 和 y_valid 是:
Training model!
Valid_pred:
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1.]
y_valid:
Id
530 200624
492 133000
460 110000
280 192000
656 88000
...
327 324000
441 555000
1388 136000
1324 82500
62 101000
Name: SalePrice, Length: 292, dtype: int64
错误:
ValueError Traceback (most recent call last)
<ipython-input-80-df034caf8c9b> in <module>
----> 1 bst = train_model(X_train_final, X_valid_final)
<ipython-input-79-483a6fb5ab9b> in train_model(train, valid)
17 print('y_valid:')
18 print(y_valid)
---> 19 valid_score = metrics.roc_auc_score(y_valid, valid_pred)
20 print(f"Validation AUC score: {valid_score:.4f}")
21 return bst
/opt/conda/lib/python3.6/site-packages/sklearn/metrics/ranking.py in roc_auc_score(y_true, y_score, average, sample_weight, max_fpr)
353 return _average_binary_score(
354 _binary_roc_auc_score, y_true, y_score, average,
--> 355 sample_weight=sample_weight)
356
357
/opt/conda/lib/python3.6/site-packages/sklearn/metrics/base.py in _average_binary_score(binary_metric, y_true, y_score, average, sample_weight)
71 y_type = type_of_target(y_true)
72 if y_type not in ("binary", "multilabel-indicator"):
---> 73 raise ValueError("{0} format is not supported".format(y_type))
74
75 if y_type == "binary":
ValueError: multiclass format is not supported
我试过:
valid_pred = pd.Series(bst.predict(valid)).astype(np.int64)
我也尝试过删除 'objective': 'binary',但没有成功。
仍然无法弄清楚是什么问题。
您要解决的任务似乎是回归问题:预测价格。但是,您正在训练一个分类模型,它为每个输入分配一个类别。
ROC-AUC 分数适用于分类问题,其输出是输入属于某个类别的概率。如果您做的是多分类,那么可以对每个类别独立计算分数。
此外,predict 方法返回的是离散的类别,而不是概率。假设您进行二分类并且只有一个样本,它应该被分类为 False。如果您的分类器给出的概率为 0.7,则 ROC-AUC 值为 1.0-0.7=0.3。如果您使用 predict 方法,ROC-AUC 值将是 1.0-1.0=0.0,这并不能告诉您多少信息。
当我尝试使用 metrics.roc_auc_score 时,我得到 ValueError: multiclass format is not supported。
import lightgbm as lgb
from sklearn import metrics
def train_model(train, valid):
    """Train a LightGBM booster and print its validation ROC-AUC score.

    Args:
        train: Training features, wrapped in an ``lgb.Dataset``.
        valid: Validation features, used both for early stopping and for
            the final prediction that is scored.

    Returns:
        The booster returned by ``lgb.train``.

    Note:
        The labels are not parameters: they are read from the ``y_train``
        and ``y_valid`` names defined outside this function.
    """
    dtrain = lgb.Dataset(train, label=y_train)
    dvalid = lgb.Dataset(valid, label=y_valid)

    # The 'binary' objective and 'auc' metric assume 0/1 labels.
    param = {'num_leaves': 64, 'objective': 'binary',
             'metric': 'auc', 'seed': 7}

    print("Training model!")
    bst = lgb.train(param, dtrain, num_boost_round=1000, valid_sets=[dvalid],
                    early_stopping_rounds=10, verbose_eval=False)

    valid_pred = bst.predict(valid)
    print('Valid_pred: ')
    print(valid_pred)
    print('y_valid:')
    print(y_valid)

    # NOTE(review): roc_auc_score raises
    # "ValueError: multiclass format is not supported" when y_valid holds
    # more than two distinct values (e.g. a price column). For such a target
    # this is a regression task and needs a regression objective and metric.
    valid_score = metrics.roc_auc_score(y_valid, valid_pred)
    print(f"Validation AUC score: {valid_score:.4f}")
    return bst
# Run training on the train/validation feature sets and keep the returned booster.
bst = train_model(X_train_final, X_valid_final)
valid_pred 和 y_valid 是:
Training model!
Valid_pred:
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1.]
y_valid:
Id
530 200624
492 133000
460 110000
280 192000
656 88000
...
327 324000
441 555000
1388 136000
1324 82500
62 101000
Name: SalePrice, Length: 292, dtype: int64
错误:
ValueError Traceback (most recent call last)
<ipython-input-80-df034caf8c9b> in <module>
----> 1 bst = train_model(X_train_final, X_valid_final)
<ipython-input-79-483a6fb5ab9b> in train_model(train, valid)
17 print('y_valid:')
18 print(y_valid)
---> 19 valid_score = metrics.roc_auc_score(y_valid, valid_pred)
20 print(f"Validation AUC score: {valid_score:.4f}")
21 return bst
/opt/conda/lib/python3.6/site-packages/sklearn/metrics/ranking.py in roc_auc_score(y_true, y_score, average, sample_weight, max_fpr)
353 return _average_binary_score(
354 _binary_roc_auc_score, y_true, y_score, average,
--> 355 sample_weight=sample_weight)
356
357
/opt/conda/lib/python3.6/site-packages/sklearn/metrics/base.py in _average_binary_score(binary_metric, y_true, y_score, average, sample_weight)
71 y_type = type_of_target(y_true)
72 if y_type not in ("binary", "multilabel-indicator"):
---> 73 raise ValueError("{0} format is not supported".format(y_type))
74
75 if y_type == "binary":
ValueError: multiclass format is not supported
我试过:
valid_pred = pd.Series(bst.predict(valid)).astype(np.int64)
我也尝试过删除 'objective': 'binary',但没有成功。
仍然无法弄清楚是什么问题。
您要解决的任务似乎是回归问题:预测价格。但是,您正在训练一个分类模型,它为每个输入分配一个类别。
ROC-AUC 分数适用于分类问题,其输出是输入属于某个类别的概率。如果您做的是多分类,那么可以对每个类别独立计算分数。
此外,predict 方法返回的是离散的类别,而不是概率。假设您进行二分类并且只有一个样本,它应该被分类为 False。如果您的分类器给出的概率为 0.7,则 ROC-AUC 值为 1.0-0.7=0.3。如果您使用 predict 方法,ROC-AUC 值将是 1.0-1.0=0.0,这并不能告诉您多少信息。