使用 NumPy(Python)计算宏平均(macro-average)F1 分数(不使用 scikit-learn)

Computing the macro-averaged F1 score using NumPy in Python (without using scikit-learn)

我想只使用 numpy 来计算宏平均 F1 分数,应该怎么做?例如:

# Example inputs: ground-truth labels and the model's predicted labels.
# `np.array` must be called, not subscripted, and the name is lowercase so it
# matches the `f1_macro(actual, predicted)` call shown later.
actual = np.array(["A", "A", "B", "C", "C"])
predicted = np.array(["A", "A", "B", "C", "C"])

除非你想借此加深理解,或者出于某种原因无法使用 sklearn,否则我不建议这样做。sklearn 已经提供了可信的(并且更优化的)实现。

以下是实现您自己的版本的方法:

def f1(actual, predicted, label):
    """Return the F1 score for `label`, treated as the positive class.

    F1 = 2 * (precision * recall) / (precision + recall)

    Args:
        actual: Ground-truth labels (array-like).
        predicted: Predicted labels, aligned element-wise with `actual`.
        label: The class value to score.

    Returns:
        The F1 score for `label`. It is 0.0 when there are no true
        positives, instead of the NaN that the 0/0 division would produce.
    """
    # Coerce to arrays so plain Python lists compare element-wise too;
    # `list == label` would otherwise collapse to a single False.
    actual = np.asarray(actual)
    predicted = np.asarray(predicted)

    tp = np.sum((actual == label) & (predicted == label))
    fp = np.sum((actual != label) & (predicted == label))
    fn = np.sum((predicted != label) & (actual == label))

    # With no true positives, precision and recall are both 0 (or undefined),
    # so the harmonic mean would be 0/0. Define the score as 0 in that case.
    if tp == 0:
        return 0.0

    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    return 2 * (precision * recall) / (precision + recall)

def f1_macro(actual, predicted):
    """Return the macro F1: the unweighted mean of the per-label F1 scores.

    One score is computed for every distinct value found in `actual`.
    """
    per_label_scores = []
    for label in np.unique(actual):
        per_label_scores.append(f1(actual, predicted, label))
    return np.mean(per_label_scores)

然后,您可以计算"macro-f1"如下:

f1_macro(actual, predicted)  # evaluates to 1.0 when every prediction matches its label

您可以使用 sklearn.metrics.f1_score(actual, predicted, average='macro') 测试您的实现。

import numpy as np
def f1(y_true, y_pred):
  """Return the binary F1 score for a pair of boolean masks.

  Args:
    y_true: Truth mask; an element is truthy where the sample belongs to
      the positive class. Expected to hold booleans (or 0/1 values).
    y_pred: Prediction mask, aligned element-wise with `y_true`.

  Returns:
    The F1 score. It is 0.0 when there are no true positives, which also
    covers the cases where precision or recall would be a 0/0 division.
  """
  truth = np.asarray(y_true, dtype=bool)
  guess = np.asarray(y_pred, dtype=bool)

  # Vectorized confusion counts. True negatives are not needed for F1.
  TP = np.sum(truth & guess)
  FP = np.sum(~truth & guess)
  FN = np.sum(truth & ~guess)

  # Without true positives the score is 0 by convention. Returning early
  # avoids dividing by zero when the class is never predicted or never
  # present, which would otherwise emit a warning or yield NaN.
  if TP == 0:
    return 0.0

  precision = TP / (TP + FP)
  recall = TP / (TP + FN)
  return (2 * precision * recall) / (precision + recall)

def f1_macro(y_true, y_pred):
  """Return the macro F1: the plain mean of one-vs-rest F1 scores.

  Each distinct value in `y_true` is scored in turn as the positive class.
  """
  def one_vs_rest(label):
    # Turn both label sequences into masks marking where `label` occurs.
    truth_mask = [label==item for item in y_true]
    pred_mask = [label==item for item in y_pred]
    return f1(truth_mask, pred_mask)

  return np.mean([one_vs_rest(label) for label in np.unique(y_true)])

测试用例

# Three-class example with integer labels. Only class 0 has any correct
# predictions (F1 = 0.8); classes 1 and 2 score 0, so the macro mean is 0.8 / 3.
y_true = [0, 1, 2, 0, 1, 2]
y_pred = [0, 2, 1, 0, 0, 1]
s = f1_macro(y_true, y_pred)
print(f"Macro F1 score: {s}")
>>> Macro F1 score: 0.26666666666666666

# String labels with predictions identical to the truth: every class scores
# 1.0, so the macro mean is 1.0.
y_true = np.array(["A","A","B","C","C"])
y_pred = np.array(["A","A","B","C","C"])
s = f1_macro(y_true, y_pred)
print(f"Macro F1 score: {s}")
>>> Macro F1 score: 1.0