Python: 按键错误

Python: KeyError

我想使用以下代码计算标称特征和数字特征之间的 correlation ratio

def corr_ratio(values, category_labels):


    # Computes correlation ratio for a given array of values and category_labels
    # Eqn: eta^2 = (sum_x [N_x * (mean(y_x) - mean(y))^2]) / (sum_x [sum_i [(y_xi - mean(y))^2]])
    # values: 1D array
    # category_labels: 1D array
    category_labels = np.array(category_labels)
    values = np.array(values)
    categories = np.unique(category_labels)
    data_dict = {}
    for catname in categories:
        indices = np.where(category_labels == catname)
    data_dict[catname] = values[indices]
    cat_means = {catname: np.mean(data_dict[catname]) for catname in categories}
    overall_mean = np.mean(values)
    nominator = np.sum([len(data_dict[catname]) * (cat_means[catname] - overall_mean) ** 2 for catname in categories])
    denominator = np.sum([(val - overall_mean) ** 2 for catname in categories for val in data_dict[catname]])
    corr_ratio = nominator / denominator
    return corr_ratio


data = {
    'Month': ['August', 'August', 'August', 'August', 'August', 'August', 'August', 'August', 'August', 'August',
              'August', 'August',
              'February', 'February', 'February', 'February', 'February', 'February', 'February', 'February',
              'February', 'February', 'February', 'February'],
    'Day': ['Sunday', 'Monday', 'Tuesday', 'Sunday', 'Monday', 'Tuesday', 'Sunday', 'Monday', 'Tuesday', 'Sunday',
            'Monday', 'Tuesday',
            'Sunday', 'Monday', 'Tuesday', 'Sunday', 'Monday', 'Tuesday', 'Sunday', 'Monday', 'Tuesday', 'Sunday',
            'Monday', 'Tuesday', ],
    'Temperature': [34, 32, 33, 36, 37, 35, 29, 32, 33, 32, 36, 30,
                    19, 22, 21, 17, 15, 14, 19, 20, 22, 20, 19, 18],
    'WorkingHours': [0, 9.5, 8.5, 0, 9, 8.5, 0, 10, 9.5, 0, 8, 8.5,
                     0, 8.5, 9, 0, 9, 9, 0, 10, 8, 0, 8.5, 9.5]}

df = pd.DataFrame(data)

print(corr_ratio(df['Temperature'], df['Day']))

但是,产生了以下错误。如果您能告诉我如何解决它,我将不胜感激。

    print(corr_ratio(df['Temperature'], df['Day']))
  File "D:/mifs-master_2/MU/learning-from-imbalanced-classes-master/learning-from-imbalanced-classes-master/continuous/Final Logit/logit-final.py", line 383, in corr_ratio
    cat_means = {catname: np.mean(data_dict[catname]) for catname in categories}
  File "D:/mifs-master_2/MU/learning-from-imbalanced-classes-master/learning-from-imbalanced-classes-master/continuous/Final Logit/logit-final.py", line 383, in <dictcomp>
    cat_means = {catname: np.mean(data_dict[catname]) for catname in categories}
KeyError: 'Monday'

提前致谢。

在此代码段中

for catname in categories:
    indices = np.where(category_labels == catname)
data_dict[catname] = values[indices]

data_dict[catname] 赋值发生在 for 循环之外, 所以将它缩进一级可以解决这个问题。

for catname in categories:
    indices = np.where(category_labels == catname)
    data_dict[catname] = values[indices]