使用 pandas 数据框时出现 KeyError
KeyError while using pandas dataframe
我正在尝试使用 python 实现自定义性能指标。 objective 是计算给出度量 A 最低值的最佳概率阈值。
我写了下面的代码来计算混淆矩阵和阈值。
def confusion_matrix(self):
"""This method returns the confusion matrix for the given pair of Y and Y_Predicted"""
#y,ypred
y = self.df["y"]
ypred = self.df["ypred"]
self.setVariables()
try:
assert len(y) == len(ypred)
for val in range(len(df["proba"])):
print(val)
if y[val] == 1 and ypred[val] == 1:
self._truePositive +=1
if y[val] == 1 and ypred[val] == 0:
self._trueNegative +=1
if y[val] == 0 and ypred[val] == 1:
self._falsePositive +=1
if y[val] == 0 and ypred[val] == 0:
self._falseNegtive +=1
for i in self._truePositive,self._trueNegative,self._falsePositive,self._falseNegtive:
self._cnf_matrix.append(i)
cnfMatrix = self._cnf_matrix.copy()
return np.array(cnfMatrix).reshape(2,2)
except AssertionError:
print("Input Error: Length of y and ypred is not same.")
def metricForLowestValues(self):
"""Compute the best threshold of probability which gives lowest values of metric A"""
dict_metricA = {}
for item in tqdm(self.df['proba']):
if item != None:
self.predict(item)
cnf = self.confusion_matrix()
# A=500×number of false negative+100×numebr of false positive
metricA = 500 * self._falseNegtive + 100* self._falsePositive
dict_metricA[item] = metricA
self.df.drop(columns=["ypred"],inplace=True)
sorted_metricAList = sorted(dict_metricA.items(),key=lambda item:item[1])
minKey = sorted_metricAList[0][0]
minValue = dict_metricA[minKey]
return minKey, minValue
但是当我尝试 运行 这段代码时,它在计算混淆矩阵时给我以下 KeyError 错误。
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-164-38aae4fab9c1> in <module>
----> 1 performance3.metricForLowestValues()
<ipython-input-148-fe3aeec53878> in metricForLowestValues(self)
91 if item != None:
92 self.predict(item)
---> 93 cnf = self.confusion_matrix()
94 # A=500×number of false negative+100×numebr of false positive
95 metricA = 500 * self._falseNegtive + 100* self._falsePositive
<ipython-input-148-fe3aeec53878> in confusion_matrix(self)
30 for val in range(len(df["proba"])):
31 print(val)
---> 32 if y[val] == 1 and ypred[val] == 1:
33 self._truePositive +=1
34 if y[val] == 1 and ypred[val] == 0:
~/Anaconda/anaconda3/lib/python3.8/site-packages/pandas/core/series.py in __getitem__(self, key)
869 key = com.apply_if_callable(key, self)
870 try:
--> 871 result = self.index.get_value(self, key)
872
873 if not is_scalar(result):
~/Anaconda/anaconda3/lib/python3.8/site-packages/pandas/core/indexes/base.py in get_value(self, series, key)
4403 k = self._convert_scalar_indexer(k, kind="getitem")
4404 try:
-> 4405 return self._engine.get_value(s, k, tz=getattr(series.dtype, "tz", None))
4406 except KeyError as e1:
4407 if len(self) > 0 and (self.holds_integer() or self.is_boolean()):
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_value()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_value()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
KeyError: 2852
我使用的数据集是 (2852, 2) 形状的。理想情况下,迭代应该发生在 0 到 2851 之间,因为总行数是 2852。我相信这个错误的发生可能是由于生成了一个额外的行,但我不确定如何修复它。我尝试过滤掉 metricForLowestValues 函数中的 None 值,但不是运气。
我在做什么。有事吗?非常感谢任何见解。
错误可能是您迭代了 self.df['proba']
而不是 df['proba']
的长度吗?迭代 len(y)
可能更容易,因为您知道这将具有正确的长度。如果您发布 df.tail()
.
的输出会很好
我正在尝试使用 python 实现自定义性能指标。 objective 是计算给出度量 A 最低值的最佳概率阈值。 我写了下面的代码来计算混淆矩阵和阈值。
def confusion_matrix(self):
"""This method returns the confusion matrix for the given pair of Y and Y_Predicted"""
#y,ypred
y = self.df["y"]
ypred = self.df["ypred"]
self.setVariables()
try:
assert len(y) == len(ypred)
for val in range(len(df["proba"])):
print(val)
if y[val] == 1 and ypred[val] == 1:
self._truePositive +=1
if y[val] == 1 and ypred[val] == 0:
self._trueNegative +=1
if y[val] == 0 and ypred[val] == 1:
self._falsePositive +=1
if y[val] == 0 and ypred[val] == 0:
self._falseNegtive +=1
for i in self._truePositive,self._trueNegative,self._falsePositive,self._falseNegtive:
self._cnf_matrix.append(i)
cnfMatrix = self._cnf_matrix.copy()
return np.array(cnfMatrix).reshape(2,2)
except AssertionError:
print("Input Error: Length of y and ypred is not same.")
def metricForLowestValues(self):
"""Compute the best threshold of probability which gives lowest values of metric A"""
dict_metricA = {}
for item in tqdm(self.df['proba']):
if item != None:
self.predict(item)
cnf = self.confusion_matrix()
# A=500×number of false negative+100×numebr of false positive
metricA = 500 * self._falseNegtive + 100* self._falsePositive
dict_metricA[item] = metricA
self.df.drop(columns=["ypred"],inplace=True)
sorted_metricAList = sorted(dict_metricA.items(),key=lambda item:item[1])
minKey = sorted_metricAList[0][0]
minValue = dict_metricA[minKey]
return minKey, minValue
但是当我尝试 运行 这段代码时,它在计算混淆矩阵时给我以下 KeyError 错误。
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-164-38aae4fab9c1> in <module>
----> 1 performance3.metricForLowestValues()
<ipython-input-148-fe3aeec53878> in metricForLowestValues(self)
91 if item != None:
92 self.predict(item)
---> 93 cnf = self.confusion_matrix()
94 # A=500×number of false negative+100×numebr of false positive
95 metricA = 500 * self._falseNegtive + 100* self._falsePositive
<ipython-input-148-fe3aeec53878> in confusion_matrix(self)
30 for val in range(len(df["proba"])):
31 print(val)
---> 32 if y[val] == 1 and ypred[val] == 1:
33 self._truePositive +=1
34 if y[val] == 1 and ypred[val] == 0:
~/Anaconda/anaconda3/lib/python3.8/site-packages/pandas/core/series.py in __getitem__(self, key)
869 key = com.apply_if_callable(key, self)
870 try:
--> 871 result = self.index.get_value(self, key)
872
873 if not is_scalar(result):
~/Anaconda/anaconda3/lib/python3.8/site-packages/pandas/core/indexes/base.py in get_value(self, series, key)
4403 k = self._convert_scalar_indexer(k, kind="getitem")
4404 try:
-> 4405 return self._engine.get_value(s, k, tz=getattr(series.dtype, "tz", None))
4406 except KeyError as e1:
4407 if len(self) > 0 and (self.holds_integer() or self.is_boolean()):
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_value()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_value()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
KeyError: 2852
我使用的数据集是 (2852, 2) 形状的。理想情况下,迭代应该发生在 0 到 2851 之间,因为总行数是 2852。我相信这个错误的发生可能是由于生成了一个额外的行,但我不确定如何修复它。我尝试过滤掉 metricForLowestValues 函数中的 None 值,但不是运气。
我在做什么。有事吗?非常感谢任何见解。
错误可能是您迭代了 self.df['proba']
而不是 df['proba']
的长度吗?迭代 len(y)
可能更容易,因为您知道这将具有正确的长度。如果您发布 df.tail()
.