Pandas 中的周期性 KeyError
Periodic KeyError in Pandas
我想使用已经存在的随机值替换数据框中的空值,同时保持权重,这样相关性就不会受到影响,数据也不会丢失。
def nan_fill_random(column_name, nan):
for i in range(len(column_name)):
if column_name[i] == nan:
column_name[i] = random.choice(column_name[column_name != nan])
else:
continue
我写了一个函数,但它周期性地抛出一个KeyError
:并且值有不同的数字,我假设索引。此外,当您重新启动单元时,它可能会消失或更新。
nan_fill_random(data['education'], 'unknown')
这里是错误
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
W:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3360 try:
-> 3361 return self._engine.get_loc(casted_key)
3362 except KeyError as err:
W:\ProgramData\Anaconda3\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
W:\ProgramData\Anaconda3\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
KeyError: 14563
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_4720/2723938638.py in <module>
----> 1 nan_fill_random(data['education'], 'unknown')
~\AppData\Local\Temp/ipykernel_4720/1980306790.py in nan_fill_random(column_name, nan)
2 for i in range(len(column_name)):
3 if column_name[i] == nan:
----> 4 column_name[i] = random.choice(column_name[column_name != nan])
5 else:
6 continue
W:\ProgramData\Anaconda3\lib\random.py in choice(self, seq)
344 """Choose a random element from a non-empty sequence."""
345 # raises IndexError if seq is empty
--> 346 return seq[self._randbelow(len(seq))]
347
348 def shuffle(self, x, random=None):
W:\ProgramData\Anaconda3\lib\site-packages\pandas\core\series.py in __getitem__(self, key)
940
941 elif key_is_scalar:
--> 942 return self._get_value(key)
943
944 if is_hashable(key):
W:\ProgramData\Anaconda3\lib\site-packages\pandas\core\series.py in _get_value(self, label, takeable)
1049
1050 # Similar to Index.get_value, but we do not fall back to positional
-> 1051 loc = self.index.get_loc(label)
1052 return self.index._get_values_for_loc(self, loc, label)
1053
W:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3361 return self._engine.get_loc(casted_key)
3362 except KeyError as err:
-> 3363 raise KeyError(key) from err
3364
3365 if is_scalar(key) and isna(key) and not self.hasnans:
KeyError: 14563
def nan_fill_random(column_name, nan):
list_values = set(column_name)
try :
list_values.remove(nan)
except :
return(column_name)
column_name = column_name.apply(lambda x: x if x != nan else random.choice(list(list_values)))
return(column_name)
我想使用已经存在的随机值替换数据框中的空值,同时保持权重,这样相关性就不会受到影响,数据也不会丢失。
def nan_fill_random(column_name, nan):
for i in range(len(column_name)):
if column_name[i] == nan:
column_name[i] = random.choice(column_name[column_name != nan])
else:
continue
我写了一个函数,但它周期性地抛出一个KeyError
:并且值有不同的数字,我假设索引。此外,当您重新启动单元时,它可能会消失或更新。
nan_fill_random(data['education'], 'unknown')
这里是错误
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
W:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3360 try:
-> 3361 return self._engine.get_loc(casted_key)
3362 except KeyError as err:
W:\ProgramData\Anaconda3\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
W:\ProgramData\Anaconda3\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
KeyError: 14563
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_4720/2723938638.py in <module>
----> 1 nan_fill_random(data['education'], 'unknown')
~\AppData\Local\Temp/ipykernel_4720/1980306790.py in nan_fill_random(column_name, nan)
2 for i in range(len(column_name)):
3 if column_name[i] == nan:
----> 4 column_name[i] = random.choice(column_name[column_name != nan])
5 else:
6 continue
W:\ProgramData\Anaconda3\lib\random.py in choice(self, seq)
344 """Choose a random element from a non-empty sequence."""
345 # raises IndexError if seq is empty
--> 346 return seq[self._randbelow(len(seq))]
347
348 def shuffle(self, x, random=None):
W:\ProgramData\Anaconda3\lib\site-packages\pandas\core\series.py in __getitem__(self, key)
940
941 elif key_is_scalar:
--> 942 return self._get_value(key)
943
944 if is_hashable(key):
W:\ProgramData\Anaconda3\lib\site-packages\pandas\core\series.py in _get_value(self, label, takeable)
1049
1050 # Similar to Index.get_value, but we do not fall back to positional
-> 1051 loc = self.index.get_loc(label)
1052 return self.index._get_values_for_loc(self, loc, label)
1053
W:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3361 return self._engine.get_loc(casted_key)
3362 except KeyError as err:
-> 3363 raise KeyError(key) from err
3364
3365 if is_scalar(key) and isna(key) and not self.hasnans:
KeyError: 14563
def nan_fill_random(column_name, nan):
list_values = set(column_name)
try :
list_values.remove(nan)
except :
return(column_name)
column_name = column_name.apply(lambda x: x if x != nan else random.choice(list(list_values)))
return(column_name)