我在 Python 中编码时遇到小数舍入问题
I'm having trouble rounding decimals while encoding in Python
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.preprocessing import OrdinalEncoder
# Load the raw table without treating any row as a header, so columns are
# addressed by integer position (column 0 is used as the target below).
df = pd.read_csv(
    "mushrooms.csv",
    index_col=False,
    header=None,
)
def n(target):
    """Encode a class label as an integer.

    Args:
        target: Raw class label. Only 'p' and 'e' are recognised
            (presumably "poisonous" and "edible" in the mushroom
            dataset -- confirm against the data source).

    Returns:
        1 for 'p', 0 for 'e', and None for any other value.
    """
    if target == 'p':
        return 1
    if target == 'e':
        return 0
    # Unrecognised labels have always fallen through to None; this is now
    # explicit so the behaviour is visible to readers.
    return None
# Manually encode the targets: column 0 is passed through n() element-wise.
df[0] = df[0].apply(n)
targets = df[0]

# Every column after the first is treated as an input feature.
feature_columns = df.columns[1:]
inputs = df[feature_columns]
def test_train_split(mydf, inputs, tratio, target, random_state=42):
    """Split ``mydf`` into one stratified train subset and one test subset.

    Args:
        mydf: DataFrame whose rows are selected positionally (``iloc``)
            for each subset.
        inputs: Feature data handed to the splitter as ``X``.
        tratio: Value forwarded to the splitter as ``test_size``.
        target: Labels handed to the splitter as ``y`` for stratification.
        random_state: Seed forwarded to the splitter. Defaults to 42, the
            value that used to be hard-coded, so existing callers get the
            same split as before.

    Returns:
        A ``(strat_train, strat_test)`` tuple of row subsets of ``mydf``.
    """
    splitter = StratifiedShuffleSplit(
        n_splits=1, test_size=tratio, random_state=random_state
    )
    # Only one split is requested, so take the first (and only) index pair.
    train_index, test_index = next(splitter.split(inputs, target))
    return mydf.iloc[train_index], mydf.iloc[test_index]
def print_test_train_dfs(train_df, test_df, target_column='None'):
    """Print ``info()`` for the train and test frames, with optional counts.

    Args:
        train_df: Training DataFrame.
        test_df: Test DataFrame.
        target_column: Label of the column whose ``value_counts()`` is
            printed after each frame's info. The string ``'None'`` (the
            default) or a real ``None`` skips the counts.
    """
    # The historical "no column" sentinel is the string 'None'. A real None
    # is accepted as well, so passing None no longer attempts a column lookup.
    show_counts = target_column is not None and target_column != 'None'

    for heading, frame in (("\nTraining data:", train_df), ('\nTest data:', test_df)):
        print(heading)
        frame.info()
        if show_counts:
            print(frame[target_column].value_counts())
# Stratified split with 0.2 passed as the test ratio.
traindf, testdf = test_train_split(df, inputs, 0.2, targets)

# Fit the encoder on the training rows only, then encode the test rows.
enc = OrdinalEncoder()
enc.fit(traindf)

# OrdinalEncoder.transform returns a float NumPy array by default, which is
# why values print as 1.0 rather than 1. The category codes are whole
# numbers, so cast the whole array to int in one step. The previous
# element-wise round(0) loop discarded its results, and rounding would not
# have changed the float dtype anyway.
encoded = enc.transform(testdf).astype(int)

df = pd.DataFrame(encoded)
print(df)
df 中的值总是像 1.0 这样的浮点数,而不是我想要的整数 1。
我使用的数据集在这里
https://www.kaggle.com/uciml/mushroom-classification
我还要补充一点:在调用 .transform 之后,df 实际上是一个 NumPy 数组,而不是 DataFrame。
# astype returns a new object instead of converting in place, so the result
# must be assigned back for the integer dtype to take effect.
df = df.astype(int)
这样应该就能将值转换为整数类型。
请参阅此问题以获取更多信息
Change data type of columns in Pandas
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.preprocessing import OrdinalEncoder
# Load the raw table without treating any row as a header, so columns are
# addressed by integer position (column 0 is used as the target below).
df = pd.read_csv(
    "mushrooms.csv",
    index_col=False,
    header=None,
)
def n(target):
    """Encode a class label as an integer.

    Args:
        target: Raw class label. Only 'p' and 'e' are recognised
            (presumably "poisonous" and "edible" in the mushroom
            dataset -- confirm against the data source).

    Returns:
        1 for 'p', 0 for 'e', and None for any other value.
    """
    if target == 'p':
        return 1
    if target == 'e':
        return 0
    # Unrecognised labels have always fallen through to None; this is now
    # explicit so the behaviour is visible to readers.
    return None
# Manually encode the targets: column 0 is passed through n() element-wise.
df[0] = df[0].apply(n)
targets = df[0]

# Every column after the first is treated as an input feature.
feature_columns = df.columns[1:]
inputs = df[feature_columns]
def test_train_split(mydf, inputs, tratio, target, random_state=42):
    """Split ``mydf`` into one stratified train subset and one test subset.

    Args:
        mydf: DataFrame whose rows are selected positionally (``iloc``)
            for each subset.
        inputs: Feature data handed to the splitter as ``X``.
        tratio: Value forwarded to the splitter as ``test_size``.
        target: Labels handed to the splitter as ``y`` for stratification.
        random_state: Seed forwarded to the splitter. Defaults to 42, the
            value that used to be hard-coded, so existing callers get the
            same split as before.

    Returns:
        A ``(strat_train, strat_test)`` tuple of row subsets of ``mydf``.
    """
    splitter = StratifiedShuffleSplit(
        n_splits=1, test_size=tratio, random_state=random_state
    )
    # Only one split is requested, so take the first (and only) index pair.
    train_index, test_index = next(splitter.split(inputs, target))
    return mydf.iloc[train_index], mydf.iloc[test_index]
def print_test_train_dfs(train_df, test_df, target_column='None'):
    """Print ``info()`` for the train and test frames, with optional counts.

    Args:
        train_df: Training DataFrame.
        test_df: Test DataFrame.
        target_column: Label of the column whose ``value_counts()`` is
            printed after each frame's info. The string ``'None'`` (the
            default) or a real ``None`` skips the counts.
    """
    # The historical "no column" sentinel is the string 'None'. A real None
    # is accepted as well, so passing None no longer attempts a column lookup.
    show_counts = target_column is not None and target_column != 'None'

    for heading, frame in (("\nTraining data:", train_df), ('\nTest data:', test_df)):
        print(heading)
        frame.info()
        if show_counts:
            print(frame[target_column].value_counts())
# Stratified split with 0.2 passed as the test ratio.
traindf, testdf = test_train_split(df, inputs, 0.2, targets)

# Fit the encoder on the training rows only, then encode the test rows.
enc = OrdinalEncoder()
enc.fit(traindf)

# OrdinalEncoder.transform returns a float NumPy array by default, which is
# why values print as 1.0 rather than 1. The category codes are whole
# numbers, so cast the whole array to int in one step. The previous
# element-wise round(0) loop discarded its results, and rounding would not
# have changed the float dtype anyway.
encoded = enc.transform(testdf).astype(int)

df = pd.DataFrame(encoded)
print(df)
df 中的值总是像 1.0 这样的浮点数,而不是我想要的整数 1。
我使用的数据集在这里 https://www.kaggle.com/uciml/mushroom-classification
我还要补充一点:在调用 .transform 之后,df 实际上是一个 NumPy 数组,而不是 DataFrame。
# astype returns a new object instead of converting in place, so the result
# must be assigned back for the integer dtype to take effect.
df = df.astype(int)
这样应该就能将值转换为整数类型。
请参阅此问题以获取更多信息 Change data type of columns in Pandas