在 python 中编码逻辑回归时出现 AttributeError
AttributeError when coding logistic regression in python
你好,我正在尝试学习 python 和使用威斯康星乳腺癌数据集的机器学习,我正在尝试从头开始编写逻辑回归代码,但一直出现属性错误
(AttributeError: 'DataFrame' object has no attribute 'target'),它出现在我代码的第一部分(Y = data.target),我无法在网上找到解决方案。我知道代码的其余部分可能也不适用于这份数据——它是我从网上看到的不同资料中拼凑而来的——但我目前只想一次解决一个问题,以了解逻辑回归的工作原理。如果能提供任何帮助,我将不胜感激。
这是我的代码:
# Load the dataset from a CSV file; header=0 takes the column names from the first row.
# NOTE(review): this is a non-raw string containing "\U"; under Python 3 that is parsed
# as the start of a unicode escape and is a SyntaxError - confirm and use a raw string.
data = pd.read_csv("C:\Users\Hannah\Desktop\Research Project\data.csv",header=0)
# Feature matrix: the first 30 columns of the frame, as an array.
X = data.values[:, :30]
# NOTE(review): this is the line the question reports as raising
# "AttributeError: 'DataFrame' object has no attribute 'target'".
Y = data.target['diagnosis']
# Keep only the first 500 rows of the features and of the labels.
X = X[:500,:]
Y = Y[:500]
def sigmoid(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))`` of *z*.

    The expression is evaluated with NumPy, so *z* may be a scalar or an
    array; for an array the function is applied element-wise.
    """
    return 1 / (1 + np.exp(-z))
def propagate(w, b, X, Y):
    """Compute the logistic-regression cost and its gradients.

    Args:
        w: Weight array; ``w.T`` is multiplied with ``X``.
        b: Bias added to ``np.dot(w.T, X)``.
        X: Input array; ``X.shape[1]`` is used as the number of examples.
        Y: Labels, combined element-wise with the activations.

    Returns:
        A tuple ``(grads, cost)`` where ``grads`` is a dict with the keys
        ``"dw"`` and ``"db"`` and ``cost`` is the squeezed cross-entropy cost.
    """
    m = X.shape[1]
    # Float literal keeps the 1/m scaling out of integer division.
    inv_m = 1.0 / m

    # Forward pass: activations, then the cross-entropy cost averaged over m.
    A = sigmoid(np.dot(w.T, X) + b)
    cost = -inv_m * np.sum(Y * np.log(A) + (1 - Y) * np.log(1 - A))

    # Backward pass: gradients of the cost with respect to w and b.
    dz = inv_m * (A - Y)
    dw = np.dot(X, dz.T)
    db = np.sum(dz)

    cost = np.squeeze(cost)
    grads = {"dw": dw, "db": db}
    return grads, cost
def optimize(w, b, X, Y, num_iterations, learning_rate, print_cost = False):
    """Run gradient descent on ``w`` and ``b`` for ``num_iterations`` steps.

    Args:
        w: Initial weights.
        b: Initial bias.
        X: Input array passed through to ``propagate``.
        Y: Labels passed through to ``propagate``.
        num_iterations: Number of update steps to perform.
        learning_rate: Step size applied to each gradient.
        print_cost: When true, print the cost every 100 iterations.

    Returns:
        A tuple ``(params, grads, costs)``: ``params`` holds the final ``"w"``
        and ``"b"``, ``grads`` holds the gradients from the last iteration
        (an empty dict when ``num_iterations`` is 0), and ``costs`` is the
        list of costs recorded every 100 iterations.
    """
    costs = []
    # Defined up front so the return below works when the loop never runs.
    grads = {}
    for i in range(num_iterations):
        grads, cost = propagate(w, b, X, Y)
        b = b - learning_rate * grads["db"]
        w = w - learning_rate * grads["dw"]
        if i % 100 == 0:
            costs.append(cost)
            if print_cost:
                print("Cost after iteration %i: %f" % (i, cost))
    params = {"w": w, "b": b}
    return params, grads, costs
def predict(w, b, X):
    """Predict 0/1 labels for the columns of ``X``.

    Args:
        w: Weights; reshaped to ``(X.shape[0], 1)`` before use.
        b: Bias added to ``np.dot(w.T, X)``.
        X: Input array with one example per column.

    Returns:
        A float array holding ``1.0`` where the sigmoid activation exceeds
        0.5 and ``0.0`` elsewhere.
    """
    w = w.reshape(X.shape[0], 1)
    A = sigmoid(np.dot(w.T, X) + b)
    # Threshold the activations; multiplying by 1. turns the booleans into floats.
    return 1. * (A > 0.5)
def model(X_train, Y_train, X_test, Y_test, num_iterations = 2000, learning_rate = 0.5, print_cost = False):
    """Train logistic regression and predict on the train and test inputs.

    Args:
        X_train: Training inputs; ``X_train.shape[0]`` sizes the parameters.
        Y_train: Training labels.
        X_test: Test inputs.
        Y_test: Test labels (accepted but not used by this function).
        num_iterations: Number of gradient-descent steps.
        learning_rate: Step size for gradient descent.
        print_cost: Forwarded to ``optimize`` to print the cost periodically.

    Returns:
        A dict with the keys ``"costs"``, ``"Y_prediction_test"``,
        ``"Y_prediction_train"``, ``"w"``, ``"b"``, ``"learning_rate"`` and
        ``"num_iterations"``.
    """
    # NOTE(review): initialize_with_zeros is not defined in the code shown
    # alongside this function - confirm it exists before running.
    w, b = initialize_with_zeros(X_train.shape[0])
    print("learning rate:",learning_rate)

    # Forward the caller's print_cost flag rather than a hard-coded False.
    parameters, grads, costs = optimize(
        w, b, X_train, Y_train, num_iterations, learning_rate,
        print_cost = print_cost,
    )
    w = parameters["w"]
    b = parameters["b"]

    Y_prediction_train = predict(w, b, X_train)
    Y_prediction_test = predict(w, b, X_test)

    d = {
        "costs": costs,
        "Y_prediction_test": Y_prediction_test,
        "Y_prediction_train": Y_prediction_train,
        "w": w,
        "b": b,
        "learning_rate": learning_rate,
        "num_iterations": num_iterations,
    }
    return d
# Train the model and collect its results in d.
# NOTE(review): train_set_x, train_set_y, test_set_x and test_set_y are not
# defined anywhere in the code shown here - confirm where they come from.
d = model(train_set_x, train_set_y, test_set_x, test_set_y, num_iterations = 2000, learning_rate = 0.005, print_cost = True)
据我了解,您正在使用 pandas 读取 csv,并希望将列 diagnosis 中的值分配给变量 Y。在这种情况下,您不需要 target。根据错误信息,DataFrame 根本没有这个属性。data['diagnosis'] 应该会返回您需要的内容。
你好,我正在尝试学习 python 和使用威斯康星乳腺癌数据集的机器学习,我正在尝试从头开始编写逻辑回归代码,但一直出现属性错误(AttributeError: 'DataFrame' object has no attribute 'target'),它出现在我代码的第一部分(Y = data.target),我无法在网上找到解决方案。我知道代码的其余部分可能也不适用于这份数据——它是我从网上看到的不同资料中拼凑而来的——但我目前只想一次解决一个问题,以了解逻辑回归的工作原理。如果能提供任何帮助,我将不胜感激。
这是我的代码:
# Load the dataset from a CSV file; header=0 takes the column names from the first row.
# NOTE(review): this is a non-raw string containing "\U"; under Python 3 that is parsed
# as the start of a unicode escape and is a SyntaxError - confirm and use a raw string.
data = pd.read_csv("C:\Users\Hannah\Desktop\Research Project\data.csv",header=0)
# Feature matrix: the first 30 columns of the frame, as an array.
X = data.values[:, :30]
# NOTE(review): this is the line the question reports as raising
# "AttributeError: 'DataFrame' object has no attribute 'target'".
Y = data.target['diagnosis']
# Keep only the first 500 rows of the features and of the labels.
X = X[:500,:]
Y = Y[:500]
def sigmoid(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))`` of *z*.

    The expression is evaluated with NumPy, so *z* may be a scalar or an
    array; for an array the function is applied element-wise.
    """
    return 1 / (1 + np.exp(-z))
def propagate(w, b, X, Y):
    """Compute the logistic-regression cost and its gradients.

    Args:
        w: Weight array; ``w.T`` is multiplied with ``X``.
        b: Bias added to ``np.dot(w.T, X)``.
        X: Input array; ``X.shape[1]`` is used as the number of examples.
        Y: Labels, combined element-wise with the activations.

    Returns:
        A tuple ``(grads, cost)`` where ``grads`` is a dict with the keys
        ``"dw"`` and ``"db"`` and ``cost`` is the squeezed cross-entropy cost.
    """
    m = X.shape[1]
    # Float literal keeps the 1/m scaling out of integer division.
    inv_m = 1.0 / m

    # Forward pass: activations, then the cross-entropy cost averaged over m.
    A = sigmoid(np.dot(w.T, X) + b)
    cost = -inv_m * np.sum(Y * np.log(A) + (1 - Y) * np.log(1 - A))

    # Backward pass: gradients of the cost with respect to w and b.
    dz = inv_m * (A - Y)
    dw = np.dot(X, dz.T)
    db = np.sum(dz)

    cost = np.squeeze(cost)
    grads = {"dw": dw, "db": db}
    return grads, cost
def optimize(w, b, X, Y, num_iterations, learning_rate, print_cost = False):
    """Run gradient descent on ``w`` and ``b`` for ``num_iterations`` steps.

    Args:
        w: Initial weights.
        b: Initial bias.
        X: Input array passed through to ``propagate``.
        Y: Labels passed through to ``propagate``.
        num_iterations: Number of update steps to perform.
        learning_rate: Step size applied to each gradient.
        print_cost: When true, print the cost every 100 iterations.

    Returns:
        A tuple ``(params, grads, costs)``: ``params`` holds the final ``"w"``
        and ``"b"``, ``grads`` holds the gradients from the last iteration
        (an empty dict when ``num_iterations`` is 0), and ``costs`` is the
        list of costs recorded every 100 iterations.
    """
    costs = []
    # Defined up front so the return below works when the loop never runs.
    grads = {}
    for i in range(num_iterations):
        grads, cost = propagate(w, b, X, Y)
        b = b - learning_rate * grads["db"]
        w = w - learning_rate * grads["dw"]
        if i % 100 == 0:
            costs.append(cost)
            if print_cost:
                print("Cost after iteration %i: %f" % (i, cost))
    params = {"w": w, "b": b}
    return params, grads, costs
def predict(w, b, X):
    """Predict 0/1 labels for the columns of ``X``.

    Args:
        w: Weights; reshaped to ``(X.shape[0], 1)`` before use.
        b: Bias added to ``np.dot(w.T, X)``.
        X: Input array with one example per column.

    Returns:
        A float array holding ``1.0`` where the sigmoid activation exceeds
        0.5 and ``0.0`` elsewhere.
    """
    w = w.reshape(X.shape[0], 1)
    A = sigmoid(np.dot(w.T, X) + b)
    # Threshold the activations; multiplying by 1. turns the booleans into floats.
    return 1. * (A > 0.5)
def model(X_train, Y_train, X_test, Y_test, num_iterations = 2000, learning_rate = 0.5, print_cost = False):
    """Train logistic regression and predict on the train and test inputs.

    Args:
        X_train: Training inputs; ``X_train.shape[0]`` sizes the parameters.
        Y_train: Training labels.
        X_test: Test inputs.
        Y_test: Test labels (accepted but not used by this function).
        num_iterations: Number of gradient-descent steps.
        learning_rate: Step size for gradient descent.
        print_cost: Forwarded to ``optimize`` to print the cost periodically.

    Returns:
        A dict with the keys ``"costs"``, ``"Y_prediction_test"``,
        ``"Y_prediction_train"``, ``"w"``, ``"b"``, ``"learning_rate"`` and
        ``"num_iterations"``.
    """
    # NOTE(review): initialize_with_zeros is not defined in the code shown
    # alongside this function - confirm it exists before running.
    w, b = initialize_with_zeros(X_train.shape[0])
    print("learning rate:",learning_rate)

    # Forward the caller's print_cost flag rather than a hard-coded False.
    parameters, grads, costs = optimize(
        w, b, X_train, Y_train, num_iterations, learning_rate,
        print_cost = print_cost,
    )
    w = parameters["w"]
    b = parameters["b"]

    Y_prediction_train = predict(w, b, X_train)
    Y_prediction_test = predict(w, b, X_test)

    d = {
        "costs": costs,
        "Y_prediction_test": Y_prediction_test,
        "Y_prediction_train": Y_prediction_train,
        "w": w,
        "b": b,
        "learning_rate": learning_rate,
        "num_iterations": num_iterations,
    }
    return d
# Train the model and collect its results in d.
# NOTE(review): train_set_x, train_set_y, test_set_x and test_set_y are not
# defined anywhere in the code shown here - confirm where they come from.
d = model(train_set_x, train_set_y, test_set_x, test_set_y, num_iterations = 2000, learning_rate = 0.005, print_cost = True)
据我了解,您正在使用 pandas 读取 csv,并希望将列 diagnosis 中的值分配给变量 Y。在这种情况下,您不需要 target。根据错误信息,DataFrame 根本没有这个属性。data['diagnosis'] 应该会返回您需要的内容。