Modifying code from binary classifier logistic regression to multi-class "one vs all" logistic regression
I am new to machine learning and trying to practice different algorithms. At the moment I am classifying a random dataset generated with sklearn using logistic regression. Right now it is a binary classifier, but I would like to use a multi-class logistic regression "one vs all" approach (to compare them later).
Below is the code I have tried to implement for binary classification:
import numpy as np
import matplotlib.pyplot as plt
import sklearn
import random
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_blobs

X, t = make_blobs(n_samples=[400, 800, 400], centers=[[0, 0], [1, 2], [2, 3]],
                  n_features=2, random_state=2019)

indices = np.arange(X.shape[0])
random.seed(2020)
random.shuffle(indices)
indices[:10]

X_train = X[indices[:800], :]
X_val = X[indices[800:1200], :]
X_test = X[indices[1200:], :]
t_train = t[indices[:800]]
t_val = t[indices[800:1200]]
t_test = t[indices[1200:]]

t2_train = t_train == 1
t2_train = t2_train.astype('int')
t2_val = (t_val == 1).astype('int')
t2_test = (t_test == 1).astype('int')

def add_bias(X):
    # Put bias in position 0
    sh = X.shape
    if len(sh) == 1:
        # X is a vector
        return np.concatenate([np.array([1]), X])
    else:
        # X is a matrix
        m = sh[0]
        bias = np.ones((m, 1))  # Makes a m*1 matrix of 1-s
        return np.concatenate([bias, X], axis=1)

class NumpyClassifier():
    # Common methods to all numpy classifiers --- if any
    def accuracy(self, X_val, t_val, **kwargs):
        pred = self.predict(X_val, **kwargs)
        if len(pred.shape) > 1:
            pred = pred[:, 0]
        return sum(pred == t_val) / len(pred)

# code for Logistic Regression
def logistic(x):
    return 1 / (1 + np.exp(-x))

class NumpyLogReg(NumpyClassifier):
    def fit(self, X_train, t_train, gamma=0.1, epochs=10):
        # X_train is a Nxm matrix, N data points, m features
        # t_train are the target values for the training data
        (k, m) = X_train.shape
        X_train = add_bias(X_train)
        self.theta = theta = np.zeros(m + 1)
        for e in range(epochs):
            theta -= gamma / k * X_train.T @ (self.forward(X_train) - t_train)

    def forward(self, X_val):
        return logistic(X_val @ self.theta)

    def score(self, X_val):
        z = add_bias(X_val)
        score = self.forward(z)
        return score

    def predict(self, X_val, threshold=0.5):
        z = add_bias(X_val)
        score = self.forward(z)
        # score = z @ self.theta
        return (score > threshold).astype('int')
# train and evaluate on the binary targets t2_*
lr_cl = NumpyLogReg()
lr_cl.fit(X_train, t2_train)
lr_cl.predict(X_val)
lr_cl.accuracy(X_val, t2_val)

for e in [1, 2, 5, 10, 50, 100, 1000, 10000, 100000, 1000000]:
    lr_cl = NumpyLogReg()
    lr_cl.fit(X_train, t2_train, epochs=e, gamma=0.00001)
    print("{:10} {:7.3f}".format(e, lr_cl.accuracy(X_val, t2_val)))
I would appreciate advice/hints on how to modify this code into a multi-class "one vs all" / "one vs rest" logistic regression.
I do not want to use the logistic regression algorithm imported from sklearn directly; I want to build it from scratch like this.
Any suggestions are very welcome, thanks in advance.
I assume NumpyLogReg works fine for binary classification. You can reuse the same class for multi-class classification with the One-Vs-Rest (OvR) technique.

Let's say the dataset has 3 classes A, B, C:
- Train a binary classification model with class label A as the +ve class and B, C as the -ve class, and note down the probability scores.
- Repeat the same with B as +ve and A, C as -ve, and then with C as +ve and A, B as -ve. Note down the respective probability scores.
- Basically, if there are n classes, there will be n binary classifier models, i.e. fitting one classifier per class.
- By inspecting each class's classifier (i.e. by analyzing the probability values), you achieve multi-class classification, and the model stays highly interpretable. A minimal sketch of this idea is given below.
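As a rough sketch (not a definitive implementation), assuming the NumpyClassifier, NumpyLogReg and add_bias definitions from the question above, an OvR wrapper could look like this; the class name NumpyOvRLogReg is only illustrative:

import numpy as np

class NumpyOvRLogReg(NumpyClassifier):
    # Hypothetical one-vs-rest wrapper: trains one NumpyLogReg per class,
    # reusing the NumpyLogReg and NumpyClassifier classes from the question.
    def fit(self, X_train, t_train, gamma=0.1, epochs=10):
        self.classes = np.unique(t_train)            # e.g. array([0, 1, 2])
        self.classifiers = []
        for c in self.classes:
            t_binary = (t_train == c).astype('int')  # current class vs. the rest
            clf = NumpyLogReg()
            clf.fit(X_train, t_binary, gamma=gamma, epochs=epochs)
            self.classifiers.append(clf)

    def predict(self, X_val):
        # One probability score per class and data point, shape (n_classes, N);
        # pick the class whose binary classifier is most confident.
        scores = np.array([clf.score(X_val) for clf in self.classifiers])
        return self.classes[np.argmax(scores, axis=0)]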
For a more detailed explanation, refer to this guideline.
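With the same train/validation split as above, and the original 3-class targets t_train / t_val (not the binarized t2_* ones), usage could then look like this; the gamma and epochs values are just placeholders to tune:

ovr_cl = NumpyOvRLogReg()
ovr_cl.fit(X_train, t_train, gamma=0.01, epochs=10000)
print(ovr_cl.accuracy(X_val, t_val))  # multi-class accuracy on the validation set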