Python scikit-learn 模型转 JSON
Python scikit-learn to JSON
我有一个使用 Python scikit-learn 构建的模型。我知道模型可以保存为 Pickle 或 Joblib 格式。是否有任何现有方法可以将模型保存为 JSON 格式?请参阅下面的模型构建代码以供参考:
import pandas
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
import pickle
# Load the Pima Indians diabetes dataset; column names are supplied
# explicitly through ``names``.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/pima-indians-diabetes/pima-indians-diabetes.data"
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dataframe = pandas.read_csv(url, names=names)
array = dataframe.values

# The first eight columns are the features; the ninth ('class') is the target.
X = array[:, 0:8]
Y = array[:, 8]

# Hold out 33% of the rows as a test set; the fixed seed keeps the split
# repeatable between runs.
test_size = 0.33
seed = 7
X_train, X_test, Y_train, Y_test = model_selection.train_test_split(
    X, Y, test_size=test_size, random_state=seed)

# Fit the model on the training split (the rows NOT held out for testing).
model = LogisticRegression()
model.fit(X_train, Y_train)

# Persist the fitted model. The context manager guarantees the file handle
# is closed even if pickling raises.
filename = 'finalized_model.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)
你需要自己编写序列化/反序列化(serialization/deserialization)的方案。幸运的是,逻辑回归基本上可以由系数和截距完整描述。不过,LogisticRegression
对象还保留了一些其他元数据,我们也可以把这些元数据一并保存。我写了下面两个函数来完成这些繁琐的工作。请注意,它们仍然比较粗糙:
import numpy as np
import json
from sklearn.linear_model import LogisticRegression
def logistic_regression_to_json(lrmodel, file=None):
    """Serialize a fitted logistic-regression model to JSON.

    The payload has two sections:

    * ``init_params``: whatever ``lrmodel.get_params()`` returns.
    * ``model_params``: the fitted attributes ``coef_``, ``intercept_``,
      ``classes_`` and ``n_iter_``, each converted with ``.tolist()``.

    Args:
        lrmodel: Fitted model exposing ``get_params()`` and the four fitted
            attributes listed above, each providing a ``tolist()`` method.
        file: Optional writable text file object. When given, the JSON is
            written to it instead of being returned.

    Returns:
        The JSON string when ``file`` is None; otherwise None (the result
        of ``json.dump``).

    Raises:
        AttributeError: If one of the fitted attributes is missing.
        TypeError: If a value in the payload is not JSON-serializable.
    """
    fitted_attrs = ('coef_', 'intercept_', 'classes_', 'n_iter_')
    data = {
        'init_params': lrmodel.get_params(),
        'model_params': {
            attr: getattr(lrmodel, attr).tolist() for attr in fitted_attrs
        },
    }
    if file is None:
        return json.dumps(data)
    # json.dump writes to the file object and returns None.
    return json.dump(data, file)
def logistic_regression_from_json(jstring):
    """Rebuild a LogisticRegression from JSON text.

    Expects the layout written by ``logistic_regression_to_json``: an
    ``init_params`` object passed to the ``LogisticRegression`` constructor
    as keyword arguments, and a ``model_params`` object whose entries are
    set on the new model as NumPy arrays.

    Args:
        jstring: JSON document accepted by ``json.loads``.

    Returns:
        A ``LogisticRegression`` instance with the stored attributes set.

    Raises:
        json.JSONDecodeError: If ``jstring`` is not valid JSON.
        KeyError: If ``init_params`` or ``model_params`` is missing.

    Note:
        JSON object keys are always strings, so a dict-valued constructor
        parameter with non-string keys does not round-trip unchanged.
    """
    data = json.loads(jstring)
    model = LogisticRegression(**data['init_params'])
    # Fitted attributes were stored as nested lists; restore them as arrays.
    for name, values in data['model_params'].items():
        setattr(model, name, np.array(values))
    return model
请注意,只需 'coef_'、'intercept_' 和 'classes_' 这三个属性,
您就可以自己进行预测,因为逻辑回归是一个简单的线性模型,预测本质上只是矩阵乘法。
我有一个使用 Python scikit-learn 构建的模型。我知道模型可以保存为 Pickle 或 Joblib 格式。是否有任何现有方法可以将模型保存为 JSON 格式?请参阅下面的模型构建代码以供参考:
import pandas
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
import pickle
# Load the Pima Indians diabetes dataset; column names are supplied
# explicitly through ``names``.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/pima-indians-diabetes/pima-indians-diabetes.data"
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dataframe = pandas.read_csv(url, names=names)
array = dataframe.values

# The first eight columns are the features; the ninth ('class') is the target.
X = array[:, 0:8]
Y = array[:, 8]

# Hold out 33% of the rows as a test set; the fixed seed keeps the split
# repeatable between runs.
test_size = 0.33
seed = 7
X_train, X_test, Y_train, Y_test = model_selection.train_test_split(
    X, Y, test_size=test_size, random_state=seed)

# Fit the model on the training split (the rows NOT held out for testing).
model = LogisticRegression()
model.fit(X_train, Y_train)

# Persist the fitted model. The context manager guarantees the file handle
# is closed even if pickling raises.
filename = 'finalized_model.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)
你需要自己编写序列化/反序列化(serialization/deserialization)的方案。幸运的是,逻辑回归基本上可以由系数和截距完整描述。不过,LogisticRegression
对象还保留了一些其他元数据,我们也可以把这些元数据一并保存。我写了下面两个函数来完成这些繁琐的工作。请注意,它们仍然比较粗糙:
import numpy as np
import json
from sklearn.linear_model import LogisticRegression
def logistic_regression_to_json(lrmodel, file=None):
    """Serialize a fitted logistic-regression model to JSON.

    The payload has two sections:

    * ``init_params``: whatever ``lrmodel.get_params()`` returns.
    * ``model_params``: the fitted attributes ``coef_``, ``intercept_``,
      ``classes_`` and ``n_iter_``, each converted with ``.tolist()``.

    Args:
        lrmodel: Fitted model exposing ``get_params()`` and the four fitted
            attributes listed above, each providing a ``tolist()`` method.
        file: Optional writable text file object. When given, the JSON is
            written to it instead of being returned.

    Returns:
        The JSON string when ``file`` is None; otherwise None (the result
        of ``json.dump``).

    Raises:
        AttributeError: If one of the fitted attributes is missing.
        TypeError: If a value in the payload is not JSON-serializable.
    """
    fitted_attrs = ('coef_', 'intercept_', 'classes_', 'n_iter_')
    data = {
        'init_params': lrmodel.get_params(),
        'model_params': {
            attr: getattr(lrmodel, attr).tolist() for attr in fitted_attrs
        },
    }
    if file is None:
        return json.dumps(data)
    # json.dump writes to the file object and returns None.
    return json.dump(data, file)
def logistic_regression_from_json(jstring):
    """Rebuild a LogisticRegression from JSON text.

    Expects the layout written by ``logistic_regression_to_json``: an
    ``init_params`` object passed to the ``LogisticRegression`` constructor
    as keyword arguments, and a ``model_params`` object whose entries are
    set on the new model as NumPy arrays.

    Args:
        jstring: JSON document accepted by ``json.loads``.

    Returns:
        A ``LogisticRegression`` instance with the stored attributes set.

    Raises:
        json.JSONDecodeError: If ``jstring`` is not valid JSON.
        KeyError: If ``init_params`` or ``model_params`` is missing.

    Note:
        JSON object keys are always strings, so a dict-valued constructor
        parameter with non-string keys does not round-trip unchanged.
    """
    data = json.loads(jstring)
    model = LogisticRegression(**data['init_params'])
    # Fitted attributes were stored as nested lists; restore them as arrays.
    for name, values in data['model_params'].items():
        setattr(model, name, np.array(values))
    return model
请注意,只需 'coef_'、'intercept_' 和 'classes_' 这三个属性,
您就可以自己进行预测,因为逻辑回归是一个简单的线性模型,预测本质上只是矩阵乘法。