Python 中的 KNN 实施

KNN implementation in Python

我正在尝试在 Python 中实现一个简单的 KNN 技术,我使用每分钟的股价数据,并使用我的 x 变量作为开盘价、收盘价和成交量数据来预测下一分钟的开盘价.我的代码如下:-

import numpy as np
import pandas as pd
import scipy
import matplotlib.pyplot as plt
from pylab import rcParams
import urllib
import sklearn
from sklearn.neighbors import KNeighborsRegressor
from sklearn import neighbors
from sklearn import preprocessing
from sklearn.cross_validation import train_test_split
from sklearn import metrics 
from googlefinance.client import get_price_data, get_prices_data, get_prices_time_data
import copy

np.set_printoptions(precision = 4, suppress = True)
rcParams['figure.figsize']=7,4
plt.style.use('seaborn-whitegrid')




param = {'q':"DJUSBK", 'i':"60",'x':"INDEXDJX",'p':"1Y"} # Dow Joes Banks
djusbk = get_price_data(param)
ticker_list=['ASB','BXS','BAC','BOH','BKU'] # 5 stocks from the Dow Jones Bank Index
ticker_dict = {}
for i in ticker_list :
    param = {'q':i, 'i':"60",'x':"NYSE",'p':"1Y"}
    df = get_price_data(param)
    x=i
    ticker_dict[x] = df

asb = copy.deepcopy(ticker_dict['ASB'])
asb_prime = pd.DataFrame(asb['Open'])
asb_prime['Close'] = asb['Close']
asb_prime['Volume'] = asb['Volume']

asb_prime_copy = copy.deepcopy(asb_prime)


# Splitting your data into test and training data sets
X_prime = asb_prime_copy.ix[:,(0,1,2)].values
asb_open_next = pd.DataFrame(copy.deepcopy(asb['Open']))
asb_open_next.drop(asb_open_next.index[:1], inplace=True)

asb_prime_copy= asb_prime_copy[:-1]

X_prime = asb_prime_copy.ix[:,(0,1,2)].values
y = asb_open_next.ix[:,(0)].values

X = preprocessing.scale(X_prime)

X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.5,random_state = 17)

#Building and Training Model  with Training Data
clf = neighbors.KNeighborsRegressor()
clf.fit(X_train,y_train)
print(clf)

# Evaluating the model's predictions against the test dataset
y_expect=y_test

y_pred= clf.predict(X_test)
print(metrics.classification_report(y_expect,y_pred))

最后我遇到了错误。不确定为什么?我正在使用 Python 3.x

  File "C:\Users\gg\Anaconda3\lib\site-packages\sklearn\utils\multiclass.py", line 97, in unique_labels
    raise ValueError("Unknown label type: %s" % repr(ys))

ValueError: Unknown label type: (array([ 28.2  ,  28.375,  28.325, ...,  28.075,  28.275,  28.1  ]), array([ 28.23 ,  28.4  ,  28.32 , ...,  28.055,  28.28 ,  28.08 ]))

正如下面的答案所建议的那样,KNeighborsClassifier() 已更新为 KNeighborsRegressor() 并且已经解决了之前的问题

您正在处理回归问题:预测价格。所以从 KNeighborsClassifier 切换到 KNeighborsRegressor 将解决这个问题。