y 应该是一维数组,但得到的是形状为 () 的数组
y should be a 1d array, got an array of shape () instead
我有一个经过训练并保存的模型。我正在尝试根据新数据进一步训练模型,但它会出错。
相关部分代码:
from tensorflow.keras.preprocessing.text import Tokenizer
# The maximum number of words to be used. (most frequent)
MAX_NB_WORDS = 50000
# Max number of words in each complaint.
# (Not referenced anywhere else in this excerpt.)
MAX_SEQUENCE_LENGTH = 250
# This is fixed.
# (Not referenced anywhere else in this excerpt.)
EMBEDDING_DIM = 100
# Keras tokenizer fitted on the 'Observation' column. `master_df` is defined
# outside this excerpt.
tokenizer = Tokenizer(num_words=MAX_NB_WORDS, filters='!"#$%&()*+,-./:;<=>?@[\]^_`{|}~', lower=True)
tokenizer.fit_on_texts(master_df['Observation'].values)
word_index = tokenizer.word_index
from sklearn.feature_extraction.text import CountVectorizer
# Count vectorizer over 1- to 3-grams, capped at 10000 features. `stop_words`
# and the raw `X` passed to fit_transform are defined outside this excerpt.
cv=CountVectorizer(max_df=1.0,min_df=1, stop_words=stop_words, max_features=10000, ngram_range=(1,3))
X=cv.fit_transform(X)
# Load the previously saved model (the `pickle` import is not shown here).
# NOTE(review): the model is read from "../sgd.pickle" but retrain() writes
# 'sgd.pickle' in the current directory -- confirm both are meant to be the
# same file.
with open("../sgd.pickle", 'rb') as f:
    sgd = pickle.load(f)
def output_sample(sentence):
    """Predict the risk label for a single observation sentence.

    The sentence goes through ``preprocess_text``, is lower-cased, vectorized
    with the module-level ``cv`` and classified by the module-level ``sgd``.

    Returns the first element of the array produced by ``sgd.predict``.
    """
    test=preprocess_text(sentence)
    test=test.lower()
    #print(test)
    # Wrap in a list so cv.transform receives a one-element batch.
    test=[test]
    # NOTE(review): fit_on_sequences is handed raw text here, and `new_words`
    # is never read afterwards -- confirm these two lines are needed at all.
    tokenizer.fit_on_sequences(test)
    new_words= tokenizer.word_index
    #print(word_index)
    test1=cv.transform(test)
    #print(test1)
    output=sgd.predict(test1)
    return output[0]
def retrain(X,y):
    """Refit the saved classifier on one new observation and pickle it again.

    X: the raw observation text (a single string).
    y: the label supplied by the caller; forwarded unchanged to ``sgd.fit``.
    """
    X=preprocess_text(X)
    X=X.lower()
    X=[X]
    tokenizer.fit_on_texts(X)
    new_words=tokenizer.word_index  # not read again inside this function
    # fit_transform (not transform) re-fits `cv` on this single sample only.
    X=cv.fit_transform(X)
    # This is the call the question reports the ValueError from: the training
    # set handed to fit() consists of the one sample built above.
    sgd.fit(X,y)
    with open('sgd.pickle', 'wb') as f:
        pickle.dump(sgd, f)
    print("Model trained on new data")
# Interactive loop body: predict a label for one typed observation, ask the
# user to confirm or correct it, then call retrain() with that label.
sentence=input("\n\nEnter your observation:\n\n")
output=output_sample(sentence)
print("\n\nThe risk prediction is",preprocess_text(output),"\n\n")
print("Is the above prediction correct?\n")
corr=input("Press 'y' for yes or 'n' for no.\n")
if corr=='y':
    # np.array(<scalar>) builds a 0-d array, i.e. shape (), which matches the
    # "got an array of shape ()" message quoted in the title.
    newy=np.array(output)
    retrain(sentence,newy)
elif corr=='n':
    print("What is the correct risk?\n1. Low\n2. Medium\n")
    r=input("Enter the appropriate number: ")
    if r=='1':
        newy=np.array('Low')  # 0-d array again
        retrain(sentence,newy)
    elif r=='2':
        newy=np.array('Medium')  # 0-d array again
        retrain(sentence,newy)
    else:
        print("Incorrect input. Please restart the application.")
else:
    print("Incorrect input. Please restart the application")
程序运行时,错误发生在 sgd.fit(X,y)。
错误信息如下:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_11300/3528077041.py in <module>
5 newy=[output]
6 print(newy)
----> 7 retrain(sentence,newy)
8
9 elif corr=='n':
~\AppData\Local\Temp/ipykernel_11300/2433836763.py in retrain(X, y)
7 X=cv.fit_transform(X)
8 #y = y.reshape((-1, 1))
----> 9 sgd.fit(X,y)
10 with open('sgd.pickle', 'wb') as f:
11 pickle.dump(sgd, f)
~\AppData\Local\Programs\Python\Python38\lib\site-packages\sklearn\pipeline.py in fit(self, X, y, **fit_params)
344 if self._final_estimator != 'passthrough':
345 fit_params_last_step = fit_params_steps[self.steps[-1][0]]
--> 346 self._final_estimator.fit(Xt, y, **fit_params_last_step)
347
348 return self
~\AppData\Local\Programs\Python\Python38\lib\site-packages\sklearn\linear_model\_stochastic_gradient.py in fit(self, X, y, coef_init, intercept_init, sample_weight)
727 Returns an instance of self.
728 """
--> 729 return self._fit(X, y, alpha=self.alpha, C=1.0,
730 loss=self.loss, learning_rate=self.learning_rate,
731 coef_init=coef_init, intercept_init=intercept_init,
~\AppData\Local\Programs\Python\Python38\lib\site-packages\sklearn\linear_model\_stochastic_gradient.py in _fit(self, X, y, alpha, C, loss, learning_rate, coef_init, intercept_init, sample_weight)
567 self.t_ = 1.0
568
--> 569 self._partial_fit(X, y, alpha, C, loss, learning_rate, self.max_iter,
570 classes, sample_weight, coef_init, intercept_init)
571
~\AppData\Local\Programs\Python\Python38\lib\site-packages\sklearn\linear_model\_stochastic_gradient.py in _partial_fit(self, X, y, alpha, C, loss, learning_rate, max_iter, classes, sample_weight, coef_init, intercept_init)
529 max_iter=max_iter)
530 else:
--> 531 raise ValueError(
532 "The number of classes has to be greater than one;"
533 " got %d class" % n_classes)
ValueError: The number of classes has to be greater than one; got 1 class
数据样本如下:
Observation Risk
0 A separate road for light vehicle should be ma... Low
2 All benches were not having sufficient berm. Low
3 As light arrangement is not adequate. Low
4 As light arrangement is not adequate. Low
5 As contractor's equipment record is not availa... Low
77 First aid Room is not established. Medium
98 Heavy dust on haul road is found with in suffi... Medium
79 First aid station is maintained in the Rest sh... Medium
171 Presently explosive van is not available with ... Medium
79 First aid station is maintained in the Rest sh... Medium
理想情况下,它应该接受输入,但我不知道为什么会出现该错误。
我清理了代码,并对 retrain 函数做了一些修改:现在该函数会把新的文本(String)和标签(Label)添加到训练集中,然后重新拟合分类器。您代码的其他部分在逻辑上保持不变!
实用函数:
def output_sample(sentence):
    """Predict the risk label for a single observation sentence.

    The sentence goes through ``preprocess_text``, is lower-cased, vectorized
    with the module-level ``cv`` and classified by the module-level ``sgd``.

    The former ``tokenizer.fit_on_sequences`` / ``new_words`` lines were
    dropped: their result was never used, and the accompanying script never
    defines ``tokenizer``, so they raised ``NameError``.

    Args:
        sentence: Raw observation text.

    Returns:
        The first element of the array produced by ``sgd.predict``.
    """
    cleaned = preprocess_text(sentence).lower()
    # The vectorizer works on batches, so wrap the single sample in a list.
    features = cv.transform([cleaned])
    return sgd.predict(features)[0]
def preprocess_text(string: str) -> str:
    """Return ``string`` unchanged.

    Placeholder hook for text cleaning: put any normalisation here, but keep
    returning a string, because callers invoke ``.lower()`` on the result.
    """
    return string
def retrain(X, y):
    """Refit the vectorizer and classifier on the master data plus one sample.

    The previous version used ``master_df['Observation'] + X`` and
    ``master_df['Risk'] + y``. On a pandas Series ``+`` is element-wise, so it
    never appended the new row: it either failed on a length mismatch or
    concatenated the new label onto every existing label. The data is now
    converted to plain lists and the new sample is appended.

    Args:
        X: Raw observation text (a single string).
        y: Its label. A plain string, a 0-d array such as ``np.array('Low')``,
            or a one-element sequence are all accepted.

    Side effects:
        Re-fits the module-level ``cv`` and ``sgd``, writes ``sgd`` to
        'sgd.pickle', and prints a confirmation message.
    """
    import numpy as np  # local import keeps this block self-contained

    sample = preprocess_text(X).lower()
    # ravel() turns a scalar or 0-d array into a 1-d array, so the label can
    # be appended no matter which form the caller used.
    new_labels = np.ravel(y).tolist()

    texts = master_df['Observation'].tolist() + [sample]
    labels = master_df['Risk'].tolist() + new_labels

    features = cv.fit_transform(texts)
    sgd.fit(features, labels)

    # NOTE(review): only `sgd` is persisted. `cv` was just re-fitted and is not
    # saved, and the new sample is not added to `master_df` -- confirm the
    # model loaded on the next run still matches the vectorizer built then.
    with open('sgd.pickle', 'wb') as f:
        pickle.dump(sgd, f)
    print("Model trained on new data")
实际流程:
import pickle

import nltk
import numpy as np
import pandas as pd  # was missing: pd.read_csv below raised NameError
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import SGDClassifier

# Build the bag-of-words vectorizer (1- to 3-grams, at most 10000 features)
# and fit it on the master data set.
stopwords = nltk.corpus.stopwords.words('english')
cv = CountVectorizer(max_df=1.0, min_df=1, stop_words=stopwords, max_features=10000, ngram_range=(1, 3))
# NOTE(review): the file is named .tsv but is read with the default separator
# -- confirm whether sep='\t' is required.
master_df = pd.read_csv('classification.tsv')
X = cv.fit_transform(master_df['Observation'])

# Reuse a previously pickled model when one can be read; otherwise train a
# fresh classifier on the master data. The context manager closes the file
# handle, which the earlier bare open() leaked.
# NOTE: pickle.load must only be used on files this application wrote itself.
try:
    with open("./sgd.pickle", 'rb') as f:
        sgd = pickle.load(f)
except (OSError, EOFError, pickle.UnpicklingError):
    sgd = SGDClassifier()
    sgd.fit(X, master_df['Risk'].to_list())

# Predict, ask the user to confirm or correct the label, then retrain.
sentence = input("\n\nEnter your observation:\n\n")
output = output_sample(sentence)
print("\n\nThe risk prediction is", preprocess_text(output), "\n\n")
print("Is the above prediction correct?\n")
corr = input("Press 'y' for yes or 'n' for no.\n")
if corr == 'y':
    retrain(sentence, np.array(output))
elif corr == 'n':
    print("What is the correct risk?\n1. Low\n2. Medium\n")
    r = input("Enter the appropriate number: ")
    # Menu choice -> label; replaces two identical if/elif branches.
    risk_by_choice = {'1': 'Low', '2': 'Medium'}
    if r in risk_by_choice:
        retrain(sentence, np.array(risk_by_choice[r]))
    else:
        print("Incorrect input. Please restart the application.")
else:
    print("Incorrect input. Please restart the application")
我有一个经过训练并保存的模型。我正在尝试根据新数据进一步训练模型,但它会出错。 相关部分代码:
from tensorflow.keras.preprocessing.text import Tokenizer
# The maximum number of words to be used. (most frequent)
MAX_NB_WORDS = 50000
# Max number of words in each complaint.
# (Not referenced anywhere else in this excerpt.)
MAX_SEQUENCE_LENGTH = 250
# This is fixed.
# (Not referenced anywhere else in this excerpt.)
EMBEDDING_DIM = 100
# Keras tokenizer fitted on the 'Observation' column. `master_df` is defined
# outside this excerpt.
tokenizer = Tokenizer(num_words=MAX_NB_WORDS, filters='!"#$%&()*+,-./:;<=>?@[\]^_`{|}~', lower=True)
tokenizer.fit_on_texts(master_df['Observation'].values)
word_index = tokenizer.word_index
from sklearn.feature_extraction.text import CountVectorizer
# Count vectorizer over 1- to 3-grams, capped at 10000 features. `stop_words`
# and the raw `X` passed to fit_transform are defined outside this excerpt.
cv=CountVectorizer(max_df=1.0,min_df=1, stop_words=stop_words, max_features=10000, ngram_range=(1,3))
X=cv.fit_transform(X)
# Load the previously saved model (the `pickle` import is not shown here).
# NOTE(review): the model is read from "../sgd.pickle" but retrain() writes
# 'sgd.pickle' in the current directory -- confirm both are meant to be the
# same file.
with open("../sgd.pickle", 'rb') as f:
    sgd = pickle.load(f)
def output_sample(sentence):
    """Predict the risk label for a single observation sentence.

    The sentence goes through ``preprocess_text``, is lower-cased, vectorized
    with the module-level ``cv`` and classified by the module-level ``sgd``.

    Returns the first element of the array produced by ``sgd.predict``.
    """
    test=preprocess_text(sentence)
    test=test.lower()
    #print(test)
    # Wrap in a list so cv.transform receives a one-element batch.
    test=[test]
    # NOTE(review): fit_on_sequences is handed raw text here, and `new_words`
    # is never read afterwards -- confirm these two lines are needed at all.
    tokenizer.fit_on_sequences(test)
    new_words= tokenizer.word_index
    #print(word_index)
    test1=cv.transform(test)
    #print(test1)
    output=sgd.predict(test1)
    return output[0]
def retrain(X,y):
    """Refit the saved classifier on one new observation and pickle it again.

    X: the raw observation text (a single string).
    y: the label supplied by the caller; forwarded unchanged to ``sgd.fit``.
    """
    X=preprocess_text(X)
    X=X.lower()
    X=[X]
    tokenizer.fit_on_texts(X)
    new_words=tokenizer.word_index  # not read again inside this function
    # fit_transform (not transform) re-fits `cv` on this single sample only.
    X=cv.fit_transform(X)
    # This is the call the question reports the ValueError from: the training
    # set handed to fit() consists of the one sample built above.
    sgd.fit(X,y)
    with open('sgd.pickle', 'wb') as f:
        pickle.dump(sgd, f)
    print("Model trained on new data")
# Interactive loop body: predict a label for one typed observation, ask the
# user to confirm or correct it, then call retrain() with that label.
sentence=input("\n\nEnter your observation:\n\n")
output=output_sample(sentence)
print("\n\nThe risk prediction is",preprocess_text(output),"\n\n")
print("Is the above prediction correct?\n")
corr=input("Press 'y' for yes or 'n' for no.\n")
if corr=='y':
    # np.array(<scalar>) builds a 0-d array, i.e. shape (), which matches the
    # "got an array of shape ()" message quoted in the title.
    newy=np.array(output)
    retrain(sentence,newy)
elif corr=='n':
    print("What is the correct risk?\n1. Low\n2. Medium\n")
    r=input("Enter the appropriate number: ")
    if r=='1':
        newy=np.array('Low')  # 0-d array again
        retrain(sentence,newy)
    elif r=='2':
        newy=np.array('Medium')  # 0-d array again
        retrain(sentence,newy)
    else:
        print("Incorrect input. Please restart the application.")
else:
    print("Incorrect input. Please restart the application")
程序运行时,错误发生在 sgd.fit(X,y)。
错误信息如下:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_11300/3528077041.py in <module>
5 newy=[output]
6 print(newy)
----> 7 retrain(sentence,newy)
8
9 elif corr=='n':
~\AppData\Local\Temp/ipykernel_11300/2433836763.py in retrain(X, y)
7 X=cv.fit_transform(X)
8 #y = y.reshape((-1, 1))
----> 9 sgd.fit(X,y)
10 with open('sgd.pickle', 'wb') as f:
11 pickle.dump(sgd, f)
~\AppData\Local\Programs\Python\Python38\lib\site-packages\sklearn\pipeline.py in fit(self, X, y, **fit_params)
344 if self._final_estimator != 'passthrough':
345 fit_params_last_step = fit_params_steps[self.steps[-1][0]]
--> 346 self._final_estimator.fit(Xt, y, **fit_params_last_step)
347
348 return self
~\AppData\Local\Programs\Python\Python38\lib\site-packages\sklearn\linear_model\_stochastic_gradient.py in fit(self, X, y, coef_init, intercept_init, sample_weight)
727 Returns an instance of self.
728 """
--> 729 return self._fit(X, y, alpha=self.alpha, C=1.0,
730 loss=self.loss, learning_rate=self.learning_rate,
731 coef_init=coef_init, intercept_init=intercept_init,
~\AppData\Local\Programs\Python\Python38\lib\site-packages\sklearn\linear_model\_stochastic_gradient.py in _fit(self, X, y, alpha, C, loss, learning_rate, coef_init, intercept_init, sample_weight)
567 self.t_ = 1.0
568
--> 569 self._partial_fit(X, y, alpha, C, loss, learning_rate, self.max_iter,
570 classes, sample_weight, coef_init, intercept_init)
571
~\AppData\Local\Programs\Python\Python38\lib\site-packages\sklearn\linear_model\_stochastic_gradient.py in _partial_fit(self, X, y, alpha, C, loss, learning_rate, max_iter, classes, sample_weight, coef_init, intercept_init)
529 max_iter=max_iter)
530 else:
--> 531 raise ValueError(
532 "The number of classes has to be greater than one;"
533 " got %d class" % n_classes)
ValueError: The number of classes has to be greater than one; got 1 class
数据样本如下:
Observation Risk
0 A separate road for light vehicle should be ma... Low
2 All benches were not having sufficient berm. Low
3 As light arrangement is not adequate. Low
4 As light arrangement is not adequate. Low
5 As contractor's equipment record is not availa... Low
77 First aid Room is not established. Medium
98 Heavy dust on haul road is found with in suffi... Medium
79 First aid station is maintained in the Rest sh... Medium
171 Presently explosive van is not available with ... Medium
79 First aid station is maintained in the Rest sh... Medium
理想情况下,它应该接受输入,但我不知道为什么会出现该错误。
我清理了代码,并对 retrain 函数做了一些修改:现在该函数会把新的文本(String)和标签(Label)添加到训练集中,然后重新拟合分类器。您代码的其他部分在逻辑上保持不变!
实用函数:
def output_sample(sentence):
    """Predict the risk label for a single observation sentence.

    The sentence goes through ``preprocess_text``, is lower-cased, vectorized
    with the module-level ``cv`` and classified by the module-level ``sgd``.

    The former ``tokenizer.fit_on_sequences`` / ``new_words`` lines were
    dropped: their result was never used, and the accompanying script never
    defines ``tokenizer``, so they raised ``NameError``.

    Args:
        sentence: Raw observation text.

    Returns:
        The first element of the array produced by ``sgd.predict``.
    """
    cleaned = preprocess_text(sentence).lower()
    # The vectorizer works on batches, so wrap the single sample in a list.
    features = cv.transform([cleaned])
    return sgd.predict(features)[0]
def preprocess_text(string: str) -> str:
    """Return ``string`` unchanged.

    Placeholder hook for text cleaning: put any normalisation here, but keep
    returning a string, because callers invoke ``.lower()`` on the result.
    """
    return string
def retrain(X, y):
    """Refit the vectorizer and classifier on the master data plus one sample.

    The previous version used ``master_df['Observation'] + X`` and
    ``master_df['Risk'] + y``. On a pandas Series ``+`` is element-wise, so it
    never appended the new row: it either failed on a length mismatch or
    concatenated the new label onto every existing label. The data is now
    converted to plain lists and the new sample is appended.

    Args:
        X: Raw observation text (a single string).
        y: Its label. A plain string, a 0-d array such as ``np.array('Low')``,
            or a one-element sequence are all accepted.

    Side effects:
        Re-fits the module-level ``cv`` and ``sgd``, writes ``sgd`` to
        'sgd.pickle', and prints a confirmation message.
    """
    import numpy as np  # local import keeps this block self-contained

    sample = preprocess_text(X).lower()
    # ravel() turns a scalar or 0-d array into a 1-d array, so the label can
    # be appended no matter which form the caller used.
    new_labels = np.ravel(y).tolist()

    texts = master_df['Observation'].tolist() + [sample]
    labels = master_df['Risk'].tolist() + new_labels

    features = cv.fit_transform(texts)
    sgd.fit(features, labels)

    # NOTE(review): only `sgd` is persisted. `cv` was just re-fitted and is not
    # saved, and the new sample is not added to `master_df` -- confirm the
    # model loaded on the next run still matches the vectorizer built then.
    with open('sgd.pickle', 'wb') as f:
        pickle.dump(sgd, f)
    print("Model trained on new data")
实际流程:
import pickle

import nltk
import numpy as np
import pandas as pd  # was missing: pd.read_csv below raised NameError
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import SGDClassifier

# Build the bag-of-words vectorizer (1- to 3-grams, at most 10000 features)
# and fit it on the master data set.
stopwords = nltk.corpus.stopwords.words('english')
cv = CountVectorizer(max_df=1.0, min_df=1, stop_words=stopwords, max_features=10000, ngram_range=(1, 3))
# NOTE(review): the file is named .tsv but is read with the default separator
# -- confirm whether sep='\t' is required.
master_df = pd.read_csv('classification.tsv')
X = cv.fit_transform(master_df['Observation'])

# Reuse a previously pickled model when one can be read; otherwise train a
# fresh classifier on the master data. The context manager closes the file
# handle, which the earlier bare open() leaked.
# NOTE: pickle.load must only be used on files this application wrote itself.
try:
    with open("./sgd.pickle", 'rb') as f:
        sgd = pickle.load(f)
except (OSError, EOFError, pickle.UnpicklingError):
    sgd = SGDClassifier()
    sgd.fit(X, master_df['Risk'].to_list())

# Predict, ask the user to confirm or correct the label, then retrain.
sentence = input("\n\nEnter your observation:\n\n")
output = output_sample(sentence)
print("\n\nThe risk prediction is", preprocess_text(output), "\n\n")
print("Is the above prediction correct?\n")
corr = input("Press 'y' for yes or 'n' for no.\n")
if corr == 'y':
    retrain(sentence, np.array(output))
elif corr == 'n':
    print("What is the correct risk?\n1. Low\n2. Medium\n")
    r = input("Enter the appropriate number: ")
    # Menu choice -> label; replaces two identical if/elif branches.
    risk_by_choice = {'1': 'Low', '2': 'Medium'}
    if r in risk_by_choice:
        retrain(sentence, np.array(risk_by_choice[r]))
    else:
        print("Incorrect input. Please restart the application.")
else:
    print("Incorrect input. Please restart the application")