Sklearn MLPRegressor 中的 RandomGridSearchCV 错误
Error with RandomGridSearchCV in Sklearn MLPRegressor
我在互联网上发现了类似的问题,但都略有不同,而且没有一个解决方案对我有用。
我有一组解释变量 X (2085,12) 和一个被解释变量 y (2085,1),我需要对它们做一些处理,其中包括使用标题中提到的这些 sklearn 类。为了找到合适的超参数,我把代码安排如下:
# Randomized hyperparameter search for an MLPRegressor trained with the 'sgd' solver.
# Base estimator handed to the search below.
mlpsgd = MLPRegressor(max_iter = 1000, solver='sgd')
# 100 candidate alpha values, log-spaced from 1e-6 to 1e3, converted to plain Python floats.
alpha = [float(x) for x in np.logspace(start = -6, stop = 3, num = 100)]
# Three-layer candidates: each width is one of 8 log-spaced values between 10**0 and 10**2.2,
# truncated by int() (8**3 = 512 tuples).
hidden_layer_sizes = [(int(x),int(y),int(z)) for x in np.logspace(start = 0, stop = 2.2, num = 8) for y in np.logspace(start = 0, stop = 2.2, num = 8) for z in np.logspace(start = 0, stop = 2.2, num = 8)]
# Two-layer candidates: each width is one of 25 log-spaced values between 1 and 100 (25**2 = 625 tuples).
hidden_layer_sizes.extend((int(x),int(y)) for x in np.logspace(start = 0, stop = 2, num = 25) for y in np.logspace(start = 0, stop = 2, num = 25))
# One-layer candidates: 1000 log-spaced values between 10 and 100; int() truncation makes
# many of these entries identical.
hidden_layer_sizes.extend((int(x),) for x in np.logspace(start = 1, stop = 2, num = 1000))
activation = ['logistic', 'tanh', 'relu']
learning_rate = ['constant', 'invscaling','adaptive']
# 20 candidate initial learning rates, log-spaced from 1e-5 to 1.
learning_rate_init = [float(x) for x in np.logspace(start = -5, stop = 0, num = 20)]
# Search space: parameter name -> list of candidate values.
random_grid3 = {'learning_rate': learning_rate,'activation': activation,'learning_rate_init': learning_rate_init, 'hidden_layer_sizes': hidden_layer_sizes, 'alpha': alpha}
# 350 sampled parameter settings; n_jobs=-1 asks for parallel execution (the traceback shows Parallel(n_jobs=-1)).
mlp_random3 = RandomizedSearchCV(estimator = mlpsgd, param_distributions = random_grid3, n_iter = 350, n_jobs=-1)
# X and y are defined outside this snippet.
mlp_random3.fit(X, y)
我知道整个随机网格非常大,但我也尝试过一个非常小的网格,所以这不是问题所在(而且这样更适合我要做的这类研究)。我还应该提到,我使用的是 Windows,并且程序以下面这行开头:
if __name__ == '__main__':
据我所知(希望我的理解是正确的),这一行是我在上面第一段代码倒数第二行中请求的多进程(n_jobs=-1)所必需的。
当我运行代码时,350 次迭代中有一部分被正确处理,但随后程序停止并打印出以下错误:
Traceback (most recent call last):
File "c:\Users\mat\OneDrive\Desktop\TES\Analisi\Tesi.py", line 164, in <module>
perc = mlpottimizzata(x_train,y_train[:,i])
File "c:\Users\mat\OneDrive\Desktop\TES\Analisi\Tesi.py", line 72, in mlpottimizzata
mlp_random3.fit(x_train, y_train)
File "C:\Users\mat\AppData\Local\Programs\Python\Python36-32\lib\site-packages\sklearn\model_selection\_search.py", line 639, in fit
cv.split(X, y, groups)))
File "C:\Users\mat\AppData\Local\Programs\Python\Python36-32\lib\site-packages\sklearn\externals\joblib\parallel.py", line 789, in __call__
self.retrieve()
File "C:\Users\mat\AppData\Local\Programs\Python\Python36-32\lib\site-packages\sklearn\externals\joblib\parallel.py", line 740, in retrieve
raise exception
sklearn.externals.joblib.my_exceptions.JoblibValueError: JoblibValueError
___________________________________________________________________________
Multiprocessing exception:
...........................................................................
c:\Users\mat\.vscode\extensions\ms-python.python-2018.6.0\pythonFiles\PythonTools\visualstudio_py_launcher.py in <module>()
86 del sys, os
87
88 # and start debugging
89 ## Begin modification by Don Jayamanne
90 # Pass current Process id to pass back to debugger
---> 91 vspd.debug(filename, port_num, debug_id, debug_options, currentPid, run_as)
92 ## End Modification by Don Jayamanne
...........................................................................
c:\Users\mat\.vscode\extensions\ms-python.python-2018.6.0\pythonFiles\PythonTools\visualstudio_py_debugger.py in debug(file=r'c:\Users\mat\OneDrive\Desktop\TES\Analisi\Tesi.py', port_num=58990, debug_id='34806ad9-833a-4524-8cd6-18ca4aa74f14', debug_options={'RedirectOutput'}, currentPid=10548, run_as='script')
2620 if run_as == 'module':
2621 exec_module(file, globals_obj)
2622 elif run_as == 'code':
2623 exec_code(file, '<string>', globals_obj)
2624 else:
-> 2625 exec_file(file, globals_obj)
file = r'c:\Users\mat\OneDrive\Desktop\TES\Analisi\Tesi.py'
globals_obj = {'__name__': '__main__'}
2626 finally:
2627 sys.settrace(None)
2628 THREADS_LOCK.acquire()
2629 del THREADS[cur_thread.id]
...........................................................................
c:\Users\mat\.vscode\extensions\ms-python.python-2018.6.0\pythonFiles\PythonTools\visualstudio_py_util.py in exec_file(file=r'c:\Users\mat\OneDrive\Desktop\TES\Analisi\Tesi.py', global_variables={'__name__': '__main__'})
114 f = open(file, "rb")
115 try:
116 code = f.read().replace(to_bytes('\r\n'), to_bytes('\n')) + to_bytes('\n')
117 finally:
118 f.close()
--> 119 exec_code(code, file, global_variables)
code = b'import pandas as p\nimport numpy as np\nimport....score(x_train, y_train[:,i]))\n print(err)\n'
file = r'c:\Users\mat\OneDrive\Desktop\TES\Analisi\Tesi.py'
global_variables = {'__name__': '__main__'}
120
121 def exec_module(module, global_variables):
122 '''Executes the provided module as if it were provided as '-m module'. The
123 functionality is implemented using `runpy.run_module`, which was added in
...........................................................................
c:\Users\mat\.vscode\extensions\ms-python.python-2018.6.0\pythonFiles\PythonTools\visualstudio_py_util.py in exec_code(code=b'import pandas as p\nimport numpy as np\nimport....score(x_train, y_train[:,i]))\n print(err)\n', file=r'c:\Users\mat\OneDrive\Desktop\TES\Analisi\Tesi.py', global_variables={'MLPRegressor': <class 'sklearn.neural_network.multilayer_perceptron.MLPRegressor'>, 'RandomForestRegressor': <class 'sklearn.ensemble.forest.RandomForestRegressor'>, 'RandomizedSearchCV': <class 'sklearn.model_selection._search.RandomizedSearchCV'>, '__builtins__': {'ArithmeticError': <class 'ArithmeticError'>, 'AssertionError': <class 'AssertionError'>, 'AttributeError': <class 'AttributeError'>, 'BaseException': <class 'BaseException'>, 'BlockingIOError': <class 'BlockingIOError'>, 'BrokenPipeError':
<class 'BrokenPipeError'>, 'BufferError': <class 'BufferError'>, 'BytesWarning': <class 'BytesWarning'>, 'ChildProcessError': <class 'ChildProcessError'>, 'ConnectionAbortedError': <class 'ConnectionAbortedError'>, ...}, '__cached__': None, '__doc__': None, '__file__': r'c:\Users\mat\OneDrive\Desktop\TES\Analisi\Tesi.py', '__loader__': None, '__name__': '__main__', '__package__': None, ...})
90 if os.path.isdir(sys.path[0]):
91 sys.path.insert(0, os.path.split(file)[0])
92 else:
93 sys.path[0] = os.path.split(file)[0]
94 code_obj = compile(code, file, 'exec')
---> 95 exec(code_obj, global_variables)
code_obj = <code object <module> at 0x02BC45F8, file "c:\Us...at\OneDrive\Desktop\TES\Analisi\Tesi.py", line 1>
global_variables = {'MLPRegressor': <class 'sklearn.neural_network.multilayer_perceptron.MLPRegressor'>, 'RandomForestRegressor': <class 'sklearn.ensemble.forest.RandomForestRegressor'>, 'RandomizedSearchCV': <class 'sklearn.model_selection._search.RandomizedSearchCV'>, '__builtins__': {'ArithmeticError': <class 'ArithmeticError'>, 'AssertionError': <class 'AssertionError'>, 'AttributeError': <class 'AttributeError'>, 'BaseException': <class 'BaseException'>, 'BlockingIOError': <class 'BlockingIOError'>, 'BrokenPipeError': <class 'BrokenPipeError'>, 'BufferError': <class 'BufferError'>, 'BytesWarning': <class 'BytesWarning'>, 'ChildProcessError': <class 'ChildProcessError'>, 'ConnectionAbortedError': <class 'ConnectionAbortedError'>, ...}, '__cached__': None, '__doc__': None, '__file__': r'c:\Users\mat\OneDrive\Desktop\TES\Analisi\Tesi.py', '__loader__': None, '__name__': '__main__', '__package__': None, ...}
96
97 def exec_file(file, global_variables):
98 '''Executes the provided script as if it were the original script provided
99 to python.exe. The functionality is similar to `runpy.run_path`, which was
...........................................................................
c:\Users\mat\OneDrive\Desktop\TES\Analisi\Tesi.py in <module>()
159 # print("Mean squared error: {}".format(rndf_err))
160 # print('Variance score: %.2f \n \n' % rndf.fit(x_train, y_train[:,i]).score(x_test, y_test[:,i]))
161
162 #multilayer perceptron
163 print("Multilayer Perceptron \n")
--> 164 perc = mlpottimizzata(x_train,y_train[:,i])
165 y_perc = perc.predict(x_test)
166 perc_err = mean_squared_error(y_test[:,i], y_perc)
167 err[2,i]=r2_score(y_test[:,i],y_perc)
168 print("Mean squared error: {}".format(perc_err))
...........................................................................
c:\Users\mat\OneDrive\Desktop\TES\Analisi\Tesi.py in mlpottimizzata(x_train=array([[ 0.06 , 2.13 , 4.47
, .... 0.00125208,
0.00505016, 0.0039683 ]]), y_train=array([0.00827529, 0.00318743, 0.00103558, ..., 0.00064697, 0. ,
0.00333603]))
67 activation = ['logistic', 'tanh', 'relu']
68 learning_rate = ['constant', 'invscaling','adaptive']
69 learning_rate_init = [float(x) for x in np.logspace(start = -5, stop = 0, num = 20)]
70 random_grid3 = {'learning_rate': learning_rate,'activation': activation,'learning_rate_init': learning_rate_init, 'hidden_layer_sizes': hidden_layer_sizes, 'alpha': alpha}
71 mlp_random3 = RandomizedSearchCV(estimator = mlpsgd, param_distributions = random_grid3, n_iter = 350, n_jobs=-1)
---> 72 mlp_random3.fit(x_train, y_train)
mlp_random3.fit = <bound method BaseSearchCV.fit of RandomizedSear...urn_train_score='warn', scoring=None, verbose=0)>
x_train = array([[ 0.06 , 2.13 , 4.47 , .... 0.00125208,
0.00505016, 0.0039683 ]])
y_train = array([0.00827529, 0.00318743, 0.00103558, ..., 0.00064697, 0. ,
0.00333603])
73
74 if mlp_random3.best_score_ is max(mlp_random1.best_score_,mlp_random2.best_score_,mlp_random3.best_score_):
75 return mlp_random3.best_estimator_
76 if mlp_random1.best_score_ >= mlp_random2.best_score_:
...........................................................................
C:\Users\mat\AppData\Local\Programs\Python\Python36-32\lib\site-packages\sklearn\model_selection\_search.py in fit(self=RandomizedSearchCV(cv=None, error_score='raise',...turn_train_score='warn', scoring=None, verbose=0), X=array([[ 0.06 , 2.13 , 4.47 , .... 0.00125208,
0.00505016, 0.0039683 ]]), y=array([0.00827529, 0.00318743, 0.00103558, ..., 0.00064697, 0. ,
0.00333603]), groups=None, **fit_params={})
634 return_train_score=self.return_train_score,
635 return_n_test_samples=True,
636 return_times=True, return_parameters=False,
637 error_score=self.error_score)
638 for parameters, (train, test) in product(candidate_params,
--> 639 cv.split(X, y, groups)))
cv.split = <bound method _BaseKFold.split of KFold(n_splits=3, random_state=None, shuffle=False)>
X = array([[ 0.06 , 2.13 , 4.47 , .... 0.00125208,
0.00505016, 0.0039683 ]])
y = array([0.00827529, 0.00318743, 0.00103558, ..., 0.00064697, 0. ,
0.00333603])
groups = None
640
641 # if one choose to see train score, "out" will contain train score info
642 if self.return_train_score:
643 (train_score_dicts, test_score_dicts, test_sample_counts, fit_time,
...........................................................................
C:\Users\mat\AppData\Local\Programs\Python\Python36-32\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self=Parallel(n_jobs=-1), iterable=<generator object BaseSearchCV.fit.<locals>.<genexpr>>)
784 if pre_dispatch == "all" or n_jobs == 1:
785 # The iterable was consumed all at once by the above for loop.
786 # No need to wait for async callbacks to trigger to
787 # consumption.
788 self._iterating = False
--> 789 self.retrieve()
self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=-1)>
790 # Make sure that we get a last message telling us we are done
791 elapsed_time = time.time() - self._start_time
792 self._print('Done %3i out of %3i | elapsed: %s finished',
793 (len(self._output), len(self._output),
---------------------------------------------------------------------------
Sub-process traceback:
---------------------------------------------------------------------------
ValueError Tue Jul 17 19:33:23 2018
PID: 9280Python 3.6.5: C:\Users\mat\AppData\Local\Programs\Python\Python36-32\python.exe
...........................................................................
C:\Users\mat\AppData\Local\Programs\Python\Python36-32\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self=<sklearn.externals.joblib.parallel.BatchedCalls object>)
126 def __init__(self, iterator_slice):
127 self.items = list(iterator_slice)
128 self._size = len(self.items)
129
130 def __call__(self):
--> 131 return [func(*args, **kwargs) for func, args, kwargs in self.items]
self.items = [(<function _fit_and_score>, (MLPRegressor(activation='relu', alpha=811.130830...tion=0.1,
verbose=False, warm_start=False), array([[ 6.00000000e-02, 2.13000000e+00, 4.470...25207638e-03, 5.05016074e-03, 3.96830145e-03]]), array([0.00827529, 0.00318743, 0.00103558, ..., 0.00064697, 0. ,
0.00333603]), {'score': <function _passthrough_scorer>}, array([ 629, 630, 631, ..., 1882, 1883, 1884]), array([ 0, 1, 2, 3, 4, 5, 6, 7, ..., 621, 622, 623,
624, 625, 626, 627, 628]), 0, {'activation': 'relu', 'alpha': 811.130830789689, 'hidden_layer_sizes': (24,),
'learning_rate': 'adaptive', 'learning_rate_init': 0.5455594781168515}), {'error_score': 'raise', 'fit_params': {},
'return_n_test_samples': True, 'return_parameters': False, 'return_times': True, 'return_train_score': 'warn'})]
132
133 def __len__(self):
134 return self._size
135
...........................................................................
C:\Users\mat\AppData\Local\Programs\Python\Python36-32\lib\site-packages\sklearn\externals\joblib\parallel.py in <listcomp>(.0=<list_iterator object>)
126 def __init__(self, iterator_slice):
127 self.items = list(iterat
错误输出没有遗漏,它就是在这里结束的。另外需要说明,错误信息中引用的 mlpottimizzata 就是包含我所附第一段代码的函数。
我真的已经无计可施了,非常感谢任何帮助。提前谢谢大家 :)
注:代码的另一部分做的事情大致相同,只是使用了 solver='lbfgs',它运行得很顺利,这反而让我更加困惑。
当您使用列表推导式和 float 参数来定义网格参数时,就会出现这个问题。(回溯中失败的候选参数为 alpha≈811.13、learning_rate_init≈0.546,这两个值都不在下面代码所用的取值范围内。)
下面的代码在我这里可以正常运行:
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import RandomizedSearchCV
import pandas as pd
import numpy as np
from sklearn.model_selection import GridSearchCV
# Load the features and the target from CSV files (their contents are not shown here).
X = pd.read_csv('X.csv')
Y = pd.read_csv('y.csv')
# Drop the first row and the first column, then take the underlying array.
X = X.iloc[1:,1:].values
# Drop the first row and keep only the column at position 1.
Y = Y.iloc[1:,1].values
# Base estimator handed to the search below.
mlpsgd = MLPRegressor(max_iter = 1000, solver='sgd')
# Candidate alpha values from 0.01 in steps of 0.01, stopping near 0.1
# (np.arange excludes the stop value, subject to floating-point rounding).
alpha = np.arange(0.01, 0.1, 0.01)
# Three-layer candidates: each width is one of 8 log-spaced values between 10**0 and 10**2.2,
# truncated by int() (8**3 = 512 tuples).
hidden_layer_sizes = [(int(x),int(y),int(z)) for x in np.logspace(start = 0, stop = 2.2, num = 8) for y in np.logspace(start = 0, stop = 2.2, num = 8) for z in np.logspace(start = 0, stop = 2.2, num = 8)]
# Two-layer candidates: each width is one of 25 log-spaced values between 1 and 100 (25**2 = 625 tuples).
hidden_layer_sizes.extend((int(x),int(y)) for x in np.logspace(start = 0, stop = 2, num = 25) for y in np.logspace(start = 0, stop = 2, num = 25))
# One-layer candidates: 1000 log-spaced values between 10 and 100; int() truncation makes
# many of these entries identical.
hidden_layer_sizes.extend((int(x),) for x in np.logspace(start = 1, stop = 2, num = 1000))
activation = ['logistic', 'tanh', 'relu']
learning_rate = ['constant', 'invscaling','adaptive']
# Candidate initial learning rates on the same 0.01-step grid as alpha.
learning_rate_init = np.arange(0.01, 0.1, 0.01)
# Search space: parameter name -> candidate values.
random_grid3 = {'learning_rate': learning_rate,'activation': activation,'learning_rate_init': learning_rate_init, 'hidden_layer_sizes': hidden_layer_sizes, 'alpha': alpha}
# 350 sampled parameter settings; n_jobs=-1 asks for parallel execution.
mlp_random3 = RandomizedSearchCV(estimator = mlpsgd, param_distributions = random_grid3, n_iter = 350, n_jobs=-1)
mlp_random3.fit(X, Y)
# Show the estimator refit with the best parameter setting found.
print(mlp_random3.best_estimator_)
MLPRegressor(activation='relu', alpha=0.03, batch_size='auto',
beta_1=0.9,
beta_2=0.999, early_stopping=False, epsilon=1e-08,
hidden_layer_sizes=(4, 18, 1), learning_rate='adaptive',
learning_rate_init=0.05, max_iter=1000, momentum=0.9,
nesterovs_momentum=True, power_t=0.5, random_state=None,
shuffle=True, solver='sgd', tol=0.0001, validation_fraction=0.1,
verbose=False, warm_start=False)
我在互联网上发现了类似的问题,但都略有不同,而且没有一个解决方案对我有用。 我有一组解释变量 X (2085,12) 和一个被解释变量 y (2085,1),我需要对它们做一些处理,其中包括使用标题中提到的这些 sklearn 类。为了找到合适的超参数,我把代码安排如下:
# Randomized hyperparameter search for an MLPRegressor trained with the 'sgd' solver.
# Base estimator handed to the search below.
mlpsgd = MLPRegressor(max_iter = 1000, solver='sgd')
# 100 candidate alpha values, log-spaced from 1e-6 to 1e3, converted to plain Python floats.
alpha = [float(x) for x in np.logspace(start = -6, stop = 3, num = 100)]
# Three-layer candidates: each width is one of 8 log-spaced values between 10**0 and 10**2.2,
# truncated by int() (8**3 = 512 tuples).
hidden_layer_sizes = [(int(x),int(y),int(z)) for x in np.logspace(start = 0, stop = 2.2, num = 8) for y in np.logspace(start = 0, stop = 2.2, num = 8) for z in np.logspace(start = 0, stop = 2.2, num = 8)]
# Two-layer candidates: each width is one of 25 log-spaced values between 1 and 100 (25**2 = 625 tuples).
hidden_layer_sizes.extend((int(x),int(y)) for x in np.logspace(start = 0, stop = 2, num = 25) for y in np.logspace(start = 0, stop = 2, num = 25))
# One-layer candidates: 1000 log-spaced values between 10 and 100; int() truncation makes
# many of these entries identical.
hidden_layer_sizes.extend((int(x),) for x in np.logspace(start = 1, stop = 2, num = 1000))
activation = ['logistic', 'tanh', 'relu']
learning_rate = ['constant', 'invscaling','adaptive']
# 20 candidate initial learning rates, log-spaced from 1e-5 to 1.
learning_rate_init = [float(x) for x in np.logspace(start = -5, stop = 0, num = 20)]
# Search space: parameter name -> list of candidate values.
random_grid3 = {'learning_rate': learning_rate,'activation': activation,'learning_rate_init': learning_rate_init, 'hidden_layer_sizes': hidden_layer_sizes, 'alpha': alpha}
# 350 sampled parameter settings; n_jobs=-1 asks for parallel execution (the traceback shows Parallel(n_jobs=-1)).
mlp_random3 = RandomizedSearchCV(estimator = mlpsgd, param_distributions = random_grid3, n_iter = 350, n_jobs=-1)
# X and y are defined outside this snippet.
mlp_random3.fit(X, y)
我知道整个随机网格非常大,但我也尝试过一个非常小的网格,所以这不是问题所在(而且这样更适合我要做的这类研究)。我还应该提到,我使用的是 Windows,并且程序以下面这行开头:
if __name__ == '__main__':
据我所知(希望我的理解是正确的),这一行是我在上面第一段代码倒数第二行中请求的多进程(n_jobs=-1)所必需的。 当我运行代码时,350 次迭代中有一部分被正确处理,但随后程序停止并打印出以下错误:
Traceback (most recent call last):
File "c:\Users\mat\OneDrive\Desktop\TES\Analisi\Tesi.py", line 164, in <module>
perc = mlpottimizzata(x_train,y_train[:,i])
File "c:\Users\mat\OneDrive\Desktop\TES\Analisi\Tesi.py", line 72, in mlpottimizzata
mlp_random3.fit(x_train, y_train)
File "C:\Users\mat\AppData\Local\Programs\Python\Python36-32\lib\site-packages\sklearn\model_selection\_search.py", line 639, in fit
cv.split(X, y, groups)))
File "C:\Users\mat\AppData\Local\Programs\Python\Python36-32\lib\site-packages\sklearn\externals\joblib\parallel.py", line 789, in __call__
self.retrieve()
File "C:\Users\mat\AppData\Local\Programs\Python\Python36-32\lib\site-packages\sklearn\externals\joblib\parallel.py", line 740, in retrieve
raise exception
sklearn.externals.joblib.my_exceptions.JoblibValueError: JoblibValueError
___________________________________________________________________________
Multiprocessing exception:
...........................................................................
c:\Users\mat\.vscode\extensions\ms-python.python-2018.6.0\pythonFiles\PythonTools\visualstudio_py_launcher.py in <module>()
86 del sys, os
87
88 # and start debugging
89 ## Begin modification by Don Jayamanne
90 # Pass current Process id to pass back to debugger
---> 91 vspd.debug(filename, port_num, debug_id, debug_options, currentPid, run_as)
92 ## End Modification by Don Jayamanne
...........................................................................
c:\Users\mat\.vscode\extensions\ms-python.python-2018.6.0\pythonFiles\PythonTools\visualstudio_py_debugger.py in debug(file=r'c:\Users\mat\OneDrive\Desktop\TES\Analisi\Tesi.py', port_num=58990, debug_id='34806ad9-833a-4524-8cd6-18ca4aa74f14', debug_options={'RedirectOutput'}, currentPid=10548, run_as='script')
2620 if run_as == 'module':
2621 exec_module(file, globals_obj)
2622 elif run_as == 'code':
2623 exec_code(file, '<string>', globals_obj)
2624 else:
-> 2625 exec_file(file, globals_obj)
file = r'c:\Users\mat\OneDrive\Desktop\TES\Analisi\Tesi.py'
globals_obj = {'__name__': '__main__'}
2626 finally:
2627 sys.settrace(None)
2628 THREADS_LOCK.acquire()
2629 del THREADS[cur_thread.id]
...........................................................................
c:\Users\mat\.vscode\extensions\ms-python.python-2018.6.0\pythonFiles\PythonTools\visualstudio_py_util.py in exec_file(file=r'c:\Users\mat\OneDrive\Desktop\TES\Analisi\Tesi.py', global_variables={'__name__': '__main__'})
114 f = open(file, "rb")
115 try:
116 code = f.read().replace(to_bytes('\r\n'), to_bytes('\n')) + to_bytes('\n')
117 finally:
118 f.close()
--> 119 exec_code(code, file, global_variables)
code = b'import pandas as p\nimport numpy as np\nimport....score(x_train, y_train[:,i]))\n print(err)\n'
file = r'c:\Users\mat\OneDrive\Desktop\TES\Analisi\Tesi.py'
global_variables = {'__name__': '__main__'}
120
121 def exec_module(module, global_variables):
122 '''Executes the provided module as if it were provided as '-m module'. The
123 functionality is implemented using `runpy.run_module`, which was added in
...........................................................................
c:\Users\mat\.vscode\extensions\ms-python.python-2018.6.0\pythonFiles\PythonTools\visualstudio_py_util.py in exec_code(code=b'import pandas as p\nimport numpy as np\nimport....score(x_train, y_train[:,i]))\n print(err)\n', file=r'c:\Users\mat\OneDrive\Desktop\TES\Analisi\Tesi.py', global_variables={'MLPRegressor': <class 'sklearn.neural_network.multilayer_perceptron.MLPRegressor'>, 'RandomForestRegressor': <class 'sklearn.ensemble.forest.RandomForestRegressor'>, 'RandomizedSearchCV': <class 'sklearn.model_selection._search.RandomizedSearchCV'>, '__builtins__': {'ArithmeticError': <class 'ArithmeticError'>, 'AssertionError': <class 'AssertionError'>, 'AttributeError': <class 'AttributeError'>, 'BaseException': <class 'BaseException'>, 'BlockingIOError': <class 'BlockingIOError'>, 'BrokenPipeError':
<class 'BrokenPipeError'>, 'BufferError': <class 'BufferError'>, 'BytesWarning': <class 'BytesWarning'>, 'ChildProcessError': <class 'ChildProcessError'>, 'ConnectionAbortedError': <class 'ConnectionAbortedError'>, ...}, '__cached__': None, '__doc__': None, '__file__': r'c:\Users\mat\OneDrive\Desktop\TES\Analisi\Tesi.py', '__loader__': None, '__name__': '__main__', '__package__': None, ...})
90 if os.path.isdir(sys.path[0]):
91 sys.path.insert(0, os.path.split(file)[0])
92 else:
93 sys.path[0] = os.path.split(file)[0]
94 code_obj = compile(code, file, 'exec')
---> 95 exec(code_obj, global_variables)
code_obj = <code object <module> at 0x02BC45F8, file "c:\Us...at\OneDrive\Desktop\TES\Analisi\Tesi.py", line 1>
global_variables = {'MLPRegressor': <class 'sklearn.neural_network.multilayer_perceptron.MLPRegressor'>, 'RandomForestRegressor': <class 'sklearn.ensemble.forest.RandomForestRegressor'>, 'RandomizedSearchCV': <class 'sklearn.model_selection._search.RandomizedSearchCV'>, '__builtins__': {'ArithmeticError': <class 'ArithmeticError'>, 'AssertionError': <class 'AssertionError'>, 'AttributeError': <class 'AttributeError'>, 'BaseException': <class 'BaseException'>, 'BlockingIOError': <class 'BlockingIOError'>, 'BrokenPipeError': <class 'BrokenPipeError'>, 'BufferError': <class 'BufferError'>, 'BytesWarning': <class 'BytesWarning'>, 'ChildProcessError': <class 'ChildProcessError'>, 'ConnectionAbortedError': <class 'ConnectionAbortedError'>, ...}, '__cached__': None, '__doc__': None, '__file__': r'c:\Users\mat\OneDrive\Desktop\TES\Analisi\Tesi.py', '__loader__': None, '__name__': '__main__', '__package__': None, ...}
96
97 def exec_file(file, global_variables):
98 '''Executes the provided script as if it were the original script provided
99 to python.exe. The functionality is similar to `runpy.run_path`, which was
...........................................................................
c:\Users\mat\OneDrive\Desktop\TES\Analisi\Tesi.py in <module>()
159 # print("Mean squared error: {}".format(rndf_err))
160 # print('Variance score: %.2f \n \n' % rndf.fit(x_train, y_train[:,i]).score(x_test, y_test[:,i]))
161
162 #multilayer perceptron
163 print("Multilayer Perceptron \n")
--> 164 perc = mlpottimizzata(x_train,y_train[:,i])
165 y_perc = perc.predict(x_test)
166 perc_err = mean_squared_error(y_test[:,i], y_perc)
167 err[2,i]=r2_score(y_test[:,i],y_perc)
168 print("Mean squared error: {}".format(perc_err))
...........................................................................
c:\Users\mat\OneDrive\Desktop\TES\Analisi\Tesi.py in mlpottimizzata(x_train=array([[ 0.06 , 2.13 , 4.47
, .... 0.00125208,
0.00505016, 0.0039683 ]]), y_train=array([0.00827529, 0.00318743, 0.00103558, ..., 0.00064697, 0. ,
0.00333603]))
67 activation = ['logistic', 'tanh', 'relu']
68 learning_rate = ['constant', 'invscaling','adaptive']
69 learning_rate_init = [float(x) for x in np.logspace(start = -5, stop = 0, num = 20)]
70 random_grid3 = {'learning_rate': learning_rate,'activation': activation,'learning_rate_init': learning_rate_init, 'hidden_layer_sizes': hidden_layer_sizes, 'alpha': alpha}
71 mlp_random3 = RandomizedSearchCV(estimator = mlpsgd, param_distributions = random_grid3, n_iter = 350, n_jobs=-1)
---> 72 mlp_random3.fit(x_train, y_train)
mlp_random3.fit = <bound method BaseSearchCV.fit of RandomizedSear...urn_train_score='warn', scoring=None, verbose=0)>
x_train = array([[ 0.06 , 2.13 , 4.47 , .... 0.00125208,
0.00505016, 0.0039683 ]])
y_train = array([0.00827529, 0.00318743, 0.00103558, ..., 0.00064697, 0. ,
0.00333603])
73
74 if mlp_random3.best_score_ is max(mlp_random1.best_score_,mlp_random2.best_score_,mlp_random3.best_score_):
75 return mlp_random3.best_estimator_
76 if mlp_random1.best_score_ >= mlp_random2.best_score_:
...........................................................................
C:\Users\mat\AppData\Local\Programs\Python\Python36-32\lib\site-packages\sklearn\model_selection\_search.py in fit(self=RandomizedSearchCV(cv=None, error_score='raise',...turn_train_score='warn', scoring=None, verbose=0), X=array([[ 0.06 , 2.13 , 4.47 , .... 0.00125208,
0.00505016, 0.0039683 ]]), y=array([0.00827529, 0.00318743, 0.00103558, ..., 0.00064697, 0. ,
0.00333603]), groups=None, **fit_params={})
634 return_train_score=self.return_train_score,
635 return_n_test_samples=True,
636 return_times=True, return_parameters=False,
637 error_score=self.error_score)
638 for parameters, (train, test) in product(candidate_params,
--> 639 cv.split(X, y, groups)))
cv.split = <bound method _BaseKFold.split of KFold(n_splits=3, random_state=None, shuffle=False)>
X = array([[ 0.06 , 2.13 , 4.47 , .... 0.00125208,
0.00505016, 0.0039683 ]])
y = array([0.00827529, 0.00318743, 0.00103558, ..., 0.00064697, 0. ,
0.00333603])
groups = None
640
641 # if one choose to see train score, "out" will contain train score info
642 if self.return_train_score:
643 (train_score_dicts, test_score_dicts, test_sample_counts, fit_time,
...........................................................................
C:\Users\mat\AppData\Local\Programs\Python\Python36-32\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self=Parallel(n_jobs=-1), iterable=<generator object BaseSearchCV.fit.<locals>.<genexpr>>)
784 if pre_dispatch == "all" or n_jobs == 1:
785 # The iterable was consumed all at once by the above for loop.
786 # No need to wait for async callbacks to trigger to
787 # consumption.
788 self._iterating = False
--> 789 self.retrieve()
self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=-1)>
790 # Make sure that we get a last message telling us we are done
791 elapsed_time = time.time() - self._start_time
792 self._print('Done %3i out of %3i | elapsed: %s finished',
793 (len(self._output), len(self._output),
---------------------------------------------------------------------------
Sub-process traceback:
---------------------------------------------------------------------------
ValueError Tue Jul 17 19:33:23 2018
PID: 9280Python 3.6.5: C:\Users\mat\AppData\Local\Programs\Python\Python36-32\python.exe
...........................................................................
C:\Users\mat\AppData\Local\Programs\Python\Python36-32\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self=<sklearn.externals.joblib.parallel.BatchedCalls object>)
126 def __init__(self, iterator_slice):
127 self.items = list(iterator_slice)
128 self._size = len(self.items)
129
130 def __call__(self):
--> 131 return [func(*args, **kwargs) for func, args, kwargs in self.items]
self.items = [(<function _fit_and_score>, (MLPRegressor(activation='relu', alpha=811.130830...tion=0.1,
verbose=False, warm_start=False), array([[ 6.00000000e-02, 2.13000000e+00, 4.470...25207638e-03, 5.05016074e-03, 3.96830145e-03]]), array([0.00827529, 0.00318743, 0.00103558, ..., 0.00064697, 0. ,
0.00333603]), {'score': <function _passthrough_scorer>}, array([ 629, 630, 631, ..., 1882, 1883, 1884]), array([ 0, 1, 2, 3, 4, 5, 6, 7, ..., 621, 622, 623,
624, 625, 626, 627, 628]), 0, {'activation': 'relu', 'alpha': 811.130830789689, 'hidden_layer_sizes': (24,),
'learning_rate': 'adaptive', 'learning_rate_init': 0.5455594781168515}), {'error_score': 'raise', 'fit_params': {},
'return_n_test_samples': True, 'return_parameters': False, 'return_times': True, 'return_train_score': 'warn'})]
132
133 def __len__(self):
134 return self._size
135
...........................................................................
C:\Users\mat\AppData\Local\Programs\Python\Python36-32\lib\site-packages\sklearn\externals\joblib\parallel.py in <listcomp>(.0=<list_iterator object>)
126 def __init__(self, iterator_slice):
127 self.items = list(iterat
错误输出没有遗漏,它就是在这里结束的。另外需要说明,错误信息中引用的 mlpottimizzata 就是包含我所附第一段代码的函数。 我真的已经无计可施了,非常感谢任何帮助。提前谢谢大家 :)
注:代码的另一部分做的事情大致相同,只是使用了 solver='lbfgs',它运行得很顺利,这反而让我更加困惑。
当您使用列表推导式和 float 参数来定义网格参数时,就会出现这个问题。(回溯中失败的候选参数为 alpha≈811.13、learning_rate_init≈0.546,这两个值都不在下面代码所用的取值范围内。)
下面的代码在我这里可以正常运行:
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import RandomizedSearchCV
import pandas as pd
import numpy as np
from sklearn.model_selection import GridSearchCV
# Load the features and the target from CSV files (their contents are not shown here).
X = pd.read_csv('X.csv')
Y = pd.read_csv('y.csv')
# Drop the first row and the first column, then take the underlying array.
X = X.iloc[1:,1:].values
# Drop the first row and keep only the column at position 1.
Y = Y.iloc[1:,1].values
# Base estimator handed to the search below.
mlpsgd = MLPRegressor(max_iter = 1000, solver='sgd')
# Candidate alpha values from 0.01 in steps of 0.01, stopping near 0.1
# (np.arange excludes the stop value, subject to floating-point rounding).
alpha = np.arange(0.01, 0.1, 0.01)
# Three-layer candidates: each width is one of 8 log-spaced values between 10**0 and 10**2.2,
# truncated by int() (8**3 = 512 tuples).
hidden_layer_sizes = [(int(x),int(y),int(z)) for x in np.logspace(start = 0, stop = 2.2, num = 8) for y in np.logspace(start = 0, stop = 2.2, num = 8) for z in np.logspace(start = 0, stop = 2.2, num = 8)]
# Two-layer candidates: each width is one of 25 log-spaced values between 1 and 100 (25**2 = 625 tuples).
hidden_layer_sizes.extend((int(x),int(y)) for x in np.logspace(start = 0, stop = 2, num = 25) for y in np.logspace(start = 0, stop = 2, num = 25))
# One-layer candidates: 1000 log-spaced values between 10 and 100; int() truncation makes
# many of these entries identical.
hidden_layer_sizes.extend((int(x),) for x in np.logspace(start = 1, stop = 2, num = 1000))
activation = ['logistic', 'tanh', 'relu']
learning_rate = ['constant', 'invscaling','adaptive']
# Candidate initial learning rates on the same 0.01-step grid as alpha.
learning_rate_init = np.arange(0.01, 0.1, 0.01)
# Search space: parameter name -> candidate values.
random_grid3 = {'learning_rate': learning_rate,'activation': activation,'learning_rate_init': learning_rate_init, 'hidden_layer_sizes': hidden_layer_sizes, 'alpha': alpha}
# 350 sampled parameter settings; n_jobs=-1 asks for parallel execution.
mlp_random3 = RandomizedSearchCV(estimator = mlpsgd, param_distributions = random_grid3, n_iter = 350, n_jobs=-1)
mlp_random3.fit(X, Y)
# Show the estimator refit with the best parameter setting found.
print(mlp_random3.best_estimator_)
MLPRegressor(activation='relu', alpha=0.03, batch_size='auto', beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08, hidden_layer_sizes=(4, 18, 1), learning_rate='adaptive', learning_rate_init=0.05, max_iter=1000, momentum=0.9, nesterovs_momentum=True, power_t=0.5, random_state=None, shuffle=True, solver='sgd', tol=0.0001, validation_fraction=0.1, verbose=False, warm_start=False)