Python 中的多进程(从普通 for 循环改为多进程 for 循环)
multiprocessing in python (going from a for loop to multiprocessing for loop)
我有一个可以正常运行的脚本。它包含一个 for 循环,我想通过引入多进程(multiprocessing)来提高速度。
没有多处理的代码如下:
Symbol = Symbol[0:]  # slice here to choose which stocks to look at

# ---- sequential version: one download per ticker, in order ----
for item in Symbol:
    print(item)
    try:
        serious = web.DataReader([item], 'yahoo', start, end)['Adj Close']
    except RemoteDataError:
        # Nothing could be downloaded for this ticker; go on to the next.
        continue

    # Plain list of the ticker's 'Adj Close' column.
    serious2 = serious.loc[:, item].tolist()
    tickerlistori.append(item)

    valuemax = max(serious2)
    valuemin = min(serious2)
    indexmax = serious2.index(valuemax)
    indexmin = serious2.index(valuemin)
    pricecurrent = serious2[-1]

    # Keep tickers that peaked above 30, bottomed below 2 and whose last
    # value in the window is below 2.5.
    if valuemax > 30 and valuemin < 2 and pricecurrent < 2.5:
        tickerlist.append(item)
        maxpricelist.append(valuemax)
        minpricelist.append(valuemin)

print(tickerlist)
下面的第二个代码块是“并行处理”
# First parallel attempt from the question, kept exactly as posted because it
# is the failing example being discussed.
Symbol= Symbol[0:] #slicing to coose which stocks to look at
############ multi processing before the for loop
# Worker handed to pool.map further down.
# NOTE(review): pool.map calls the worker once per element of its iterable,
# so the parameter named Symbol receives a single element here, not the
# whole list. The loop below then iterates over that one element
# (presumably character by character if the elements are ticker strings --
# confirm against the data).
def search1(Symbol):
for item in Symbol:
print item #trying to see why the tickers are messed up
try:
serious=web.DataReader([item], 'yahoo', start, end)['Adj Close']
serious2=serious.loc[:, item].tolist() #extract the column of 'Adj Close'
tickerlistori.append(item)
valuemax = max(serious2)
indexmax = serious2.index(max(serious2))
valuemin = min(serious2)
indexmin = serious2.index(min(serious2))
pricecurrent = serious2[-1]
if valuemax>30 and valuemin<2 and pricecurrent<2.5:
# These appends happen in whichever process runs search1. The function has
# no return statement, so every call returns None.
tickerlist.append(item)
maxpricelist.append(valuemax)
minpricelist.append(valuemin)
except RemoteDataError:
pass
pool = Pool(processes=4)
# pool.map returns the list of the workers' return values (None for every
# call of the search1 above); that list replaces the module-level tickerlist.
tickerlist = pool.map(search1, Symbol)
print tickerlist
第一个代码块工作正常;第二个虽然运行时没有报错,但传给 pool.map(search1, Symbol) 的股票代码似乎不正确。
提前致谢。
(Symbol 只是一个股票行情列表)
----------------进行 tdelaney 建议的更改后
import matplotlib.pyplot as plt
import csv
import pandas as pd
import datetime
import pandas.io.data as web
from pandas.io.data import DataReader, SymbolWarning, RemoteDataError
from filesortfunct import filesort
from scipy import stats
from scipy.stats.stats import pearsonr
import numpy as np
import math
from multiprocessing import Pool
import warnings
warnings.filterwarnings("ignore")
# The two dates between which stock prices are examined.
start = datetime.datetime.strptime('2/10/2015', '%m/%d/%Y')
end = datetime.datetime.strptime('2/25/2016', '%m/%d/%Y')

# Accumulators meant to collect the matching tickers and their min/max prices.
# NOTE(review): tickerlistori was originally described as "stocks available
# from google finance", but the download calls in this file use 'yahoo' --
# confirm the intent.
tickerlistori = []
tickerlist = []
maxpricelist = []
minpricelist = []

# Exchange listings to scan; 'NASDAQ.csv' and 'AMEX.csv' are currently disabled.
datanamelist = ['NYSE.csv']
for each in datanamelist:
    dataname = each  # csv file from which to extract stock tickers
    new = 'new'
    # Write a copy ("new" + original name) that holds only the Symbol column,
    # then read that copy back with the csv module.
    df = pd.read_csv(dataname, sep=',')
    df = df[['Symbol']]
    df.to_csv(new + dataname, sep=',', index=False)
    # 'rb' is the mode the Python 2 csv module expects. The with-block
    # guarantees the handle is closed once the rows have been read.
    with open(new + dataname, 'rb') as x:
        f = csv.reader(x)
        # Transpose rows into columns while the file is still open.
        Symbol = list(zip(*f))
    Symbol = Symbol[0]  # the first column, header cell included
    # Skip the first 3210 entries; this also drops the "Symbol" header cell
    # sitting at index 0.
    Symbol = Symbol[3210:]
    Symbol = Symbol[0:]  # slice here to choose which stocks to look at
def search1(item):
    """Download one ticker's adjusted closes and screen them.

    Written as the per-element worker for ``pool.map``: it handles a single
    ticker and reports through its return value instead of appending to
    module-level lists.

    Args:
        item: ticker symbol handed to ``web.DataReader``.

    Returns:
        ``(item, valuemax, valuemin)`` when the highest adjusted close in the
        window is above 30, the lowest is below 2 and the last one is below
        2.5. ``None`` otherwise, including when the download raises
        ``RemoteDataError`` or yields no rows.
    """
    print(item)  # trace which ticker this call is handling
    try:
        serious = web.DataReader([item], 'yahoo', start, end)['Adj Close']
    except RemoteDataError:
        return None

    # Plain list of the ticker's 'Adj Close' column.
    serious2 = serious.loc[:, item].tolist()
    if not serious2:
        # max()/min() raise ValueError on an empty list, and an exception in
        # one worker call would abort the whole pool.map.
        return None

    valuemax = max(serious2)
    valuemin = min(serious2)
    pricecurrent = serious2[-1]
    if valuemax > 30 and valuemin < 2 and pricecurrent < 2.5:
        return item, valuemax, valuemin
    return None
# Fan the tickers out over four worker processes and merge the hits in the
# parent. The Pool constructor already launches the workers; Pool has no
# start() method, so calling one raises AttributeError.
# NOTE(review): on platforms that start workers by re-importing this module
# (e.g. Windows) this section has to live under `if __name__ == '__main__':`.
pool = Pool(processes=4)
try:
    results = pool.map(search1, Symbol)
finally:
    # Release the worker processes whether or not the map succeeded.
    pool.close()
    pool.join()

for result in results:
    if result:  # a falsy result (None) means the ticker produced no match
        tickerlist.append(result[0])
        maxpricelist.append(result[1])
        minpricelist.append(result[2])
print(tickerlist)
你的代码有几个问题:
- map 会自行遍历 Symbol,并对其中的每个元素分别调用一次工作函数(worker),因此工作函数内部不需要再用 for 循环遍历一次
- 你更新了全局列表……但这些列表只在各个子进程内部是全局的,父进程(parent)永远看不到这些更新
下面是更新后的版本:
Symbol = Symbol[0:]  # slice here to choose which stocks to look at


# ---- worker used by the multiprocessing version ----
def search1(item):
    """Download one ticker's adjusted closes and screen them.

    Meant to be mapped over the ticker list by a process pool, so it takes a
    single ticker and hands its findings back as a return value.

    Args:
        item: ticker symbol handed to ``web.DataReader``.

    Returns:
        ``(item, valuemax, valuemin)`` when the highest adjusted close in the
        window is above 30, the lowest is below 2 and the last one is below
        2.5. ``None`` otherwise, including when the download raises
        ``RemoteDataError`` or yields no rows.
    """
    print(item)  # trace which ticker this call is handling
    try:
        serious = web.DataReader([item], 'yahoo', start, end)['Adj Close']
    except RemoteDataError:
        return None

    # Plain list of the ticker's 'Adj Close' column.
    serious2 = serious.loc[:, item].tolist()
    if not serious2:
        # max()/min() raise ValueError on an empty list, and an exception in
        # one worker call would abort the whole pool.map.
        return None

    valuemax = max(serious2)
    valuemin = min(serious2)
    pricecurrent = serious2[-1]
    if valuemax > 30 and valuemin < 2 and pricecurrent < 2.5:
        return item, valuemax, valuemin
    return None
# Run search1 over every ticker with four worker processes, then collect the
# matches in the parent process, where the result lists actually live.
# NOTE(review): on platforms that start workers by re-importing this module
# (e.g. Windows) this section has to live under `if __name__ == '__main__':`.
pool = Pool(processes=4)
try:
    results = pool.map(search1, Symbol)
finally:
    # Release the worker processes whether or not the map succeeded.
    pool.close()
    pool.join()

for result in results:
    if result:  # a falsy result (None) means the ticker produced no match
        tickerlist.append(result[0])
        maxpricelist.append(result[1])
        minpricelist.append(result[2])
print(tickerlist)
我有一个可以正常运行的脚本。它包含一个 for 循环,我想通过引入多进程(multiprocessing)来提高速度。
没有多处理的代码如下:
Symbol = Symbol[0:]  # slice here to choose which stocks to look at

# ---- sequential version: one download per ticker, in order ----
for item in Symbol:
    print(item)
    try:
        serious = web.DataReader([item], 'yahoo', start, end)['Adj Close']
    except RemoteDataError:
        # Nothing could be downloaded for this ticker; go on to the next.
        continue

    # Plain list of the ticker's 'Adj Close' column.
    serious2 = serious.loc[:, item].tolist()
    tickerlistori.append(item)

    valuemax = max(serious2)
    valuemin = min(serious2)
    indexmax = serious2.index(valuemax)
    indexmin = serious2.index(valuemin)
    pricecurrent = serious2[-1]

    # Keep tickers that peaked above 30, bottomed below 2 and whose last
    # value in the window is below 2.5.
    if valuemax > 30 and valuemin < 2 and pricecurrent < 2.5:
        tickerlist.append(item)
        maxpricelist.append(valuemax)
        minpricelist.append(valuemin)

print(tickerlist)
下面的第二个代码块是“并行处理”
# First parallel attempt from the question, kept exactly as posted because it
# is the failing example being discussed.
Symbol= Symbol[0:] #slicing to coose which stocks to look at
############ multi processing before the for loop
# Worker handed to pool.map further down.
# NOTE(review): pool.map calls the worker once per element of its iterable,
# so the parameter named Symbol receives a single element here, not the
# whole list. The loop below then iterates over that one element
# (presumably character by character if the elements are ticker strings --
# confirm against the data).
def search1(Symbol):
for item in Symbol:
print item #trying to see why the tickers are messed up
try:
serious=web.DataReader([item], 'yahoo', start, end)['Adj Close']
serious2=serious.loc[:, item].tolist() #extract the column of 'Adj Close'
tickerlistori.append(item)
valuemax = max(serious2)
indexmax = serious2.index(max(serious2))
valuemin = min(serious2)
indexmin = serious2.index(min(serious2))
pricecurrent = serious2[-1]
if valuemax>30 and valuemin<2 and pricecurrent<2.5:
# These appends happen in whichever process runs search1. The function has
# no return statement, so every call returns None.
tickerlist.append(item)
maxpricelist.append(valuemax)
minpricelist.append(valuemin)
except RemoteDataError:
pass
pool = Pool(processes=4)
# pool.map returns the list of the workers' return values (None for every
# call of the search1 above); that list replaces the module-level tickerlist.
tickerlist = pool.map(search1, Symbol)
print tickerlist
第一个代码块工作正常;第二个虽然运行时没有报错,但传给 pool.map(search1, Symbol) 的股票代码似乎不正确。
提前致谢。
(Symbol 只是一个股票行情列表)
----------------进行 tdelaney 建议的更改后
import matplotlib.pyplot as plt
import csv
import pandas as pd
import datetime
import pandas.io.data as web
from pandas.io.data import DataReader, SymbolWarning, RemoteDataError
from filesortfunct import filesort
from scipy import stats
from scipy.stats.stats import pearsonr
import numpy as np
import math
from multiprocessing import Pool
import warnings
warnings.filterwarnings("ignore")
# The two dates between which stock prices are examined.
start = datetime.datetime.strptime('2/10/2015', '%m/%d/%Y')
end = datetime.datetime.strptime('2/25/2016', '%m/%d/%Y')

# Accumulators meant to collect the matching tickers and their min/max prices.
# NOTE(review): tickerlistori was originally described as "stocks available
# from google finance", but the download calls in this file use 'yahoo' --
# confirm the intent.
tickerlistori = []
tickerlist = []
maxpricelist = []
minpricelist = []

# Exchange listings to scan; 'NASDAQ.csv' and 'AMEX.csv' are currently disabled.
datanamelist = ['NYSE.csv']
for each in datanamelist:
    dataname = each  # csv file from which to extract stock tickers
    new = 'new'
    # Write a copy ("new" + original name) that holds only the Symbol column,
    # then read that copy back with the csv module.
    df = pd.read_csv(dataname, sep=',')
    df = df[['Symbol']]
    df.to_csv(new + dataname, sep=',', index=False)
    # 'rb' is the mode the Python 2 csv module expects. The with-block
    # guarantees the handle is closed once the rows have been read.
    with open(new + dataname, 'rb') as x:
        f = csv.reader(x)
        # Transpose rows into columns while the file is still open.
        Symbol = list(zip(*f))
    Symbol = Symbol[0]  # the first column, header cell included
    # Skip the first 3210 entries; this also drops the "Symbol" header cell
    # sitting at index 0.
    Symbol = Symbol[3210:]
    Symbol = Symbol[0:]  # slice here to choose which stocks to look at
def search1(item):
    """Download one ticker's adjusted closes and screen them.

    Written as the per-element worker for ``pool.map``: it handles a single
    ticker and reports through its return value instead of appending to
    module-level lists.

    Args:
        item: ticker symbol handed to ``web.DataReader``.

    Returns:
        ``(item, valuemax, valuemin)`` when the highest adjusted close in the
        window is above 30, the lowest is below 2 and the last one is below
        2.5. ``None`` otherwise, including when the download raises
        ``RemoteDataError`` or yields no rows.
    """
    print(item)  # trace which ticker this call is handling
    try:
        serious = web.DataReader([item], 'yahoo', start, end)['Adj Close']
    except RemoteDataError:
        return None

    # Plain list of the ticker's 'Adj Close' column.
    serious2 = serious.loc[:, item].tolist()
    if not serious2:
        # max()/min() raise ValueError on an empty list, and an exception in
        # one worker call would abort the whole pool.map.
        return None

    valuemax = max(serious2)
    valuemin = min(serious2)
    pricecurrent = serious2[-1]
    if valuemax > 30 and valuemin < 2 and pricecurrent < 2.5:
        return item, valuemax, valuemin
    return None
# Fan the tickers out over four worker processes and merge the hits in the
# parent. The Pool constructor already launches the workers; Pool has no
# start() method, so calling one raises AttributeError.
# NOTE(review): on platforms that start workers by re-importing this module
# (e.g. Windows) this section has to live under `if __name__ == '__main__':`.
pool = Pool(processes=4)
try:
    results = pool.map(search1, Symbol)
finally:
    # Release the worker processes whether or not the map succeeded.
    pool.close()
    pool.join()

for result in results:
    if result:  # a falsy result (None) means the ticker produced no match
        tickerlist.append(result[0])
        maxpricelist.append(result[1])
        minpricelist.append(result[2])
print(tickerlist)
你的代码有几个问题:
- map 会自行遍历 Symbol,并对其中的每个元素分别调用一次工作函数(worker),因此工作函数内部不需要再用 for 循环遍历一次
- 你更新了全局列表……但这些列表只在各个子进程内部是全局的,父进程(parent)永远看不到这些更新
下面是更新后的版本:
Symbol = Symbol[0:]  # slice here to choose which stocks to look at


# ---- worker used by the multiprocessing version ----
def search1(item):
    """Download one ticker's adjusted closes and screen them.

    Meant to be mapped over the ticker list by a process pool, so it takes a
    single ticker and hands its findings back as a return value.

    Args:
        item: ticker symbol handed to ``web.DataReader``.

    Returns:
        ``(item, valuemax, valuemin)`` when the highest adjusted close in the
        window is above 30, the lowest is below 2 and the last one is below
        2.5. ``None`` otherwise, including when the download raises
        ``RemoteDataError`` or yields no rows.
    """
    print(item)  # trace which ticker this call is handling
    try:
        serious = web.DataReader([item], 'yahoo', start, end)['Adj Close']
    except RemoteDataError:
        return None

    # Plain list of the ticker's 'Adj Close' column.
    serious2 = serious.loc[:, item].tolist()
    if not serious2:
        # max()/min() raise ValueError on an empty list, and an exception in
        # one worker call would abort the whole pool.map.
        return None

    valuemax = max(serious2)
    valuemin = min(serious2)
    pricecurrent = serious2[-1]
    if valuemax > 30 and valuemin < 2 and pricecurrent < 2.5:
        return item, valuemax, valuemin
    return None
# Run search1 over every ticker with four worker processes, then collect the
# matches in the parent process, where the result lists actually live.
# NOTE(review): on platforms that start workers by re-importing this module
# (e.g. Windows) this section has to live under `if __name__ == '__main__':`.
pool = Pool(processes=4)
try:
    results = pool.map(search1, Symbol)
finally:
    # Release the worker processes whether or not the map succeeded.
    pool.close()
    pool.join()

for result in results:
    if result:  # a falsy result (None) means the ticker produced no match
        tickerlist.append(result[0])
        maxpricelist.append(result[1])
        minpricelist.append(result[2])
print(tickerlist)