运行多个进程时出现 IndexError(索引错误)
IndexError when I run multiple processes
此脚本抛出索引错误
from urlparse import urlparse
from multiprocessing.pool import Pool
import re
import urllib2
def btl_test(url):
    """Download *url* and return the text of the first <title> element.

    Raises IndexError when the page contains no <title>...</title> match.
    """
    html = urllib2.urlopen(url).read()
    titles = re.findall(r'<title>(.*?)<\/title>', html)
    # findall returns an empty list when nothing matches, so this index
    # is the expression that can raise IndexError.
    return titles[0]
# URLs whose page titles are to be extracted.
url = ["http://google.com","http://example.com","http://yahoo.com","http://linkedin.com","http://facebook.com","http://orkut.com","http://oosing.com","http://pinterets.com"]
nprocs = 100 # nprocs is the number of processes to run
# Create the pool of worker processes.
ParsePool = Pool(nprocs)
# Apply btl_test to every URL; an exception raised inside a worker is
# re-raised here in the parent by map().
ParsedURLS = ParsePool.map(btl_test,url)
print ParsedURLS
输出:
Traceback (most recent call last):
File "multithread1.py", line 15, in <module>
ParsedURLS = ParsePool.map(btl_test,url)
File "/usr/lib/python2.7/multiprocessing/pool.py", line 251, in map
return self.map_async(func, iterable, chunksize).get()
File "/usr/lib/python2.7/multiprocessing/pool.py", line 558, in get
raise self._value
IndexError: list index out of range
以上是错误信息
问题出在哪里,如何解决?
有可能某个 URL 返回的页面中没有 <title> 标签;这时 re.findall 返回空列表,对它取 [0] 就会抛出 IndexError。
所以请把下面这段代码:
def btl_test(url):
    """Download *url* and return the text of the first <title> element.

    Raises IndexError when the page contains no <title>...</title> match.
    """
    html = urllib2.urlopen(url).read()
    titles = re.findall(r'<title>(.*?)<\/title>', html)
    # findall returns an empty list when nothing matches, so this index
    # is the expression that can raise IndexError.
    return titles[0]
改成这样:
def btl_test(url):
    """Download *url* and return the text of its first <title> element.

    Args:
        url: Address of the page to fetch.

    Returns:
        The text of the first <title>...</title> match, or the string
        "None" when the page has no such match.
    """
    response = urllib2.urlopen(url)
    try:
        page = response.read()
    finally:
        # Always release the connection, even if read() fails, so that
        # repeated calls do not leak open handles.
        response.close()
    titles = re.findall(r'<title>(.*?)<\/title>', page)
    # An empty list means no title was found; avoid indexing into it.
    return titles[0] if titles else "None"
此脚本抛出索引错误
from urlparse import urlparse
from multiprocessing.pool import Pool
import re
import urllib2
def btl_test(url):
    """Download *url* and return the text of the first <title> element.

    Raises IndexError when the page contains no <title>...</title> match.
    """
    html = urllib2.urlopen(url).read()
    titles = re.findall(r'<title>(.*?)<\/title>', html)
    # findall returns an empty list when nothing matches, so this index
    # is the expression that can raise IndexError.
    return titles[0]
# URLs whose page titles are to be extracted.
url = ["http://google.com","http://example.com","http://yahoo.com","http://linkedin.com","http://facebook.com","http://orkut.com","http://oosing.com","http://pinterets.com"]
nprocs = 100 # nprocs is the number of processes to run
# Create the pool of worker processes.
ParsePool = Pool(nprocs)
# Apply btl_test to every URL; an exception raised inside a worker is
# re-raised here in the parent by map().
ParsedURLS = ParsePool.map(btl_test,url)
print ParsedURLS
输出:
Traceback (most recent call last):
File "multithread1.py", line 15, in <module>
ParsedURLS = ParsePool.map(btl_test,url)
File "/usr/lib/python2.7/multiprocessing/pool.py", line 251, in map
return self.map_async(func, iterable, chunksize).get()
File "/usr/lib/python2.7/multiprocessing/pool.py", line 558, in get
raise self._value
IndexError: list index out of range
以上是错误信息
问题出在哪里,如何解决?
有可能某个 URL 返回的页面中没有 <title> 标签;这时 re.findall 返回空列表,对它取 [0] 就会抛出 IndexError。
所以请把下面这段代码:
def btl_test(url):
    """Download *url* and return the text of the first <title> element.

    Raises IndexError when the page contains no <title>...</title> match.
    """
    html = urllib2.urlopen(url).read()
    titles = re.findall(r'<title>(.*?)<\/title>', html)
    # findall returns an empty list when nothing matches, so this index
    # is the expression that can raise IndexError.
    return titles[0]
改成这样:
def btl_test(url):
    """Download *url* and return the text of its first <title> element.

    Args:
        url: Address of the page to fetch.

    Returns:
        The text of the first <title>...</title> match, or the string
        "None" when the page has no such match.
    """
    response = urllib2.urlopen(url)
    try:
        page = response.read()
    finally:
        # Always release the connection, even if read() fails, so that
        # repeated calls do not leak open handles.
        response.close()
    titles = re.findall(r'<title>(.*?)<\/title>', page)
    # An empty list means no title was found; avoid indexing into it.
    return titles[0] if titles else "None"