Python 这个脚本可以多线程吗?
Python Can this script be multithreaded?
如果有人能帮我把这个脚本改成多线程,并将输出写入文本文件,那就太好了。
我真的是编码新手,所以请帮助我。
#!/usr/bin/python
from tornado import ioloop, httpclient
from BeautifulSoup import BeautifulSoup
from mechanize import Browser
import requests
import urllib2
import socket
import sys
def handle_request(response):
print response.code
global i
i = 0
i -= 1
if i == 0:
http_client = httpclient.AsyncHTTPClient()
for url in open('urls.txt'):
try:
br = Browser()
br.set_handle_robots(False)
res = br.open(url, None, 2.5)
data = res.get_data()
soup = BeautifulSoup(data)
title = soup.find('title')
if soup.title != None:
print url, title.renderContents(), '\n'
i += 1
except urllib2.URLError, e:
print "Oops, timed out?", '\n'
except socket.error,e:
print "Oops, timed out?", '\n'
except socket.timeout:
print "Oops, timed out?", '\n'
print 'Processing of list completed, Cheers!!'
sys.exit()
try:
ioloop.IOLoop.instance().start()
except KeyboardInterrupt:
ioloop.IOLoop.instance().stop()
我正在尝试抓取一组主机的网页标题(HTML title)。
你已经实现的基本思路是一个非阻塞 HTTP 客户端(non-blocking HTTP client)。
def handle_request(response):
if response.error:
print "Error:", response.error
else:
print response.body
# Build the client once; constructing it again on every iteration is
# loop-invariant work.
http_client = httpclient.AsyncHTTPClient()
for url in ["http://google.com", "http://twitter.com"]:
    # handle_request is passed as the callback for each URL's response.
    # NOTE(review): the callbacks presumably fire only while the Tornado
    # IOLoop is running -- this snippet does not start it.
    http_client.fetch(url, handle_request)
您可以遍历您的 url,回调将在特定 url 的响应可用时立即调用。
如果没有必要,我不会把 mechanize、ioloop 等混在一起使用。
除此之外,我推荐grequests。轻量级的工具,满足你的需求。
import grequests
from bs4 import BeautifulSoup
urls = ['http://google.com', 'http://www.python.org/']
rs = (grequests.get(u) for u in urls)
res = grequests.map(rs)
for r in res:
soup = BeautifulSoup(r.text)
print "%s: %s" % (r.url, soup.title.text)
如果有人能帮我把这个脚本改成多线程,并将输出写入文本文件,那就太好了。我真的是编程新手,所以请帮帮我。
#!/usr/bin/python
from tornado import ioloop, httpclient
from BeautifulSoup import BeautifulSoup
from mechanize import Browser
import requests
import urllib2
import socket
import sys
def handle_request(response):
print response.code
global i
i = 0
i -= 1
if i == 0:
http_client = httpclient.AsyncHTTPClient()
for url in open('urls.txt'):
try:
br = Browser()
br.set_handle_robots(False)
res = br.open(url, None, 2.5)
data = res.get_data()
soup = BeautifulSoup(data)
title = soup.find('title')
if soup.title != None:
print url, title.renderContents(), '\n'
i += 1
except urllib2.URLError, e:
print "Oops, timed out?", '\n'
except socket.error,e:
print "Oops, timed out?", '\n'
except socket.timeout:
print "Oops, timed out?", '\n'
print 'Processing of list completed, Cheers!!'
sys.exit()
try:
ioloop.IOLoop.instance().start()
except KeyboardInterrupt:
ioloop.IOLoop.instance().stop()
我正在尝试抓取一组主机的网页标题(HTML title)。
你已经实现的基本思路是一个非阻塞 HTTP 客户端(non-blocking HTTP client)。
def handle_request(response):
if response.error:
print "Error:", response.error
else:
print response.body
# Build the client once; constructing it again on every iteration is
# loop-invariant work.
http_client = httpclient.AsyncHTTPClient()
for url in ["http://google.com", "http://twitter.com"]:
    # handle_request is passed as the callback for each URL's response.
    # NOTE(review): the callbacks presumably fire only while the Tornado
    # IOLoop is running -- this snippet does not start it.
    http_client.fetch(url, handle_request)
您可以遍历您的 url,回调将在特定 url 的响应可用时立即调用。
如果没有必要,我不会把 mechanize、ioloop 等混在一起使用。
除此之外,我推荐grequests。轻量级的工具,满足你的需求。
import grequests
from bs4 import BeautifulSoup
urls = ['http://google.com', 'http://www.python.org/']
rs = (grequests.get(u) for u in urls)
res = grequests.map(rs)
for r in res:
soup = BeautifulSoup(r.text)
print "%s: %s" % (r.url, soup.title.text)