pyqt4:循环主渲染 class?
pyqt4: Loop main Render class?
我有一个 PyQt4 class 可以下载我用于报废目的的网页。
当我将 url 列表传递给 Render class 时实例化它工作正常(单次调用)但是当我尝试循环 [r = Render(url, cb=scrape)] 有多个 urls 列表,在第一个循环之后,执行停止或挂起,没有抛出任何错误。
我想单独循环 class 因为 urls 列表属于不同的类别并且必须单独存储提取的内容。
我也知道了只能启动一个app,如果是这样怎么退出app不退出。这样新的 url 列表可以被同一个应用程序使用
我被这个问题困扰了一段时间。提前致谢
import sys
from PyQt4.QtCore import *
from PyQt4.QtGui import *
from PyQt4.QtWebKit import *
class Render(QWebPage):
def __init__(self, urls, cb):
self.app = QApplication(sys.argv)
QWebPage.__init__(self)
self.loadFinished.connect(self._loadFinished)
self.urls = urls
self.cb = cb
self.crawl()
self.app.exec_()
def crawl(self):
if self.urls:
url = self.urls.pop(0)
print 'Downloading', url
self.mainFrame().load(QUrl(url))
else:
self.app.quit()
def _loadFinished(self, result):
frame = self.mainFrame()
url = str(frame.url().toString())
html = frame.toHtml()
self.cb(url, html)
self.crawl()
def scrape(url, html):
pass # have scraping code here
url1 = ['http://webscraping.com', 'http://webscraping.com/blog']
url2 = ['http://webscraping.com', 'http://webscraping.com/blog']
urls =[]
urls.append(url1)
urls.append(url2)
for url in urls:
r = Render(url, cb=scrape)
问题是您只能实例化一个 QApplication 对象。这是一个避免这种情况的更新版本,然后仅在下载 URL:
时运行 Qt 的执行循环
import sys
from PyQt4.QtGui import QApplication
from PyQt4.QtCore import QUrl
from PyQt4.QtWebKit import QWebPage
class Render(QWebPage):
def __init__(self, cb):
self.app = QApplication(sys.argv)
QWebPage.__init__(self)
self.loadFinished.connect(self._loadFinished)
self.cb = cb
def crawl(self, url):
print 'Downloading', url
self.mainFrame().load(QUrl(url))
self.app.exec_()
def _loadFinished(self, result):
frame = self.mainFrame()
url = str(frame.url().toString())
html = frame.toHtml()
self.cb(url, html)
self.app.quit()
def scrape(url, html):
pass # add scraping code here
print len(html)
r = Render(cb=scrape)
urls = ['http://webscraping.com', 'http://webscraping.com/blog']
for url in urls:
r.crawl(url)
不幸的是,@hoju 的回答对我不起作用。
这是对我有用的方法(基本上是设置一个计时器来检查加载是否已完成)。
import sys
from PyQt4.QtGui import QApplication
from PyQt4.QtCore import QUrl, QTimer
from PyQt4.QtWebKit import QWebPage
class Render(QWebPage):
def __init__(self, url):
QWebPage.__init__(self)
self.frame = None
self.mainFrame().loadFinished.connect(self._loadFinished)
self.mainFrame().load(QUrl(url))
def _loadFinished(self, result):
self.frame = self.mainFrame()
def go_again():
global r, timer, urls
if(len(urls)>0):
print("loading",urls[0])
r = Render(urls.pop())
timer.start(1000)
else:
print("finished")
sys.exit(app.exec_())
def check_done():
global r, timer
if r.frame is not None:
timer.stop()
html_result = r.frame.toHtml()
#do something with html
print("loaded")
go_again()
app = QApplication(sys.argv)
urls = ['
timer = QTimer()
timer.timeout.connect(check_done)
#check every second
go_again()
sys.exit(app.exec_())
我有一个 PyQt4 class 可以下载我用于报废目的的网页。
当我将 url 列表传递给 Render class 时实例化它工作正常(单次调用)但是当我尝试循环 [r = Render(url, cb=scrape)] 有多个 urls 列表,在第一个循环之后,执行停止或挂起,没有抛出任何错误。
我想单独循环 class 因为 urls 列表属于不同的类别并且必须单独存储提取的内容。
我也知道了只能启动一个app,如果是这样怎么退出app不退出。这样新的 url 列表可以被同一个应用程序使用
我被这个问题困扰了一段时间。提前致谢
import sys
from PyQt4.QtCore import *
from PyQt4.QtGui import *
from PyQt4.QtWebKit import *
class Render(QWebPage):
def __init__(self, urls, cb):
self.app = QApplication(sys.argv)
QWebPage.__init__(self)
self.loadFinished.connect(self._loadFinished)
self.urls = urls
self.cb = cb
self.crawl()
self.app.exec_()
def crawl(self):
if self.urls:
url = self.urls.pop(0)
print 'Downloading', url
self.mainFrame().load(QUrl(url))
else:
self.app.quit()
def _loadFinished(self, result):
frame = self.mainFrame()
url = str(frame.url().toString())
html = frame.toHtml()
self.cb(url, html)
self.crawl()
def scrape(url, html):
pass # have scraping code here
url1 = ['http://webscraping.com', 'http://webscraping.com/blog']
url2 = ['http://webscraping.com', 'http://webscraping.com/blog']
urls =[]
urls.append(url1)
urls.append(url2)
for url in urls:
r = Render(url, cb=scrape)
问题是您只能实例化一个 QApplication 对象。这是一个避免这种情况的更新版本,然后仅在下载 URL:
时运行 Qt 的执行循环import sys
from PyQt4.QtGui import QApplication
from PyQt4.QtCore import QUrl
from PyQt4.QtWebKit import QWebPage
class Render(QWebPage):
def __init__(self, cb):
self.app = QApplication(sys.argv)
QWebPage.__init__(self)
self.loadFinished.connect(self._loadFinished)
self.cb = cb
def crawl(self, url):
print 'Downloading', url
self.mainFrame().load(QUrl(url))
self.app.exec_()
def _loadFinished(self, result):
frame = self.mainFrame()
url = str(frame.url().toString())
html = frame.toHtml()
self.cb(url, html)
self.app.quit()
def scrape(url, html):
pass # add scraping code here
print len(html)
r = Render(cb=scrape)
urls = ['http://webscraping.com', 'http://webscraping.com/blog']
for url in urls:
r.crawl(url)
不幸的是,@hoju 的回答对我不起作用。
这是对我有用的方法(基本上是设置一个计时器来检查加载是否已完成)。
import sys
from PyQt4.QtGui import QApplication
from PyQt4.QtCore import QUrl, QTimer
from PyQt4.QtWebKit import QWebPage
class Render(QWebPage):
def __init__(self, url):
QWebPage.__init__(self)
self.frame = None
self.mainFrame().loadFinished.connect(self._loadFinished)
self.mainFrame().load(QUrl(url))
def _loadFinished(self, result):
self.frame = self.mainFrame()
def go_again():
global r, timer, urls
if(len(urls)>0):
print("loading",urls[0])
r = Render(urls.pop())
timer.start(1000)
else:
print("finished")
sys.exit(app.exec_())
def check_done():
global r, timer
if r.frame is not None:
timer.stop()
html_result = r.frame.toHtml()
#do something with html
print("loaded")
go_again()
app = QApplication(sys.argv)
urls = ['
timer = QTimer()
timer.timeout.connect(check_done)
#check every second
go_again()
sys.exit(app.exec_())