How can I get output from method of class into global variable
I am trying to get some data from a JavaScript web page. My code generates multiple links and parses them one by one; the parsed output is a list. I wrote this code with the help of here. However, it builds the list inside the class, and I want to insert the list items into an sqlite table, so I need to make the local list items available globally. I have tried creating a global list, putting it into the class, appending to it and returning it. I have tried inserting the items directly into the database from the processCurrentPage method, and I have tried creating a list on the class and accessing it via Webpage.list. None of these approaches worked. One of my attempts is below; it is not the best one, just an example, and I have tried many variations like it. Is there a good way to handle this?
P.S.: I am new to Python. I have been researching this for two full days and have read all the class documentation, but I could not find a way.
import sys
from PyQt5 import QtCore, QtWidgets, QtWebEngineWidgets
import requests
from bs4 import BeautifulSoup
import bs4 as bs

class WebPage(QtWebEngineWidgets.QWebEnginePage):
    alldatas = []

    def __init__(self):
        super(WebPage, self).__init__()
        self.loadFinished.connect(self.handleLoadFinished)

    def start(self, urls):
        self._urls = iter(urls)
        self.fetchNext

    @property
    def fetchNext(self):
        try:
            url = next(self._urls)
        except StopIteration:
            return False
        else:
            self.load(QtCore.QUrl(url))
            return True

    def processCurrentPage(self, html):
        url = self.url().toString()
        # do stuff with html...
        soup = bs.BeautifulSoup(html, 'html.parser')
        data = soup.find('div', class_='tablo_dual_board')
        data1 = data.text
        data2 = data1.splitlines()
        self.alldatas += data2
        if not self.fetchNext:
            QtWidgets.qApp.quit()

    def handleLoadFinished(self):
        self.toHtml(self.processCurrentPage)

    def javaScriptConsoleMessage(self, QWebEnginePage_JavaScriptConsoleMessageLevel, p_str, p_int, p_str_1):
        # disable javascript error output
        pass

if __name__ == '__main__':

    # generate some test urls
    onexurl = "https://1xbahis1.com/en/live/Football/"
    r = requests.get(onexurl)
    soup = BeautifulSoup(r.content, "html.parser")
    income = soup.find_all("ul", {"id": "games_content"})
    links = soup.find_all("a", {"class": "c-events__name"})
    urls = []
    for matchlink in links:
        urls.append("https://1xbahis1.com/en/" + (matchlink.get("href")))

    app = QtWidgets.QApplication(sys.argv)
    webpage = WebPage()
    webpage.start(urls)
    print(webpage.alldatas)
    sys.exit(app.exec_())
Here is a version of your script that does what you want. The scrape_page function is called for every url that is processed, and the data is added to the global records list. The process_records function is called once, after all the pages have been scraped. You can use that function to add the records to your database.
import sys
import requests
from bs4 import BeautifulSoup
from PyQt5 import QtCore, QtWidgets, QtWebEngineWidgets

records = []

def scrape_page(url, html):
    print('scrape page:', url)
    soup = BeautifulSoup(html, 'html.parser')
    data = soup.find('div', class_='tablo_dual_board')
    if data is not None:
        records.append(data.text.splitlines())
    else:
        print('error: could not find tablo_dual_board')

def process_records():
    # add records to database ...
    print('process records:', len(records))

def generate_urls():
    onexurl = "https://1xbahis1.com/en/live/Football/"
    reply = requests.get(onexurl)
    soup = BeautifulSoup(reply.content, "html.parser")
    income = soup.find_all("ul", {"id": "games_content"})
    links = soup.find_all("a", {"class": "c-events__name"})
    urls = []
    for matchlink in links:
        urls.append("https://1xbahis1.com/en/" + (matchlink.get("href")))
    return urls

class WebPage(QtWebEngineWidgets.QWebEnginePage):
    def __init__(self):
        super(WebPage, self).__init__()
        self.loadFinished.connect(self.handleLoadFinished)

    def start(self, urls):
        self._urls = iter(urls)
        self.fetchNext()

    def fetchNext(self):
        try:
            url = next(self._urls)
        except StopIteration:
            return False
        else:
            self.load(QtCore.QUrl(url))
            return True

    def processCurrentPage(self, html):
        scrape_page(self.url().toString(), html)
        if not self.fetchNext():
            process_records()
            QtWidgets.qApp.quit()

    def handleLoadFinished(self):
        self.toHtml(self.processCurrentPage)

    def javaScriptConsoleMessage(self, QWebEnginePage_JavaScriptConsoleMessageLevel, p_str, p_int, p_str_1):
        # disable javascript error output
        pass

if __name__ == '__main__':

    app = QtWidgets.QApplication(sys.argv)
    webpage = WebPage()
    webpage.start(generate_urls())
    sys.exit(app.exec_())
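
Since the end goal is to get the scraped rows into an sqlite table, here is a minimal sketch of what process_records could look like using the standard sqlite3 module. The database file name (matches.db), the table name and the single-column schema are hypothetical examples, not something from your page; adjust them to whatever schema you actually need.

import sqlite3

def process_records():
    # 'matches.db' and the 'matches' table/column are placeholder names for illustration
    conn = sqlite3.connect('matches.db')
    cur = conn.cursor()
    cur.execute('CREATE TABLE IF NOT EXISTS matches (line TEXT)')
    for record in records:
        # each record is the list of text lines scraped from one page
        cur.executemany('INSERT INTO matches (line) VALUES (?)',
                        [(line,) for line in record])
    conn.commit()
    conn.close()
    print('process records:', len(records))

Because process_records only runs once, after the last page has loaded, all the inserts happen in a single connection and are committed together before the Qt application quits.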