使用 tkinter 将用户输入保存到变量中,以便后续调用
Using tkinter to input into a variable, to be called
我目前正在开发一个类似爬虫的程序:它会打开一个维基百科页面,目前的版本能够从该页面中抓取参考文献。
我想做一个图形用户界面,让用户输入要抓取的维基百科页面。我希望把用户的输入赋给 selectWikiPage
变量,但到目前为止一直没有成功。
下面是我当前的代码。
import requests
from bs4 import BeautifulSoup
import re
from tkinter import *
# begin tkinter gui
def show_entry_fields():
    """Print the text currently in the URL entry, then clear the entry.

    Reads and mutates the module-level ``e1`` Entry widget.
    """
    print("Wikipedia URL: %s" % (e1.get()))
    e1.delete(0, END)
# Build the window: a label, a pre-filled URL entry and a "Scrape" button.
master = Tk()
Label(master, text="Wikipedia URL").grid(row=0)
e1 = Entry(master)
e1.insert(10, "http://en.wikipedia.org/wiki/randomness")
e1.grid(row=0, column=1)
# The button only stops the event loop; the entry's text is never read,
# which is why the GUI input does not reach selectWikiPage below.
Button(master, text='Scrape', command=master.quit).grid(row=3, column=0, sticky=W, pady=4)
mainloop()

session = requests.Session()
# input() takes the prompt itself; wrapping it in print() made the prompt
# read "None" because print() returns None.
selectWikiPage = input("Please enter the Wikipedia page you wish to scrape from")
if "wikipedia" in selectWikiPage:
    # Retrieving a page is a GET request.
    html = session.get(selectWikiPage)
    bsObj = BeautifulSoup(html.text, "html.parser")
    # isolate references section of page
    findReferences = bsObj.find('ol', {'class': 'references'})
    href = BeautifulSoup(str(findReferences), "html.parser")
    links = [a["href"] for a in href.find_all("a", href=True)]
    for link in links:
        print("Link: " + link)
else:
    print("Error: Please enter a valid Wikipedia URL")
非常感谢。
这是一个基于您的代码的小型示例;它允许使用输入字段捕获要访问的 wiki 页面的值,并将其打印在控制台上。
然后您可以使用此 url 继续您的抓取。
from tkinter import *
def m_quit():
    """Append the entry's text to the global ``wiki_url`` and close the window.

    Reads the module-level ``e1`` Entry and destroys the module-level
    ``master`` window, which makes ``mainloop()`` return.
    """
    global wiki_url
    # No trailing '/': MediaWiki treats "Title/" as a different page title.
    wiki_url += e1.get()
    print('quitting')
    master.destroy()
# Base address; m_quit() appends whatever the user typed to it.
wiki_url = 'http://en.wikipedia.org/wiki/'

master = Tk()

url_label = Label(master, text="Wikipedia URL")
url_label.grid(row=0)

e1 = Entry(master)
e1.grid(row=0, column=1)

scrape_button = Button(master, text='Scrape', command=m_quit)
scrape_button.grid(row=3, column=0, sticky=W, pady=4)

mainloop()

# Reached once m_quit() has destroyed the window.
print(wiki_url)
您的代码基本正确,只需少量修改即可。希望对您有所帮助。如需进一步说明,请留言。
import requests
from bs4 import BeautifulSoup
import re
from tkinter import *
# You could instead declare selectWikiPage and master as globals and avoid
# passing them around.
# begin tkinter gui
def show_entry_fields():
    """Print the text currently in the URL entry, then clear the entry.

    Reads and mutates the module-level ``e1`` Entry widget.
    """
    print("Wikipedia URL: %s" % (e1.get()))
    e1.delete(0, END)
# Utility that selects all the text, from start to end, in a widget.
def select_all(event=None):
    """Select the full contents of the widget that received *event*.

    Args:
        event: Event object whose ``widget`` attribute provides
            ``select_range``. The default of ``None`` is kept for interface
            compatibility, but a real event is required.

    Returns:
        The string ``'break'``, which tells Tk to skip further bindings for
        this event.
    """
    event.widget.select_range(0, 'end')
    return 'break'
# Same scraping logic as before; the URL now comes from the widget via get().
def custom_scrape(e1, master):
    """Print the reference links of the Wikipedia page whose URL is in *e1*.

    Args:
        e1: Entry-like widget; ``e1.get()`` returns the text the user typed.
        master: Tk root; ``master.quit()`` is called before returning.

    An error message is printed instead of scraping when the text is not an
    http(s) URL on a Wikipedia host.
    """
    from urllib.parse import urlparse

    selectWikiPage = e1.get().strip()
    parsed = urlparse(selectWikiPage)
    # A plain substring test would accept the placeholder text
    # "Enter a wikipedia URL", which then fails inside requests; require a
    # real http(s) URL whose host mentions wikipedia.
    is_wikipedia_url = (
        parsed.scheme in ("http", "https")
        and "wikipedia" in parsed.netloc.lower()
    )
    if is_wikipedia_url:
        # The context manager closes the session's connections afterwards.
        with requests.Session() as session:
            # Retrieving a page is a GET request.
            html = session.get(selectWikiPage)
        bsObj = BeautifulSoup(html.text, "html.parser")
        # isolate references section of page
        findReferences = bsObj.find('ol', {'class': 'references'})
        # find() returns None when the page has no such list.
        if findReferences is not None:
            for anchor in findReferences.find_all("a", href=True):
                print("Link: " + anchor["href"])
    else:
        print("Error: Please enter a valid Wikipedia URL")
    # Stop the Tk event loop whether or not scraping happened.
    master.quit()
master = Tk()

url_label = Label(master, text="Wikipedia URL")
url_label.grid(row=0)

e1 = Entry(master)
# Bind Ctrl+A to select the whole contents of the Entry widget.
e1.bind('<Control-a>', select_all)
e1.insert(10, "Enter a wikipedia URL")
e1.grid(row=0, column=1)

# A Button's command is called with no arguments, so a zero-argument lambda
# is used to forward the entry and the root window to custom_scrape().
scrape_button = Button(master, text='Scrape', command=lambda: custom_scrape(e1, master))
scrape_button.grid(row=3, column=0, sticky=W, pady=4)

mainloop()
我目前正在开发一个类似爬虫的程序:它会打开一个维基百科页面,目前的版本能够从该页面中抓取参考文献。
我想做一个图形用户界面,让用户输入要抓取的维基百科页面。我希望把用户的输入赋给 selectWikiPage
变量,但到目前为止一直没有成功。
下面是我当前的代码。
import requests
from bs4 import BeautifulSoup
import re
from tkinter import *
# begin tkinter gui
def show_entry_fields():
    """Print the text currently in the URL entry, then clear the entry.

    Reads and mutates the module-level ``e1`` Entry widget.
    """
    print("Wikipedia URL: %s" % (e1.get()))
    e1.delete(0, END)
# Build the window: a label, a pre-filled URL entry and a "Scrape" button.
master = Tk()
Label(master, text="Wikipedia URL").grid(row=0)
e1 = Entry(master)
e1.insert(10, "http://en.wikipedia.org/wiki/randomness")
e1.grid(row=0, column=1)
# The button only stops the event loop; the entry's text is never read,
# which is why the GUI input does not reach selectWikiPage below.
Button(master, text='Scrape', command=master.quit).grid(row=3, column=0, sticky=W, pady=4)
mainloop()

session = requests.Session()
# input() takes the prompt itself; wrapping it in print() made the prompt
# read "None" because print() returns None.
selectWikiPage = input("Please enter the Wikipedia page you wish to scrape from")
if "wikipedia" in selectWikiPage:
    # Retrieving a page is a GET request.
    html = session.get(selectWikiPage)
    bsObj = BeautifulSoup(html.text, "html.parser")
    # isolate references section of page
    findReferences = bsObj.find('ol', {'class': 'references'})
    href = BeautifulSoup(str(findReferences), "html.parser")
    links = [a["href"] for a in href.find_all("a", href=True)]
    for link in links:
        print("Link: " + link)
else:
    print("Error: Please enter a valid Wikipedia URL")
非常感谢。
这是一个基于您的代码的小型示例;它允许使用输入字段捕获要访问的 wiki 页面的值,并将其打印在控制台上。
然后您可以使用此 url 继续您的抓取。
from tkinter import *
def m_quit():
    """Append the entry's text to the global ``wiki_url`` and close the window.

    Reads the module-level ``e1`` Entry and destroys the module-level
    ``master`` window, which makes ``mainloop()`` return.
    """
    global wiki_url
    # No trailing '/': MediaWiki treats "Title/" as a different page title.
    wiki_url += e1.get()
    print('quitting')
    master.destroy()
# Base address; m_quit() appends whatever the user typed to it.
wiki_url = 'http://en.wikipedia.org/wiki/'

master = Tk()

url_label = Label(master, text="Wikipedia URL")
url_label.grid(row=0)

e1 = Entry(master)
e1.grid(row=0, column=1)

scrape_button = Button(master, text='Scrape', command=m_quit)
scrape_button.grid(row=3, column=0, sticky=W, pady=4)

mainloop()

# Reached once m_quit() has destroyed the window.
print(wiki_url)
您的代码基本正确,只需少量修改即可。希望对您有所帮助。如需进一步说明,请留言。
import requests
from bs4 import BeautifulSoup
import re
from tkinter import *
# You could instead declare selectWikiPage and master as globals and avoid
# passing them around.
# begin tkinter gui
def show_entry_fields():
    """Print the text currently in the URL entry, then clear the entry.

    Reads and mutates the module-level ``e1`` Entry widget.
    """
    print("Wikipedia URL: %s" % (e1.get()))
    e1.delete(0, END)
# Utility that selects all the text, from start to end, in a widget.
def select_all(event=None):
    """Select the full contents of the widget that received *event*.

    Args:
        event: Event object whose ``widget`` attribute provides
            ``select_range``. The default of ``None`` is kept for interface
            compatibility, but a real event is required.

    Returns:
        The string ``'break'``, which tells Tk to skip further bindings for
        this event.
    """
    event.widget.select_range(0, 'end')
    return 'break'
# Same scraping logic as before; the URL now comes from the widget via get().
def custom_scrape(e1, master):
    """Print the reference links of the Wikipedia page whose URL is in *e1*.

    Args:
        e1: Entry-like widget; ``e1.get()`` returns the text the user typed.
        master: Tk root; ``master.quit()`` is called before returning.

    An error message is printed instead of scraping when the text is not an
    http(s) URL on a Wikipedia host.
    """
    from urllib.parse import urlparse

    selectWikiPage = e1.get().strip()
    parsed = urlparse(selectWikiPage)
    # A plain substring test would accept the placeholder text
    # "Enter a wikipedia URL", which then fails inside requests; require a
    # real http(s) URL whose host mentions wikipedia.
    is_wikipedia_url = (
        parsed.scheme in ("http", "https")
        and "wikipedia" in parsed.netloc.lower()
    )
    if is_wikipedia_url:
        # The context manager closes the session's connections afterwards.
        with requests.Session() as session:
            # Retrieving a page is a GET request.
            html = session.get(selectWikiPage)
        bsObj = BeautifulSoup(html.text, "html.parser")
        # isolate references section of page
        findReferences = bsObj.find('ol', {'class': 'references'})
        # find() returns None when the page has no such list.
        if findReferences is not None:
            for anchor in findReferences.find_all("a", href=True):
                print("Link: " + anchor["href"])
    else:
        print("Error: Please enter a valid Wikipedia URL")
    # Stop the Tk event loop whether or not scraping happened.
    master.quit()
master = Tk()

url_label = Label(master, text="Wikipedia URL")
url_label.grid(row=0)

e1 = Entry(master)
# Bind Ctrl+A to select the whole contents of the Entry widget.
e1.bind('<Control-a>', select_all)
e1.insert(10, "Enter a wikipedia URL")
e1.grid(row=0, column=1)

# A Button's command is called with no arguments, so a zero-argument lambda
# is used to forward the entry and the root window to custom_scrape().
scrape_button = Button(master, text='Scrape', command=lambda: custom_scrape(e1, master))
scrape_button.grid(row=3, column=0, sticky=W, pady=4)

mainloop()