How can I iterate functions associated with web data?
Find the link at position 3 (the first name is 1). Follow that link. Repeat this process 4 times. The answer is the last name you retrieve.
Sequence of names: Fikret Montgomery Mhairade Butchi Anayah
My code:
import urllib.request, urllib.parse, urllib.error
from bs4 import BeautifulSoup
import ssl

count = 0
conec = list()
conec2 = list()

ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

class linker():
    def conectar(self, n, u):
        url = u
        html = urllib.request.urlopen(url, context=ctx).read()
        soup = BeautifulSoup(html, 'html.parser')
        tags = soup('a')
        for tag in tags:
            link = tag.get('href', None)
            conec.append(link)
        new_link = conec[n-1]
        return new_link

    def new_page(self, n, u):
        url = u
        html = urllib.request.urlopen(url, context=ctx).read()
        soup = BeautifulSoup(html, 'html.parser')
        tags = soup('a')
        for tag in tags:
            link = tag.get('href', None)
            conec2.append(link)
        new_link2 = conec2[n-1]
        return new_link2

count = int(input('Enter count: '))
n = int(input('Enter position: '))
x = linker()
while count != 0:
    u = 'http://py4e-data.dr-chuck.net/known_by_Fikret.html'
    u = x.conectar(n, u)
    a = x.new_page(n, u)
    count -= 1
print(a)
Output:
Enter count: 4
Enter position: 3
http://py4e-data.dr-chuck.net/known_by_Mhairade.html
It only gets as far as Mhairade. I have tried a bunch of things with no success.
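The loop most likely stalls because u is reset to the Fikret URL at the top of every pass, and conec/conec2 keep growing while the code always reads the same index, so each iteration just re-resolves the same two hops (Montgomery, then Mhairade). Below is a minimal sketch of that fix, keeping the urllib/ssl setup from the question; count and position are hard-coded here for brevity, so adapt as needed:

import ssl
import urllib.request
from bs4 import BeautifulSoup

ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

count = 4   # how many links to follow
n = 3       # 1-based position of the link on each page

# Start URL lives outside the loop so each pass advances from the previous page.
u = 'http://py4e-data.dr-chuck.net/known_by_Fikret.html'
for _ in range(count):
    html = urllib.request.urlopen(u, context=ctx).read()
    soup = BeautifulSoup(html, 'html.parser')
    u = soup('a')[n - 1].get('href')   # follow the link at position n
    print(u)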
You can adapt this for user input, but it seems to work for the requirements as you stated them. [Note: this needs Python 3.8+ (for the := operator) and the requests module]
import requests
from bs4 import BeautifulSoup as BS

def gethref(url, pos):
    if url and pos >= 0:
        try:
            with requests.Session() as session:
                (r := session.get(url)).raise_for_status()
                return BS(r.text, 'lxml').find_all('a')[pos].get('href', None)
        except Exception:
            pass
    return None

href = 'http://py4e-data.dr-chuck.net/known_by_Fikret.html'
for _ in range(4):
    href = gethref(href, 2)
    print(href)
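As a usage sketch (not part of the answer above), the same function can be wired to user input. Note that gethref indexes links from zero, so a 1-based position from the assignment has to be passed as position - 1:

count = int(input('Enter count: '))
position = int(input('Enter position: '))    # 1-based, as in the assignment

href = 'http://py4e-data.dr-chuck.net/known_by_Fikret.html'
for _ in range(count):
    href = gethref(href, position - 1)       # gethref() expects a zero-based index
print(href)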
Here is a recursive solution using requests.
import requests
from bs4 import BeautifulSoup

url = 'http://py4e-data.dr-chuck.net/known_by_Fikret.html'

def solve(url, count, position):
    # Exit condition
    if count <= 0:
        return
    r = requests.get(url)
    soup = BeautifulSoup(r.text, 'lxml')
    # Scraping the link and name
    u = soup.find('ul').select_one(f'li:nth-of-type({position})').find('a')
    t_url, name = u['href'], u.text
    print(f'Link: {t_url}\nName: {name}\n')
    # Calling the same function with the scraped URL
    solve(t_url, count - 1, position)

count = int(input('Enter count: '))
position = int(input('Enter Position: '))
solve(url, count, position)
Enter count: 4
Enter Position: 3
Link: http://py4e-data.dr-chuck.net/known_by_Montgomery.html
Name: Montgomery
Link: http://py4e-data.dr-chuck.net/known_by_Mhairade.html
Name: Mhairade
Link: http://py4e-data.dr-chuck.net/known_by_Butchi.html
Name: Butchi
Link: http://py4e-data.dr-chuck.net/known_by_Anayah.html
Name: Anayah
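For comparison, the same traversal can also be written iteratively. This is a sketch under the same page-structure assumption as the recursive answer (each name sits in an li inside a ul); it uses the built-in html.parser so lxml is not required:

import requests
from bs4 import BeautifulSoup

def follow(url, count, position):
    # Iterative equivalent of the recursive solve() above.
    name = None
    for _ in range(count):
        soup = BeautifulSoup(requests.get(url).text, 'html.parser')
        a = soup.find('ul').select_one(f'li:nth-of-type({position})').find('a')
        url, name = a['href'], a.text
        print(f'Link: {url}\nName: {name}\n')
    return name   # the last name retrieved is the assignment's answer

print(follow('http://py4e-data.dr-chuck.net/known_by_Fikret.html', 4, 3))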