为什么使用其中一个类名能得到输出,而使用另一个却不能?
Why do I get output from one but not the other?
from bs4 import BeautifulSoup
import requests
import re
def getHTMLdocument(url, timeout=30):
    """Fetch *url* with an HTTP GET and return the response body as text.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. requests
            applies no timeout by default, so one is always passed here.

    Returns:
        The decoded response body (``response.text``).
    """
    response = requests.get(url, timeout=timeout)
    return response.text
def correct_url(url1):
    """Return *url1* as an absolute URL, resolved against parliament.gov.sg.

    Args:
        url1: An href taken from a page on the site. May be site-relative
            ('/mps/...'), path-relative ('details/...') or already absolute.

    Returns:
        *url1* unchanged when it already starts with the site prefix;
        otherwise *url1* resolved against the site root. Absolute URLs that
        point at another host are returned as they are instead of being
        glued onto the site prefix.
    """
    from urllib.parse import urljoin

    site_root = 'https://www.parliament.gov.sg'
    if url1.startswith(site_root):
        return url1
    # urljoin inserts the missing '/' for path-relative hrefs and leaves
    # fully-qualified URLs alone, which plain string concatenation did not.
    return urljoin(site_root + '/', url1)
# Listing page whose "/details/" anchors lead to the individual MP pages.
url_to_scrape = 'https://www.parliament.gov.sg/mps/list-of-current-mps'
links = []

# url_to_scrape never changes, so the listing is fetched exactly once; a
# loop around this fetch could only repeat the same request.
html_document = getHTMLdocument(url_to_scrape)
soup = BeautifulSoup(html_document, 'lxml')

# Collect every detail-page URL once, in page order. The href is made
# absolute *before* the duplicate check so that the check compares values
# of the same form as the ones already stored in `links`.
for anchor in soup.find_all('a', attrs={'href': re.compile("/details/")}):
    detail_url = correct_url(anchor.get('href'))
    if detail_url not in links:
        links.append(detail_url)

# Class of the <div> to extract from each MP page. With
# 'mp-designation-wrap' the loop below prints output; with
# 'mp-constituency-wrap' (the value in effect, since it is assigned last)
# it fails as described in the question text that follows.
member_info = 'mp-designation-wrap'
member_info = 'mp-constituency-wrap'

for url in links:
    soup = BeautifulSoup(getHTMLdocument(url), 'lxml')
    txt1 = soup.find('div', attrs={'class': member_info})
    # NOTE: find() returns None when the page has no <div> with this class,
    # and None.text raises AttributeError. This is the error being asked
    # about, so the unguarded access is kept here on purpose.
    textoutput = txt1.text
    print(textoutput)
我想把不同的类别分开提取,以便分别保存。但是,只有在使用 member_info = 'mp-designation-wrap' 时才能得到输出;使用 'mp-constituency-wrap' 时则会报错 AttributeError: 'NoneType' object has no attribute 'text'。
我不明白为什么两者会给出不同的结果。如果有人能帮我理解原因并指出正确的方向,那就太好了。
出现此错误的原因是:您尝试选取的元素在某些页面中并不存在,此时 find() 返回 None。因此必须在调用 .text 之前先检查元素是否存在:
for link in links:
    # Pass a timeout so one unresponsive page cannot stall the whole run.
    page = requests.get(link, timeout=30)
    soup = BeautifulSoup(page.text, 'lxml')

    # find() returns None when the page has no <div> with the given class,
    # so look each element up first and only read .text from a real match.
    designation = soup.find('div', attrs={'class': 'mp-designation-wrap'})
    constituency = soup.find('div', attrs={'class': 'mp-constituency-wrap'})

    # text1 / text2 hold the two categories separately; each is None for
    # pages that lack the corresponding element.
    text1 = designation.text if designation is not None else None
    text2 = constituency.text if constituency is not None else None
    print(text2)
from bs4 import BeautifulSoup
import requests
import re
def getHTMLdocument(url, timeout=30):
    """Fetch *url* with an HTTP GET and return the response body as text.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. requests
            applies no timeout by default, so one is always passed here.

    Returns:
        The decoded response body (``response.text``).
    """
    response = requests.get(url, timeout=timeout)
    return response.text
def correct_url(url1):
    """Return *url1* as an absolute URL, resolved against parliament.gov.sg.

    Args:
        url1: An href taken from a page on the site. May be site-relative
            ('/mps/...'), path-relative ('details/...') or already absolute.

    Returns:
        *url1* unchanged when it already starts with the site prefix;
        otherwise *url1* resolved against the site root. Absolute URLs that
        point at another host are returned as they are instead of being
        glued onto the site prefix.
    """
    from urllib.parse import urljoin

    site_root = 'https://www.parliament.gov.sg'
    if url1.startswith(site_root):
        return url1
    # urljoin inserts the missing '/' for path-relative hrefs and leaves
    # fully-qualified URLs alone, which plain string concatenation did not.
    return urljoin(site_root + '/', url1)
# Listing page whose "/details/" anchors lead to the individual MP pages.
url_to_scrape = 'https://www.parliament.gov.sg/mps/list-of-current-mps'
links = []

# url_to_scrape never changes, so the listing is fetched exactly once; a
# loop around this fetch could only repeat the same request.
html_document = getHTMLdocument(url_to_scrape)
soup = BeautifulSoup(html_document, 'lxml')

# Collect every detail-page URL once, in page order. The href is made
# absolute *before* the duplicate check so that the check compares values
# of the same form as the ones already stored in `links`.
for anchor in soup.find_all('a', attrs={'href': re.compile("/details/")}):
    detail_url = correct_url(anchor.get('href'))
    if detail_url not in links:
        links.append(detail_url)

# Class of the <div> to extract from each MP page. With
# 'mp-designation-wrap' the loop below prints output; with
# 'mp-constituency-wrap' (the value in effect, since it is assigned last)
# it fails as described in the question text that follows.
member_info = 'mp-designation-wrap'
member_info = 'mp-constituency-wrap'

for url in links:
    soup = BeautifulSoup(getHTMLdocument(url), 'lxml')
    txt1 = soup.find('div', attrs={'class': member_info})
    # NOTE: find() returns None when the page has no <div> with this class,
    # and None.text raises AttributeError. This is the error being asked
    # about, so the unguarded access is kept here on purpose.
    textoutput = txt1.text
    print(textoutput)
我想把不同的类别分开提取,以便分别保存。但是,只有在使用 member_info = 'mp-designation-wrap' 时才能得到输出;使用 'mp-constituency-wrap' 时则会报错 AttributeError: 'NoneType' object has no attribute 'text'。我不明白为什么两者会给出不同的结果。如果有人能帮我理解原因并指出正确的方向,那就太好了。
出现此错误的原因是:您尝试选取的元素在某些页面中并不存在,此时 find() 返回 None。因此必须在调用 .text 之前先检查元素是否存在:
for link in links:
    # Pass a timeout so one unresponsive page cannot stall the whole run.
    page = requests.get(link, timeout=30)
    soup = BeautifulSoup(page.text, 'lxml')

    # find() returns None when the page has no <div> with the given class,
    # so look each element up first and only read .text from a real match.
    designation = soup.find('div', attrs={'class': 'mp-designation-wrap'})
    constituency = soup.find('div', attrs={'class': 'mp-constituency-wrap'})

    # text1 / text2 hold the two categories separately; each is None for
    # pages that lack the corresponding element.
    text1 = designation.text if designation is not None else None
    text2 = constituency.text if constituency is not None else None
    print(text2)