Selenium 中 get_attribute("href") 返回 None
get_attribute("href") returns None in Selenium
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
browser = webdriver.Chrome()

# Collect article links and headlines from a keyword search.
# CNN is used as the example; the search term and page size are in the URL.
base_url = u'https://www.cnn.com/search?q=politics&size=200'
browser.get(base_url)
time.sleep(1)  # fixed pause; presumably gives the results time to render

# Container that holds every search result.
main_news_container = browser.find_element_by_class_name('cnn-search__results-list')

# NOTE: this class is on the <h3> headline element, not on the <a> inside it,
# so get_attribute("href") on these elements returns None -- this is the
# behaviour the question is about.
headlines_list = main_news_container.find_elements_by_class_name('cnn-search__result-headline')

# Alternative lookup, left disabled. The print that used text_sections is
# disabled with it, since the name would otherwise be undefined (NameError).
# text_sections = main_news_container.find_elements_by_xpath("//a[@href]")
# print(len(text_sections))

headlines = []
links = []
for elem in headlines_list:
    # href of the matched element (None here, because the element is an <h3>).
    links.append(elem.get_attribute("href"))
    # Visible headline text.
    headlines.append(elem.text)

# Body/summary elements of the results inside the main container;
# body_elements[i].text gives the text of one of them.
body_elements = main_news_container.find_elements_by_class_name("cnn-search__result-body")

print(links[0])
print(len(headlines))
print(len(body_elements))
我正在尝试从 headlines_list 的元素中获取 href,但它返回 None。不过我仍然可以通过 elem.text 获取标题文本。
This is the inspect output of the web page source
您正在尝试获取 h3 元素的 href 属性,因为该 class 位于 h3 上;而 h3 本身没有 href 属性,所以返回 None。您需要在 h3 元素中找到 a 元素。
我修改了获取标题列表的那一行,使其选中 h3 内的 a 元素:
# Use the plural find_elements_* so the result is a list of <a> elements that
# can be iterated; the singular form returns a single WebElement.
headlines_list = main_news_container.find_elements_by_css_selector("h3[class='cnn-search__result-headline']>a")
以下是修改后的完整代码:
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
browser = webdriver.Chrome()

# Collect article links and headlines from a keyword search.
# CNN is used as the example; the search term and page size are in the URL.
base_url = u'https://www.cnn.com/search?q=politics&size=200'
browser.get(base_url)
time.sleep(1)  # fixed pause; presumably gives the results time to render

# Container that holds every search result.
main_news_container = browser.find_element_by_class_name('cnn-search__results-list')

# Select the <a> directly inside each headline <h3>; the href lives on the <a>.
# The plural find_elements_* is required: the singular form returns a single
# WebElement, which the for-loop below cannot iterate over.
headlines_list = main_news_container.find_elements_by_css_selector("h3[class='cnn-search__result-headline']>a")

# Alternative lookup, left disabled. The print that used text_sections is
# disabled with it, since the name would otherwise be undefined (NameError).
# text_sections = main_news_container.find_elements_by_xpath("//a[@href]")
# print(len(text_sections))

headlines = []
links = []
for elem in headlines_list:
    # Link target of the headline anchor.
    links.append(elem.get_attribute("href"))
    # Visible headline text.
    headlines.append(elem.text)

# Body/summary elements of the results inside the main container;
# body_elements[i].text gives the text of one of them.
body_elements = main_news_container.find_elements_by_class_name("cnn-search__result-body")

print(links[0])
print(len(headlines))
print(len(body_elements))
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
browser = webdriver.Chrome()

# Collect article links and headlines from a keyword search.
# CNN is used as the example; the search term and page size are in the URL.
base_url = u'https://www.cnn.com/search?q=politics&size=200'
browser.get(base_url)
time.sleep(1)  # fixed pause; presumably gives the results time to render

# Container that holds every search result.
main_news_container = browser.find_element_by_class_name('cnn-search__results-list')

# NOTE: this class is on the <h3> headline element, not on the <a> inside it,
# so get_attribute("href") on these elements returns None -- this is the
# behaviour the question is about.
headlines_list = main_news_container.find_elements_by_class_name('cnn-search__result-headline')

# Alternative lookup, left disabled. The print that used text_sections is
# disabled with it, since the name would otherwise be undefined (NameError).
# text_sections = main_news_container.find_elements_by_xpath("//a[@href]")
# print(len(text_sections))

headlines = []
links = []
for elem in headlines_list:
    # href of the matched element (None here, because the element is an <h3>).
    links.append(elem.get_attribute("href"))
    # Visible headline text.
    headlines.append(elem.text)

# Body/summary elements of the results inside the main container;
# body_elements[i].text gives the text of one of them.
body_elements = main_news_container.find_elements_by_class_name("cnn-search__result-body")

print(links[0])
print(len(headlines))
print(len(body_elements))
我正在尝试从 headlines_list 的元素中获取 href,但它返回 None。不过我仍然可以通过 elem.text 获取标题文本。
This is the inspect output of the web page source
您正在尝试获取 h3 元素的 href 属性,因为该 class 位于 h3 上;而 h3 本身没有 href 属性,所以返回 None。您需要在 h3 元素中找到 a 元素。
我修改了获取标题列表的那一行,使其选中 h3 内的 a 元素:
# Use the plural find_elements_* so the result is a list of <a> elements that
# can be iterated; the singular form returns a single WebElement.
headlines_list = main_news_container.find_elements_by_css_selector("h3[class='cnn-search__result-headline']>a")
以下是修改后的完整代码:
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
browser = webdriver.Chrome()

# Collect article links and headlines from a keyword search.
# CNN is used as the example; the search term and page size are in the URL.
base_url = u'https://www.cnn.com/search?q=politics&size=200'
browser.get(base_url)
time.sleep(1)  # fixed pause; presumably gives the results time to render

# Container that holds every search result.
main_news_container = browser.find_element_by_class_name('cnn-search__results-list')

# Select the <a> directly inside each headline <h3>; the href lives on the <a>.
# The plural find_elements_* is required: the singular form returns a single
# WebElement, which the for-loop below cannot iterate over.
headlines_list = main_news_container.find_elements_by_css_selector("h3[class='cnn-search__result-headline']>a")

# Alternative lookup, left disabled. The print that used text_sections is
# disabled with it, since the name would otherwise be undefined (NameError).
# text_sections = main_news_container.find_elements_by_xpath("//a[@href]")
# print(len(text_sections))

headlines = []
links = []
for elem in headlines_list:
    # Link target of the headline anchor.
    links.append(elem.get_attribute("href"))
    # Visible headline text.
    headlines.append(elem.text)

# Body/summary elements of the results inside the main container;
# body_elements[i].text gives the text of one of them.
body_elements = main_news_container.find_elements_by_class_name("cnn-search__result-body")

print(links[0])
print(len(headlines))
print(len(body_elements))