如何使用 Selenium Python 打印电子邮件地址
How can I print email address using Selenium Python
<div id="MainCopy_ctl13_presentJob_EmailAddressPanel">
<a id="MainCopy_ctl13_presentJob_EmailAddress" href="mailto:dburse@bjcta.org">xyzmmm@tccp.org</a>
</div>
我试过使用
email = browser.find_elements_by_xpath('//div[@id="MainCopy_ctl13_presentJob_EmailAddress"]//a').text
print(email)
但是我没有得到结果。
该元素是否已经存在?或者,代码是否可能在 Selenium 加载该元素之前就已执行?
考虑使用等待:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Firefox()
driver.get("http://somedomain/url_that_delays_loading")
try:
element = WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.ID, "myDynamicElement"))
)
finally:
driver.quit()
a 标签内的电子邮件是 a 标签的 href,所以只需这样做:
使用 Selenium:
from selenium import webdriver
driver = webdriver.Firefox()
driver.get("http://somedomain/url_that_delays_loading")
a_tag = driver.find_element_by_id('MainCopy_ctl13_presentJob_EmailAddress')
mail_link = a_tag.get_attribute("href")
mail_addrs = mail_link.split(':')[1]
print(mail_addrs)
使用 BeautifulSoup:
from bs4 import BeautifulSoup
content="""
<div id="MainCopy_ctl13_presentJob_EmailAddressPanel">
<a id="MainCopy_ctl13_presentJob_EmailAddress" href="mailto:dburse@bjcta.org">xyzmmm@tccp.org</a>
</div>"""
soup = BeautifulSoup(content)
a_tag = soup.find(id='MainCopy_ctl13_presentJob_EmailAddress')
mail_link = a_tag.attrs['href']
mail_addrs = mail_link.split(':')[1]
print(mail_addrs)
text 属性仅返回可见文本;对于不在视口(viewport)中的文本,请使用 textContent 属性:
email = browser.find_element_by_xpath('//div[@id="MainCopy_ctl13_presentJob_EmailAddressPanel"]//a').get_attribute("textContent")
print(email)
您使用的 id 属性(即 MainCopy_ctl13_presentJob_EmailAddress)属于 <a> 标签,而不是 <div> 标签。
要打印电子邮件地址,您可以使用以下任一方法:
使用 css_selector 和 get_attribute():
print(driver.find_element(By.CSS_SELECTOR, "a#MainCopy_ctl13_presentJob_EmailAddress").get_attribute("innerHTML"))
使用 xpath 和 text 属性:
print(driver.find_element(By.XPATH, "//a[@id='MainCopy_ctl13_presentJob_EmailAddress']").text)
理想情况下,您需要针对 visibility_of_element_located() 引入 WebDriverWait,您可以使用以下任一方法:
使用 CSS_SELECTOR 和 text 属性:
print(WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.CSS_SELECTOR, "a#MainCopy_ctl13_presentJob_EmailAddress"))).text)
使用 XPATH 和 get_attribute():
print(WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH, "//a[@id='MainCopy_ctl13_presentJob_EmailAddress']"))).get_attribute("innerHTML"))
注意:您必须添加以下导入:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
<div id="MainCopy_ctl13_presentJob_EmailAddressPanel">
<a id="MainCopy_ctl13_presentJob_EmailAddress" href="mailto:dburse@bjcta.org">xyzmmm@tccp.org</a>
</div>
我试过使用
email = browser.find_elements_by_xpath('//div[@id="MainCopy_ctl13_presentJob_EmailAddress"]//a').text
print(email)
但是我没有得到结果。
该元素是否已经存在?或者,代码是否可能在 Selenium 加载该元素之前就已执行?
考虑使用等待:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Firefox()
driver.get("http://somedomain/url_that_delays_loading")
try:
element = WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.ID, "myDynamicElement"))
)
finally:
driver.quit()
a 标签内的电子邮件是 a 标签的 href,所以只需这样做:
使用 Selenium:
from selenium import webdriver
driver = webdriver.Firefox()
driver.get("http://somedomain/url_that_delays_loading")
a_tag = driver.find_element_by_id('MainCopy_ctl13_presentJob_EmailAddress')
mail_link = a_tag.get_attribute("href")
mail_addrs = mail_link.split(':')[1]
print(mail_addrs)
使用 BeautifulSoup:
from bs4 import BeautifulSoup
content="""
<div id="MainCopy_ctl13_presentJob_EmailAddressPanel">
<a id="MainCopy_ctl13_presentJob_EmailAddress" href="mailto:dburse@bjcta.org">xyzmmm@tccp.org</a>
</div>"""
soup = BeautifulSoup(content)
a_tag = soup.find(id='MainCopy_ctl13_presentJob_EmailAddress')
mail_link = a_tag.attrs['href']
mail_addrs = mail_link.split(':')[1]
print(mail_addrs)
text 属性仅返回可见文本;对于不在视口(viewport)中的文本,请使用 textContent 属性:
email = browser.find_element_by_xpath('//div[@id="MainCopy_ctl13_presentJob_EmailAddressPanel"]//a').get_attribute("textContent")
print(email)
您使用的 id 属性(即 MainCopy_ctl13_presentJob_EmailAddress)属于 <a> 标签,而不是 <div> 标签。
要打印电子邮件地址,您可以使用以下任一方法:
使用 css_selector 和 get_attribute():
print(driver.find_element(By.CSS_SELECTOR, "a#MainCopy_ctl13_presentJob_EmailAddress").get_attribute("innerHTML"))
使用 xpath 和 text 属性:
print(driver.find_element(By.XPATH, "//a[@id='MainCopy_ctl13_presentJob_EmailAddress']").text)
理想情况下,您需要针对 visibility_of_element_located() 引入 WebDriverWait,您可以使用以下任一方法:
使用 CSS_SELECTOR 和 text 属性:
print(WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.CSS_SELECTOR, "a#MainCopy_ctl13_presentJob_EmailAddress"))).text)
使用 XPATH 和 get_attribute():
print(WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH, "//a[@id='MainCopy_ctl13_presentJob_EmailAddress']"))).get_attribute("innerHTML"))
注意:您必须添加以下导入:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC