Python 如何使用 Selenium Webdriver 提取 webelement
How to extract webelement with Selenium Webdriver in Python
您好,我正在尝试提取主表格(table)中每一行的日期。日期位于如下标记中:
<td style="display: none;">
这是我的脚本:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
# Build the Chrome options object that is passed to the driver below.
options = Options()
# Run the browser in headless mode.
options.headless = True
# Command-line switches forwarded to Chrome at start-up.
options.add_argument("window-size=1400,800")
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
options.add_argument("start-maximized")
options.add_argument("enable-automation")
options.add_argument("--disable-infobars")
options.add_argument("--disable-dev-shm-usage")
#We extract sport event data at this webpage
url = "https://www.coteur.com/cotes-foot.php"
# Start Chrome with the options above and load the page.
driver = webdriver.Chrome(options=options)
driver.get(url)
# Wait up to 10 seconds for all <td> elements whose style attribute contains
# "display: none;" to satisfy the visibility condition, then read each one's .text.
# NOTE(review): this is the statement the traceback reports as raising
# TimeoutException; the XPath selects cells styled as hidden while the
# expected condition requires them to be visible.
date = [my_elem.text for my_elem in WebDriverWait(driver, 10).until(EC.visibility_of_all_elements_located((By.XPATH, '//td[contains(@style, "display: none;")]')))]
print(date)
我有这个输出:
Traceback (most recent call last):
File "./soccer_scraper_historic.py", line 36, in <module>
date = [my_elem.text for my_elem in WebDriverWait(driver, 10).until(EC.visibility_of_all_elements_located((By.XPATH, '//td[contains(@style, "display: none;")]')))]
File "/usr/local/lib/python3.8/dist-packages/selenium/webdriver/support/wait.py", line 80, in until
raise TimeoutException(message, screen, stacktrace)
selenium.common.exceptions.TimeoutException: Message:
也许你可以帮助解决这个问题。谢谢
使用 visibility_of_all_elements_located 时,您是在等待带有 display: none; 的元素由不可见变为可见,因此等待会超时。
最好按列的位置来获取该列,而不是依赖 display: none; 属性,因为该属性并不唯一。
另外,您无法通过 .text 从不可见元素中获取文本,因此可以改用 textContent 属性:
# Collect the date column (4th cell of every data row) from the odds table.
driver = webdriver.Chrome(options=options)
try:
    wait = WebDriverWait(driver, 10)
    driver.get(url)
    # The date cells are hidden, so wait for their presence in the DOM rather
    # than for visibility, and locate the column by position instead of by
    # its non-unique inline style.
    cells = wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "#mediaTable tr[role=row] td:nth-child(4)")))
    # .text is not available for hidden elements; read the textContent
    # attribute to get the text regardless of visibility.
    dates = [cell.get_attribute("textContent") for cell in cells]
finally:
    # Always shut the browser down, even if the wait times out, so the
    # Chrome/chromedriver processes are not left running.
    driver.quit()
您好,我正在尝试提取主表格(table)中每一行的日期。日期位于如下标记中:
<td style="display: none;">
这是我的脚本:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
# Build the Chrome options object that is passed to the driver below.
options = Options()
# Run the browser in headless mode.
options.headless = True
# Command-line switches forwarded to Chrome at start-up.
options.add_argument("window-size=1400,800")
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
options.add_argument("start-maximized")
options.add_argument("enable-automation")
options.add_argument("--disable-infobars")
options.add_argument("--disable-dev-shm-usage")
#We extract sport event data at this webpage
url = "https://www.coteur.com/cotes-foot.php"
# Start Chrome with the options above and load the page.
driver = webdriver.Chrome(options=options)
driver.get(url)
# Wait up to 10 seconds for all <td> elements whose style attribute contains
# "display: none;" to satisfy the visibility condition, then read each one's .text.
# NOTE(review): this is the statement the traceback reports as raising
# TimeoutException; the XPath selects cells styled as hidden while the
# expected condition requires them to be visible.
date = [my_elem.text for my_elem in WebDriverWait(driver, 10).until(EC.visibility_of_all_elements_located((By.XPATH, '//td[contains(@style, "display: none;")]')))]
print(date)
我有这个输出:
Traceback (most recent call last):
File "./soccer_scraper_historic.py", line 36, in <module>
date = [my_elem.text for my_elem in WebDriverWait(driver, 10).until(EC.visibility_of_all_elements_located((By.XPATH, '//td[contains(@style, "display: none;")]')))]
File "/usr/local/lib/python3.8/dist-packages/selenium/webdriver/support/wait.py", line 80, in until
raise TimeoutException(message, screen, stacktrace)
selenium.common.exceptions.TimeoutException: Message:
也许你可以帮助解决这个问题。谢谢
使用 visibility_of_all_elements_located 时,您是在等待带有 display: none; 的元素由不可见变为可见,因此等待会超时。
最好按列的位置来获取该列,而不是依赖 display: none; 属性,因为该属性并不唯一。
另外,您无法通过 .text 从不可见元素中获取文本,因此可以改用 textContent 属性:
# Collect the date column (4th cell of every data row) from the odds table.
driver = webdriver.Chrome(options=options)
try:
    wait = WebDriverWait(driver, 10)
    driver.get(url)
    # The date cells are hidden, so wait for their presence in the DOM rather
    # than for visibility, and locate the column by position instead of by
    # its non-unique inline style.
    cells = wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "#mediaTable tr[role=row] td:nth-child(4)")))
    # .text is not available for hidden elements; read the textContent
    # attribute to get the text regardless of visibility.
    dates = [cell.get_attribute("textContent") for cell in cells]
finally:
    # Always shut the browser down, even if the wait times out, so the
    # Chrome/chromedriver processes are not left running.
    driver.quit()