尝试使用 selenium webdriver 提取数据时出现问题
Problem when trying to extract data with selenium webdriver
您好,我正在尝试提取此网页的赔率:
https://www.unibet.fr/sport/football
这是我的 python 脚本:
#!/usr/bin/python3
# -*- coding: utf-8 -*-
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import numpy as np
import os
# Configure a headless Chrome session for scraping.
options = Options()
options.headless = True
options.add_argument("window-size=1400,800")
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
options.add_argument("start-maximized")
options.add_argument("enable-automation")
options.add_argument("--disable-infobars")
options.add_argument("--disable-dev-shm-usage")
driver = webdriver.Chrome(options=options)
try:
    driver.get('https://www.unibet.fr/sport/football')
    # Wait up to 10 seconds for the odds <span> elements to become visible;
    # WebDriverWait raises TimeoutException if they do not.
    odds_elements = WebDriverWait(driver, 10).until(
        EC.visibility_of_all_elements_located(
            (By.XPATH, '//span[contains(@class, "ui-touchlink-needsclick price odd-price")]')
        )
    )
    odds = [my_elem.text for my_elem in odds_elements]
    print(odds, '\n')
finally:
    # Always shut the browser down, even when the wait above times out,
    # so no headless Chrome process is left running. quit() also closes
    # the open windows, so a separate close() call is not needed.
    driver.quit()
输出结果是:
Traceback (most recent call last):
File "./azerty.py", line 26, in <module>
odds = [my_elem.text for my_elem in WebDriverWait(driver, 10).until(EC.visibility_of_all_elements_located((By.XPATH, '//span[contains(@class, "ui-touchlink-needsclick price odd-price")]')))]
File "/usr/local/lib/python3.8/dist-packages/selenium/webdriver/support/wait.py", line 80, in until
raise TimeoutException(message, screen, stacktrace)
selenium.common.exceptions.TimeoutException: Message:
此脚本在其他网页上运行良好,但在这个页面上却不行。
希望能得到一些帮助,谢谢。
您超时的原因是该页面是无限滚动加载的页面,即只有滚动到页面底部时才会加载新元素。因此,即使 DOM 中已经有少量元素,selenium 也无法定位到它们,于是就超时了。请尝试先加载完所有元素,再进行定位。
import time  # required for the pause between scrolls below

try:
    driver.get('https://www.unibet.fr/sport/football')
    WebDriverWait(driver, 60).until(EC.presence_of_element_located((By.XPATH, '//a[@data-track-action="start_page"]')))  # Wait for page to load
    # Scroll until the page is loaded completely: keep jumping to the bottom
    # until the document height stops growing.
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        # Scroll down to bottom
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        # Wait to load page
        time.sleep(2)
        # Calculate new scroll height and compare with last scroll height
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height
    # Get the elements. find_elements(By.XPATH, ...) is used instead of
    # find_elements_by_xpath, which was removed in Selenium 4.
    odds = [my_elem.text for my_elem in driver.find_elements(By.XPATH, '//span[contains(@class, "ui-touchlink-needsclick price odd-price")]')]
    print(odds, '\n')
finally:
    # Always shut the browser down, even if a wait or script call fails.
    # quit() also closes the open windows, so close() is not needed.
    driver.quit()
输出:
您好,我正在尝试提取此网页的赔率: https://www.unibet.fr/sport/football
这是我的 python 脚本:
#!/usr/bin/python3
# -*- coding: utf-8 -*-
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import numpy as np
import os
# Configure a headless Chrome session for scraping.
options = Options()
options.headless = True
options.add_argument("window-size=1400,800")
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
options.add_argument("start-maximized")
options.add_argument("enable-automation")
options.add_argument("--disable-infobars")
options.add_argument("--disable-dev-shm-usage")
driver = webdriver.Chrome(options=options)
try:
    driver.get('https://www.unibet.fr/sport/football')
    # Wait up to 10 seconds for the odds <span> elements to become visible;
    # WebDriverWait raises TimeoutException if they do not.
    odds_elements = WebDriverWait(driver, 10).until(
        EC.visibility_of_all_elements_located(
            (By.XPATH, '//span[contains(@class, "ui-touchlink-needsclick price odd-price")]')
        )
    )
    odds = [my_elem.text for my_elem in odds_elements]
    print(odds, '\n')
finally:
    # Always shut the browser down, even when the wait above times out,
    # so no headless Chrome process is left running. quit() also closes
    # the open windows, so a separate close() call is not needed.
    driver.quit()
输出结果是:
Traceback (most recent call last):
File "./azerty.py", line 26, in <module>
odds = [my_elem.text for my_elem in WebDriverWait(driver, 10).until(EC.visibility_of_all_elements_located((By.XPATH, '//span[contains(@class, "ui-touchlink-needsclick price odd-price")]')))]
File "/usr/local/lib/python3.8/dist-packages/selenium/webdriver/support/wait.py", line 80, in until
raise TimeoutException(message, screen, stacktrace)
selenium.common.exceptions.TimeoutException: Message:
此脚本在其他网页上运行良好,但在这个页面上却不行。希望能得到一些帮助,谢谢。
您超时的原因是该页面是无限滚动加载的页面,即只有滚动到页面底部时才会加载新元素。因此,即使 DOM 中已经有少量元素,selenium 也无法定位到它们,于是就超时了。请尝试先加载完所有元素,再进行定位。
import time  # required for the pause between scrolls below

try:
    driver.get('https://www.unibet.fr/sport/football')
    WebDriverWait(driver, 60).until(EC.presence_of_element_located((By.XPATH, '//a[@data-track-action="start_page"]')))  # Wait for page to load
    # Scroll until the page is loaded completely: keep jumping to the bottom
    # until the document height stops growing.
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        # Scroll down to bottom
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        # Wait to load page
        time.sleep(2)
        # Calculate new scroll height and compare with last scroll height
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height
    # Get the elements. find_elements(By.XPATH, ...) is used instead of
    # find_elements_by_xpath, which was removed in Selenium 4.
    odds = [my_elem.text for my_elem in driver.find_elements(By.XPATH, '//span[contains(@class, "ui-touchlink-needsclick price odd-price")]')]
    print(odds, '\n')
finally:
    # Always shut the browser down, even if a wait or script call fails.
    # quit() also closes the open windows, so close() is not needed.
    driver.quit()
输出: