无法点击不同结果从内页解析标题
Unable to click on different results to parse the title from inner pages
我用 Python 编写了一个脚本,在 Google 地图的搜索框中发起搜索后加载结果列表,这一部分工作正常。现在,我尝试逐个点击每条结果进入其内页,并从那里解析标题。
运行脚本时,我能成功得到第一个标题,但随后脚本就抛出常见的错误 element is not attached to the dom,尽管我已经采取了各种措施来避免它。
我在这里使用关键字 motels in new jersey 作为搜索词。
我试过:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Search Google Maps for "motels in new jersey", then try to open every
# result in turn and print the title shown on its detail page.
# The logic is reproduced exactly as asked about; only the block
# indentation has been restored so the snippet is valid Python.
driver = webdriver.Chrome()
driver.get("https://www.google.com/maps/search/")
wait = WebDriverWait(driver, 10)
wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "input#searchboxinput"))).send_keys("motels in new jersey")
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button#searchbox-searchbutton"))).click()

while True:
    try:
        # NOTE: the result elements are located once, before the first click,
        # and those same references are reused on every later iteration.
        for item in wait.until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "[class='section-result'] h3[class='section-result-title']"))):
            # click on each of the results
            item.click()
            # the script now in inner page to parse the title
            name = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "h1.section-hero-header-title-title"))).text
            print(name)
            # click on the "Back to results" link located on the top left to get back to the results page
            wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "button[class^='section-back-to-list-button']"))).click()
            # wait for the spinner to be invisible
            wait.until(EC.invisibility_of_element_located((By.CSS_SELECTOR, "#searchbox[class*='loading']")))
            # tried to get rid of staleness condition
            wait.until(EC.staleness_of(item))
    except Exception:
        # Any exception raised inside the loop ends the run without a message.
        break
如何点击不同结果解析内页标题?
要解析搜索结果的标题(例如 Holiday Inn Express、Hyatt House Mt Laurel 等),您不必对每条结果调用 click() 再进入其内页。相反,您可以借助 WebDriverWait 等待 visibility_of_all_elements_located() 条件成立,并使用以下定位策略(Locator Strategy):
代码块:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Start Chrome maximized and with the "disable-infobars" switch, using the
# chromedriver binary at the given local path.
chrome_options = webdriver.ChromeOptions()
for flag in ("start-maximized", "disable-infobars"):
    chrome_options.add_argument(flag)
driver = webdriver.Chrome(
    options=chrome_options,
    executable_path=r'C:\Utility\BrowserDrivers\chromedriver.exe',
)
driver.get("https://www.google.com/maps/search/")

# Type the query once the search box is clickable, then press the search button.
search_box = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.CSS_SELECTOR, "input#searchboxinput"))
)
search_box.send_keys("motels in new jersey")
search_button = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.CSS_SELECTOR, "button#searchbox-searchbutton"))
)
search_button.click()

# Read every title straight from the results list; no result is clicked.
title_spans = WebDriverWait(driver, 20).until(
    EC.visibility_of_all_elements_located(
        (By.CSS_SELECTOR, "div.section-result-title-container h3.section-result-title > span")
    )
)
print([span.get_attribute("innerHTML") for span in title_spans])
控制台输出:
['Holiday Inn Express & Suites Philadelphia - Mt. Laurel', 'Hyatt House Mt Laurel', 'Motel 6 East Brunswick', 'Motel 6 New Brunswick NJ', 'Skyview Motel', 'Anchor Motel', 'Motel 6 Philadelphia - MT Laurel NJ', 'Motel 6 Piscataway', 'Motel 6 Elizabeth - Newark Liberty Intl Airport', 'Twin Oaks Motel', 'Shore Hills Motel', 'Franklin Terrace Motel', 'Loop Inn Motel', 'Hershey Motel', 'Royal Motel', 'Ala Moana Motel', 'Bird of Paradise Motel', 'Appalachian Motel', 'Hudson Plaza Motel', 'Fair Motel', 'Days Inn & Suites by Wyndham Cherry Hill - Philadelphia', 'Holly Hill Motel']
要继续遍历一个会受 DOM 操作影响的列表,您必须在每次 .click() 或任何会触发页面加载的操作之后,重新获取该列表中的元素。请尝试以下代码来解决您的问题:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import WebDriverException

# Search Google Maps for "motels in new jersey", open every result in turn,
# and print the title shown on its detail page.

# Locator for the title of each entry in the search-results list.
RESULT_TITLES = (By.CSS_SELECTOR, "[class='section-result'] h3[class='section-result-title']")

driver = webdriver.Chrome()
try:
    driver.get("https://www.google.com/maps/search/")
    wait = WebDriverWait(driver, 10)
    wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "input#searchboxinput"))).send_keys("motels in new jersey")
    wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button#searchbox-searchbutton"))).click()

    # Count the results once and make a single bounded pass over them; an
    # unconditional outer loop would start over after the last result.
    result_count = len(wait.until(EC.visibility_of_all_elements_located(RESULT_TITLES)))
    for index in range(result_count):
        # Re-locate the list on every pass: references obtained before the
        # previous click/back round trip can no longer be used.
        results = wait.until(EC.visibility_of_all_elements_located(RESULT_TITLES))
        if index >= len(results):
            # The refreshed list is shorter than the first one; nothing left to open.
            break
        current = results[index]
        # click on the result to open its detail page
        current.click()
        # the script is now on the inner page; parse the title
        name = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "h1.section-hero-header-title-title"))).text
        print(name)
        # click on the "Back to results" link located on the top left to get back to the results page
        wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "button[class^='section-back-to-list-button']"))).click()
        # wait for the spinner to be invisible
        wait.until(EC.invisibility_of_element_located((By.CSS_SELECTOR, "#searchbox[class*='loading']")))
        # wait until the element just clicked is detached, so the next lookup
        # does not pick up the outgoing list
        wait.until(EC.staleness_of(current))
except WebDriverException as exc:
    # Still best-effort (stop at the first Selenium failure), but report why
    # instead of ending silently.
    print("Stopped early: {!r}".format(exc))
finally:
    # Always close the browser and end the driver session.
    driver.quit()
我用 Python 编写了一个脚本,在 Google 地图的搜索框中发起搜索后加载结果列表,这一部分工作正常。现在,我尝试逐个点击每条结果进入其内页,并从那里解析标题。
运行脚本时,我能成功得到第一个标题,但随后脚本就抛出常见的错误 element is not attached to the dom,尽管我已经采取了各种措施来避免它。
我在这里使用关键字 motels in new jersey 作为搜索词。
我试过:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Search Google Maps for "motels in new jersey", then try to open every
# result in turn and print the title shown on its detail page.
# The logic is reproduced exactly as asked about; only the block
# indentation has been restored so the snippet is valid Python.
driver = webdriver.Chrome()
driver.get("https://www.google.com/maps/search/")
wait = WebDriverWait(driver, 10)
wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "input#searchboxinput"))).send_keys("motels in new jersey")
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button#searchbox-searchbutton"))).click()

while True:
    try:
        # NOTE: the result elements are located once, before the first click,
        # and those same references are reused on every later iteration.
        for item in wait.until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "[class='section-result'] h3[class='section-result-title']"))):
            # click on each of the results
            item.click()
            # the script now in inner page to parse the title
            name = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "h1.section-hero-header-title-title"))).text
            print(name)
            # click on the "Back to results" link located on the top left to get back to the results page
            wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "button[class^='section-back-to-list-button']"))).click()
            # wait for the spinner to be invisible
            wait.until(EC.invisibility_of_element_located((By.CSS_SELECTOR, "#searchbox[class*='loading']")))
            # tried to get rid of staleness condition
            wait.until(EC.staleness_of(item))
    except Exception:
        # Any exception raised inside the loop ends the run without a message.
        break
如何点击不同结果解析内页标题?
要解析搜索结果的标题(例如 Holiday Inn Express、Hyatt House Mt Laurel 等),您不必对每条结果调用 click() 再进入其内页。相反,您可以借助 WebDriverWait 等待 visibility_of_all_elements_located() 条件成立,并使用以下定位策略(Locator Strategy):
代码块:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Start Chrome maximized and with the "disable-infobars" switch, using the
# chromedriver binary at the given local path.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument("start-maximized")
chrome_options.add_argument('disable-infobars')
driver = webdriver.Chrome(options=chrome_options, executable_path=r'C:\Utility\BrowserDrivers\chromedriver.exe')
driver.get("https://www.google.com/maps/search/")

# Type the query once the search box is clickable, then press the search button.
WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "input#searchboxinput"))).send_keys("motels in new jersey")
WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button#searchbox-searchbutton"))).click()

# Read every title straight from the results list; no result is clicked.
print([my_elem.get_attribute("innerHTML") for my_elem in WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "div.section-result-title-container h3.section-result-title > span")))])
控制台输出:
['Holiday Inn Express & Suites Philadelphia - Mt. Laurel', 'Hyatt House Mt Laurel', 'Motel 6 East Brunswick', 'Motel 6 New Brunswick NJ', 'Skyview Motel', 'Anchor Motel', 'Motel 6 Philadelphia - MT Laurel NJ', 'Motel 6 Piscataway', 'Motel 6 Elizabeth - Newark Liberty Intl Airport', 'Twin Oaks Motel', 'Shore Hills Motel', 'Franklin Terrace Motel', 'Loop Inn Motel', 'Hershey Motel', 'Royal Motel', 'Ala Moana Motel', 'Bird of Paradise Motel', 'Appalachian Motel', 'Hudson Plaza Motel', 'Fair Motel', 'Days Inn & Suites by Wyndham Cherry Hill - Philadelphia', 'Holly Hill Motel']
要继续遍历一个会受 DOM 操作影响的列表,您必须在每次 .click() 或任何会触发页面加载的操作之后,重新获取该列表中的元素。请尝试以下代码来解决您的问题:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import WebDriverException

# Search Google Maps for "motels in new jersey", open every result in turn,
# and print the title shown on its detail page.

# Locator for the title of each entry in the search-results list.
RESULT_TITLES = (By.CSS_SELECTOR, "[class='section-result'] h3[class='section-result-title']")

driver = webdriver.Chrome()
try:
    driver.get("https://www.google.com/maps/search/")
    wait = WebDriverWait(driver, 10)
    wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "input#searchboxinput"))).send_keys("motels in new jersey")
    wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button#searchbox-searchbutton"))).click()

    # Count the results once and make a single bounded pass over them; an
    # unconditional outer loop would start over after the last result.
    result_count = len(wait.until(EC.visibility_of_all_elements_located(RESULT_TITLES)))
    for index in range(result_count):
        # Re-locate the list on every pass: references obtained before the
        # previous click/back round trip can no longer be used.
        results = wait.until(EC.visibility_of_all_elements_located(RESULT_TITLES))
        if index >= len(results):
            # The refreshed list is shorter than the first one; nothing left to open.
            break
        current = results[index]
        # click on the result to open its detail page
        current.click()
        # the script is now on the inner page; parse the title
        name = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "h1.section-hero-header-title-title"))).text
        print(name)
        # click on the "Back to results" link located on the top left to get back to the results page
        wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "button[class^='section-back-to-list-button']"))).click()
        # wait for the spinner to be invisible
        wait.until(EC.invisibility_of_element_located((By.CSS_SELECTOR, "#searchbox[class*='loading']")))
        # wait until the element just clicked is detached, so the next lookup
        # does not pick up the outgoing list
        wait.until(EC.staleness_of(current))
except WebDriverException as exc:
    # Still best-effort (stop at the first Selenium failure), but report why
    # instead of ending silently.
    print("Stopped early: {!r}".format(exc))
finally:
    # Always close the browser and end the driver session.
    driver.quit()