获取 google 地图中没有 API 的永久关闭地点的评论总数

Question

我正在学习 python 以及网络抓取，我想从永久关闭的餐厅的 google 地图中获取评论数量，但我做不到，请你帮忙好吗？谢谢

from bs4 import BeautifulSoup

url = 'https://www.google.com/maps?q=asia+halal+restaurant+aichi+japan+open+date&safe=strict&rlz=1C1GCEA_enID892ID892&sxsrf=ALeKk01NqaBLM8bXeVVS6M6tv9kAy0G6qQ:1616997971678&gs_lcp=Cgdnd3Mtd2l6EAM6BwgjELADECc6BQghEKABOgQIIRAVOgcIIRAKEKABUIUIWKojYOckaABwAHgAgAHHAogB7RGSAQcxLjUuNC4ymAEAoAEBqgEHZ3dzLXdpesgBAcABAQ&uact=5&um=1&ie=UTF-8&sa=X&ved=2ahUKEwjbhef-7NTvAhWa93MBHaFHCzYQ_AUoAXoECAEQAw'

import requests
page = requests.get(url)
soup = BeautifulSoup(page.content, 'html.parser')
ps = soup.find_all(string = 'クチコミ')
ps

我也试过使用下面 chrome 开发者工具的 find 'class' 和 'span aria-label' 但还是不行

browser picture for html class

#ps = soup.find_all(class_='h0ySl-wcwwM-E70qVe-list')
#ps = soup.find_all('span aria-label')
#total_rev = ps.get_text()
#total_rev

这是我尝试使用 selenium 的代码

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from bs4 import BeautifulSoup
driver = webdriver.Chrome('F:/Download/SW/chromedriver_win32/chromedriver.exe')

url = 'https://www.google.com/maps/place/%E3%82%A2%E3%83%83%E3%83%90%E3%82%B7+%E3%82%B9%E3%82%A4%E3%83%BC%E3%83%84/@35.0903185,136.8551766,17z/data=!3m1!4b1!4m5!3m4!1s0x600378381c4bb1f7:0x8e9d356b9ded5bcc!8m2!3d35.0903185!4d136.8573653'
driver.get(url)

我曾尝试在“仍在营业”的餐厅中使用此代码获取评论数，但是当涉及到永久关闭的餐厅时，我无法获取评论数

span_review = WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.XPATH, '//button[contains(text(), "section-star")]'))).click()


#Find the total number of reviews
total_number_of_reviews = driver.find_element_by_xpath('//*[@id="pane"]/div/div[1]/div/div/div[2]/div[2]/div/div[2]/div[2]').text.split(" ")[0]
total_number_of_reviews = int(total_number_of_reviews.replace(',','')) if ',' in total_number_of_reviews else int(total_number_of_reviews)#Find scroll layout


total_reviews = driver.find_element_by_class_name("h0ySl-wcwwM-E70qVe-list")
total_reviews #= driver.get('aria-label')
total_reviews = total_reviews.get_text('aria-label')
total_reviews
total_reviews
total_number_of_reviews = total_reviews.text[0:]

total_number_of_reviews

希望能学到东西

谢谢！

Answer 1

我在 HTML 中找不到你的 xpath。没有 <button> 文本 section-star 但 <li class="section-star">.

并且 aria-label 不是文本而是 attribute 并且您必须使用 .get_attribute('aria-label')

但我找到了其他 xpath //button[jsaction="pane.rating.moreReviews"]，它适用于 permanent closed 和 still operating

已在 Firefox 和 Chrome、Linux.

上测试

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait

driver = webdriver.Chrome('F:/Download/SW/chromedriver_win32/chromedriver.exe')
#driver = webdriver.Chrome()
#driver = webdriver.Firefox()

all_urls = [
    # permanent closed
    'https://www.google.com/maps/place/%E3%82%A2%E3%83%83%E3%83%90%E3%82%B7+%E3%82%B9%E3%82%A4%E3%83%BC%E3%83%84/@35.0903185,136.8551766,17z/data=!3m1!4b1!4m5!3m4!1s0x600378381c4bb1f7:0x8e9d356b9ded5bcc!8m2!3d35.0903185!4d136.8573653',
    # still operating
    'https://www.google.com/maps/place/Seaside+Restaurant+Higashiyama+Garden+-+Port+Bldg./@35.0841323,136.8474088,14z/data=!3m1!5s0x6003790a61e056e7:0x7f307de064680a96!4m9!1m2!2m1!1srestaurants!3m5!1s0x600379a07cd9fcc7:0x89f84cc9f0422e30!8m2!3d35.0895485!4d136.8809243!15sCgtyZXN0YXVyYW50c1oNIgtyZXN0YXVyYW50c5IBCnJlc3RhdXJhbnQ',
]

for url in all_urls:
    driver.get(url)
    total_reviews = WebDriverWait(driver, 15).until(EC.element_to_be_clickable((By.XPATH, '//button[@jsaction="pane.rating.moreReviews"]')))
    total_reviews = total_reviews.get_attribute('aria-label')
    print(total_reviews)

获取 google 地图中没有 API 的永久关闭地点的评论总数

Get total number of review of permanently closed place in google map without API

python

web-scraping