获取 google 地图中没有 API 的永久关闭地点的评论总数
Get total number of review of permanently closed place in google map without API
我正在学习 python 以及网络抓取,我想从永久关闭的餐厅的 google 地图中获取评论数量,但我做不到,请你帮忙好吗?谢谢
from bs4 import BeautifulSoup
url = 'https://www.google.com/maps?q=asia+halal+restaurant+aichi+japan+open+date&safe=strict&rlz=1C1GCEA_enID892ID892&sxsrf=ALeKk01NqaBLM8bXeVVS6M6tv9kAy0G6qQ:1616997971678&gs_lcp=Cgdnd3Mtd2l6EAM6BwgjELADECc6BQghEKABOgQIIRAVOgcIIRAKEKABUIUIWKojYOckaABwAHgAgAHHAogB7RGSAQcxLjUuNC4ymAEAoAEBqgEHZ3dzLXdpesgBAcABAQ&uact=5&um=1&ie=UTF-8&sa=X&ved=2ahUKEwjbhef-7NTvAhWa93MBHaFHCzYQ_AUoAXoECAEQAw'
import requests
page = requests.get(url)
soup = BeautifulSoup(page.content, 'html.parser')
ps = soup.find_all(string = 'クチコミ')
ps
我也试过使用下面 chrome 开发者工具的 find 'class' 和 'span aria-label' 但还是不行
browser picture for html class
#ps = soup.find_all(class_='h0ySl-wcwwM-E70qVe-list')
#ps = soup.find_all('span aria-label')
#total_rev = ps.get_text()
#total_rev
这是我尝试使用 selenium 的代码
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from bs4 import BeautifulSoup
driver = webdriver.Chrome('F:/Download/SW/chromedriver_win32/chromedriver.exe')
url = 'https://www.google.com/maps/place/%E3%82%A2%E3%83%83%E3%83%90%E3%82%B7+%E3%82%B9%E3%82%A4%E3%83%BC%E3%83%84/@35.0903185,136.8551766,17z/data=!3m1!4b1!4m5!3m4!1s0x600378381c4bb1f7:0x8e9d356b9ded5bcc!8m2!3d35.0903185!4d136.8573653'
driver.get(url)
我曾尝试在“仍在营业”的餐厅中使用此代码获取评论数,但是当涉及到永久关闭的餐厅时,我无法获取评论数
span_review = WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.XPATH, '//button[contains(text(), "section-star")]'))).click()
#Find the total number of reviews
total_number_of_reviews = driver.find_element_by_xpath('//*[@id="pane"]/div/div[1]/div/div/div[2]/div[2]/div/div[2]/div[2]').text.split(" ")[0]
total_number_of_reviews = int(total_number_of_reviews.replace(',','')) if ',' in total_number_of_reviews else int(total_number_of_reviews)#Find scroll layout
total_reviews = driver.find_element_by_class_name("h0ySl-wcwwM-E70qVe-list")
total_reviews #= driver.get('aria-label')
total_reviews = total_reviews.get_text('aria-label')
total_reviews
total_reviews
total_number_of_reviews = total_reviews.text[0:]
total_number_of_reviews
希望能学到东西
谢谢!
我在 HTML 中找不到你的 xpath
。没有 <button>
文本 section-star
但 <li class="section-star">
.
并且 aria-label
不是文本而是 attribute
并且您必须使用 .get_attribute('aria-label')
但我找到了其他 xpath //button[jsaction="pane.rating.moreReviews"]
,它适用于 permanent closed
和 still operating
已在 Firefox 和 Chrome、Linux.
上测试
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
driver = webdriver.Chrome('F:/Download/SW/chromedriver_win32/chromedriver.exe')
#driver = webdriver.Chrome()
#driver = webdriver.Firefox()
all_urls = [
# permanent closed
'https://www.google.com/maps/place/%E3%82%A2%E3%83%83%E3%83%90%E3%82%B7+%E3%82%B9%E3%82%A4%E3%83%BC%E3%83%84/@35.0903185,136.8551766,17z/data=!3m1!4b1!4m5!3m4!1s0x600378381c4bb1f7:0x8e9d356b9ded5bcc!8m2!3d35.0903185!4d136.8573653',
# still operating
'https://www.google.com/maps/place/Seaside+Restaurant+Higashiyama+Garden+-+Port+Bldg./@35.0841323,136.8474088,14z/data=!3m1!5s0x6003790a61e056e7:0x7f307de064680a96!4m9!1m2!2m1!1srestaurants!3m5!1s0x600379a07cd9fcc7:0x89f84cc9f0422e30!8m2!3d35.0895485!4d136.8809243!15sCgtyZXN0YXVyYW50c1oNIgtyZXN0YXVyYW50c5IBCnJlc3RhdXJhbnQ',
]
for url in all_urls:
driver.get(url)
total_reviews = WebDriverWait(driver, 15).until(EC.element_to_be_clickable((By.XPATH, '//button[@jsaction="pane.rating.moreReviews"]')))
total_reviews = total_reviews.get_attribute('aria-label')
print(total_reviews)
我正在学习 python 以及网络抓取,我想从永久关闭的餐厅的 google 地图中获取评论数量,但我做不到,请你帮忙好吗?谢谢
from bs4 import BeautifulSoup
url = 'https://www.google.com/maps?q=asia+halal+restaurant+aichi+japan+open+date&safe=strict&rlz=1C1GCEA_enID892ID892&sxsrf=ALeKk01NqaBLM8bXeVVS6M6tv9kAy0G6qQ:1616997971678&gs_lcp=Cgdnd3Mtd2l6EAM6BwgjELADECc6BQghEKABOgQIIRAVOgcIIRAKEKABUIUIWKojYOckaABwAHgAgAHHAogB7RGSAQcxLjUuNC4ymAEAoAEBqgEHZ3dzLXdpesgBAcABAQ&uact=5&um=1&ie=UTF-8&sa=X&ved=2ahUKEwjbhef-7NTvAhWa93MBHaFHCzYQ_AUoAXoECAEQAw'
import requests
page = requests.get(url)
soup = BeautifulSoup(page.content, 'html.parser')
ps = soup.find_all(string = 'クチコミ')
ps
我也试过使用下面 chrome 开发者工具的 find 'class' 和 'span aria-label' 但还是不行
browser picture for html class
#ps = soup.find_all(class_='h0ySl-wcwwM-E70qVe-list')
#ps = soup.find_all('span aria-label')
#total_rev = ps.get_text()
#total_rev
这是我尝试使用 selenium 的代码
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from bs4 import BeautifulSoup
driver = webdriver.Chrome('F:/Download/SW/chromedriver_win32/chromedriver.exe')
url = 'https://www.google.com/maps/place/%E3%82%A2%E3%83%83%E3%83%90%E3%82%B7+%E3%82%B9%E3%82%A4%E3%83%BC%E3%83%84/@35.0903185,136.8551766,17z/data=!3m1!4b1!4m5!3m4!1s0x600378381c4bb1f7:0x8e9d356b9ded5bcc!8m2!3d35.0903185!4d136.8573653'
driver.get(url)
我曾尝试在“仍在营业”的餐厅中使用此代码获取评论数,但是当涉及到永久关闭的餐厅时,我无法获取评论数
span_review = WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.XPATH, '//button[contains(text(), "section-star")]'))).click()
#Find the total number of reviews
total_number_of_reviews = driver.find_element_by_xpath('//*[@id="pane"]/div/div[1]/div/div/div[2]/div[2]/div/div[2]/div[2]').text.split(" ")[0]
total_number_of_reviews = int(total_number_of_reviews.replace(',','')) if ',' in total_number_of_reviews else int(total_number_of_reviews)#Find scroll layout
total_reviews = driver.find_element_by_class_name("h0ySl-wcwwM-E70qVe-list")
total_reviews #= driver.get('aria-label')
total_reviews = total_reviews.get_text('aria-label')
total_reviews
total_reviews
total_number_of_reviews = total_reviews.text[0:]
total_number_of_reviews
希望能学到东西
谢谢!
我在 HTML 中找不到你的 xpath
。没有 <button>
文本 section-star
但 <li class="section-star">
.
并且 aria-label
不是文本而是 attribute
并且您必须使用 .get_attribute('aria-label')
但我找到了其他 xpath //button[jsaction="pane.rating.moreReviews"]
,它适用于 permanent closed
和 still operating
已在 Firefox 和 Chrome、Linux.
上测试from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
driver = webdriver.Chrome('F:/Download/SW/chromedriver_win32/chromedriver.exe')
#driver = webdriver.Chrome()
#driver = webdriver.Firefox()
all_urls = [
# permanent closed
'https://www.google.com/maps/place/%E3%82%A2%E3%83%83%E3%83%90%E3%82%B7+%E3%82%B9%E3%82%A4%E3%83%BC%E3%83%84/@35.0903185,136.8551766,17z/data=!3m1!4b1!4m5!3m4!1s0x600378381c4bb1f7:0x8e9d356b9ded5bcc!8m2!3d35.0903185!4d136.8573653',
# still operating
'https://www.google.com/maps/place/Seaside+Restaurant+Higashiyama+Garden+-+Port+Bldg./@35.0841323,136.8474088,14z/data=!3m1!5s0x6003790a61e056e7:0x7f307de064680a96!4m9!1m2!2m1!1srestaurants!3m5!1s0x600379a07cd9fcc7:0x89f84cc9f0422e30!8m2!3d35.0895485!4d136.8809243!15sCgtyZXN0YXVyYW50c1oNIgtyZXN0YXVyYW50c5IBCnJlc3RhdXJhbnQ',
]
for url in all_urls:
driver.get(url)
total_reviews = WebDriverWait(driver, 15).until(EC.element_to_be_clickable((By.XPATH, '//button[@jsaction="pane.rating.moreReviews"]')))
total_reviews = total_reviews.get_attribute('aria-label')
print(total_reviews)