触发Selenium点击功能后无法获取标签文本
Can't get tag text after trigger Selenium click function
http://211.21.63.217:81/ticket_online.aspx
爬取网站电影列表数据,成功获取到电影名称、电影时间
然后我想抓取每部电影明天的放映时间,所以我使用 Selenium 点击每个明天日期的 <span />。
我确保 Selenium 已点击每个 <Span />
我也已设置 time.sleep
并检查了标签路径是否正确,但明天的 movieTime 结果为空。
我不知道为什么。
这是我的代码:
# -*- coding: utf-8 -*-
import scrapy
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from scrapy.http import HtmlResponse
from datetime import datetime
from datetime import timedelta
import time
# Date labels in 'MM/DD' form, e.g. 07/02.
# The clock is read once so both dates derive from the same instant; two
# separate datetime.now() calls could straddle midnight and skip a day.
_now = datetime.now()
tomorrowDate = _now + timedelta(days=1)
tomorrow = tomorrowDate.strftime('%m/%d')
afterTomorrowDate = _now + timedelta(days=2)
afterTomorrow = afterTomorrowDate.strftime('%m/%d')
print(tomorrow)
print(afterTomorrow)
class CenturyasiaxinbeiSpider(scrapy.Spider):
    """Print today's and tomorrow's showtimes from ticket_online.aspx.

    Today's listing is parsed from the response Scrapy downloads. For
    tomorrow's listing, a Selenium-driven Chrome loads the same page, every
    ``<span>`` whose text equals the module-level ``tomorrow`` string is
    clicked, and the browser's resulting page source is parsed again.
    """

    name = 'CenturyasiaXinbei'
    # Host names only: Scrapy ignores allowed_domains entries that carry a port.
    allowed_domains = ['www.centuryasia.com.tw', '211.21.63.217']
    start_urls = ['http://211.21.63.217:81/ticket_online.aspx']

    # One <section> per movie.
    _MOVIE_BOX_XPATH = (
        '//article[@id="ContentPlaceHolder1_Time_box"]'
        '//section[@class="tickets_movie_time_box"]'
    )
    # Common prefix of the per-movie paths below (relative to a movie section).
    _TIMES_ROOT = './div[@class="tickets_movie_time"]/div[@class="tickets_times"]'
    _CN_NAME_XPATH = (
        _TIMES_ROOT
        + '/div[@class="tickets_times_t"]/div[@class="times_title"]/text()'
    )
    _EN_NAME_XPATH = (
        _TIMES_ROOT
        + '/div[@class="tickets_times_t"]/div[@class="times_title_en"]/text()'
    )
    _TODAY_TIMES_XPATH = (
        _TIMES_ROOT
        + '/div[@class="tickets_date"]/div[3]/div'
        + '/div[@class="tickets_date_csbox"]/div/ul/li/text()'
    )
    # The fixed div[3]/div[@class="tickets_date_cts"]/ul path returned nothing
    # after the click and never extracted text; a relative search for the
    # tickets_date_cts container is used instead.
    # NOTE(review): confirm against the live markup that this matches only the
    # selected date's list.
    _TOMORROW_TIMES_XPATH = './/div[@class="tickets_date_cts"]/ul/li/text()'

    def __init__(self, *args, **kwargs):
        """Initialise the spider and open the listing page in Chrome.

        Positional and keyword arguments are forwarded to ``scrapy.Spider``
        so the spider can still be built with spider arguments.
        """
        super(CenturyasiaxinbeiSpider, self).__init__(*args, **kwargs)
        print('__init__')
        self.driver = webdriver.Chrome('/Users/motogod19/chromedriver')
        self.driver.get('http://211.21.63.217:81/ticket_online.aspx')

    def parse(self, response):
        """Print name and sorted showtimes per movie, for today then tomorrow.

        Args:
            response: the Scrapy response for the listing page (today's view).
        """
        # Today: straight from the downloaded response.
        for movie_node in response.xpath(self._MOVIE_BOX_XPATH):
            self._print_movie(movie_node, self._TODAY_TIMES_XPATH)

        # Give the Selenium-loaded page time to finish rendering.
        time.sleep(2)
        date_spans = self.driver.find_elements_by_xpath(
            "//span[text()='{}']".format(tomorrow)
        )
        for date_span in date_spans:
            # Move to the span before clicking it.
            ActionChains(self.driver).move_to_element(date_span).perform()
            date_span.click()
        # Let the page react to the clicks before snapshotting the DOM.
        time.sleep(1)

        # Tomorrow: wrap the browser's current DOM so the same selector API applies.
        tomorrow_response = HtmlResponse(
            url=self.driver.current_url,
            body=self.driver.page_source,
            encoding='utf-8',
        )
        for movie_node in tomorrow_response.xpath(self._MOVIE_BOX_XPATH):
            self._print_movie(movie_node, self._TOMORROW_TIMES_XPATH)

        # NOTE(review): looks like a debugging pause that keeps the browser
        # open for inspection; it blocks the crawl for a minute.
        time.sleep(60)

    def _print_movie(self, movie_node, times_xpath):
        """Print one movie's Chinese name, English name and sorted showtimes."""
        print(movie_node.xpath(self._CN_NAME_XPATH).get())
        print(movie_node.xpath(self._EN_NAME_XPATH).get())
        # Sort the extracted showtime strings.
        print(sorted(movie_node.xpath(times_xpath).getall()))
        print('\n')

    def closed(self, reason):
        """Quit the Selenium browser when Scrapy closes the spider."""
        self.driver.quit()
试试这个 XPath,它对 movieTime = firstHtmlNode.xpath() 和 movieTime = secondHtmlNode.xpath() 都适用:
.//div[@class='tickets_date_cts']/ul/li
http://211.21.63.217:81/ticket_online.aspx
爬取网站电影列表数据,成功获取到电影名称、电影时间
然后我想抓取每部电影明天的放映时间,所以我使用 Selenium 点击每个明天日期的 <span />。
我确保 Selenium 已点击每个 <Span />
我也已设置 time.sleep
并检查了标签路径是否正确,但明天的 movieTime 结果为空。
我不知道为什么。
这是我的代码:
# -*- coding: utf-8 -*-
import scrapy
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from scrapy.http import HtmlResponse
from datetime import datetime
from datetime import timedelta
import time
# Date labels in 'MM/DD' form, e.g. 07/02.
# The clock is read once so both dates derive from the same instant; two
# separate datetime.now() calls could straddle midnight and skip a day.
_now = datetime.now()
tomorrowDate = _now + timedelta(days=1)
tomorrow = tomorrowDate.strftime('%m/%d')
afterTomorrowDate = _now + timedelta(days=2)
afterTomorrow = afterTomorrowDate.strftime('%m/%d')
print(tomorrow)
print(afterTomorrow)
class CenturyasiaxinbeiSpider(scrapy.Spider):
    """Print today's and tomorrow's showtimes from ticket_online.aspx.

    Today's listing is parsed from the response Scrapy downloads. For
    tomorrow's listing, a Selenium-driven Chrome loads the same page, every
    ``<span>`` whose text equals the module-level ``tomorrow`` string is
    clicked, and the browser's resulting page source is parsed again.
    """

    name = 'CenturyasiaXinbei'
    # Host names only: Scrapy ignores allowed_domains entries that carry a port.
    allowed_domains = ['www.centuryasia.com.tw', '211.21.63.217']
    start_urls = ['http://211.21.63.217:81/ticket_online.aspx']

    # One <section> per movie.
    _MOVIE_BOX_XPATH = (
        '//article[@id="ContentPlaceHolder1_Time_box"]'
        '//section[@class="tickets_movie_time_box"]'
    )
    # Common prefix of the per-movie paths below (relative to a movie section).
    _TIMES_ROOT = './div[@class="tickets_movie_time"]/div[@class="tickets_times"]'
    _CN_NAME_XPATH = (
        _TIMES_ROOT
        + '/div[@class="tickets_times_t"]/div[@class="times_title"]/text()'
    )
    _EN_NAME_XPATH = (
        _TIMES_ROOT
        + '/div[@class="tickets_times_t"]/div[@class="times_title_en"]/text()'
    )
    _TODAY_TIMES_XPATH = (
        _TIMES_ROOT
        + '/div[@class="tickets_date"]/div[3]/div'
        + '/div[@class="tickets_date_csbox"]/div/ul/li/text()'
    )
    # The fixed div[3]/div[@class="tickets_date_cts"]/ul path returned nothing
    # after the click and never extracted text; a relative search for the
    # tickets_date_cts container is used instead.
    # NOTE(review): confirm against the live markup that this matches only the
    # selected date's list.
    _TOMORROW_TIMES_XPATH = './/div[@class="tickets_date_cts"]/ul/li/text()'

    def __init__(self, *args, **kwargs):
        """Initialise the spider and open the listing page in Chrome.

        Positional and keyword arguments are forwarded to ``scrapy.Spider``
        so the spider can still be built with spider arguments.
        """
        super(CenturyasiaxinbeiSpider, self).__init__(*args, **kwargs)
        print('__init__')
        self.driver = webdriver.Chrome('/Users/motogod19/chromedriver')
        self.driver.get('http://211.21.63.217:81/ticket_online.aspx')

    def parse(self, response):
        """Print name and sorted showtimes per movie, for today then tomorrow.

        Args:
            response: the Scrapy response for the listing page (today's view).
        """
        # Today: straight from the downloaded response.
        for movie_node in response.xpath(self._MOVIE_BOX_XPATH):
            self._print_movie(movie_node, self._TODAY_TIMES_XPATH)

        # Give the Selenium-loaded page time to finish rendering.
        time.sleep(2)
        date_spans = self.driver.find_elements_by_xpath(
            "//span[text()='{}']".format(tomorrow)
        )
        for date_span in date_spans:
            # Move to the span before clicking it.
            ActionChains(self.driver).move_to_element(date_span).perform()
            date_span.click()
        # Let the page react to the clicks before snapshotting the DOM.
        time.sleep(1)

        # Tomorrow: wrap the browser's current DOM so the same selector API applies.
        tomorrow_response = HtmlResponse(
            url=self.driver.current_url,
            body=self.driver.page_source,
            encoding='utf-8',
        )
        for movie_node in tomorrow_response.xpath(self._MOVIE_BOX_XPATH):
            self._print_movie(movie_node, self._TOMORROW_TIMES_XPATH)

        # NOTE(review): looks like a debugging pause that keeps the browser
        # open for inspection; it blocks the crawl for a minute.
        time.sleep(60)

    def _print_movie(self, movie_node, times_xpath):
        """Print one movie's Chinese name, English name and sorted showtimes."""
        print(movie_node.xpath(self._CN_NAME_XPATH).get())
        print(movie_node.xpath(self._EN_NAME_XPATH).get())
        # Sort the extracted showtime strings.
        print(sorted(movie_node.xpath(times_xpath).getall()))
        print('\n')

    def closed(self, reason):
        """Quit the Selenium browser when Scrapy closes the spider."""
        self.driver.quit()
试试这个 XPath,它对 movieTime = firstHtmlNode.xpath() 和 movieTime = secondHtmlNode.xpath() 都适用:
.//div[@class='tickets_date_cts']/ul/li