LinkExtractor for xpath-ed response TypeError: unhashable type: 'SelectorList'
LinkExtractor for xpath-ed response TypeError: unhashable type: 'SelectorList'
我正在使用 scrapy 2.5
和 python 3.8
目前我的源码是这样的
import scrapy
from scrapy.linkextractors import LinkExtractor
class YahooSpider(scrapy.Spider):
name = 'yahoo'
allowed_domains = ['news.yahoo.com']
def start_requests(self):
urls = ['https://news.yahoo.com/search?p=%E6%97%A5%E6%9C%AC%E3%83%8F%E3%83%A0&ei=utf-8']
for url in urls:
yield scrapy.Request(url=url, callback=self.parse)
def parse(self, response):
xlink = LinkExtractor()
for link in xlink.extract_links(response):
print("GETLINK:{0}".format(link)) #OK I can get the link of page.
contents = response.xpath("//div[contains(@class,'contentsWrap')]") # xpath-ed div class contents
for link in xlink.extract_links(contents):
print("GETLINK:{0}".format(link)) # error TypeError: unhashable type: 'SelectorList'
pass
显示TypeError: unhashable type: 'SelectorList'
我理解这个错误的意思,
但是在 xpath-ed 之后有没有办法使用 extract_links?
您正在尝试从选择器中提取链接。
使用 restrict_xpaths 并从响应中提取链接。
(另请注意,您使用了错误的 xpath,没有 'contentsWrap' class...)
import scrapy
from scrapy.linkextractors import LinkExtractor
class YahooSpider(scrapy.Spider):
name = 'yahoo'
allowed_domains = ['news.yahoo.com']
def start_requests(self):
urls = ['https://news.yahoo.com/search?p=%E6%97%A5%E6%9C%AC%E3%83%8F%E3%83%A0&ei=utf-8']
for url in urls:
yield scrapy.Request(url=url, callback=self.parse)
def parse(self, response):
xlink = LinkExtractor()
for link in xlink.extract_links(response):
print("GETLINK:{0}".format(link))
# maybe: ???
#xlink.restrict_xpaths = ['//div[contains(@class, "contents")]']
xlink.restrict_xpaths = ["//div[contains(@class,'contentsWrap')]"] # xpath-ed div class contents
for link in xlink.extract_links(response): # this is empty
print("GETLINK:{0}".format(link))
pass
我正在使用 scrapy 2.5
和 python 3.8
目前我的源码是这样的
import scrapy
from scrapy.linkextractors import LinkExtractor
class YahooSpider(scrapy.Spider):
name = 'yahoo'
allowed_domains = ['news.yahoo.com']
def start_requests(self):
urls = ['https://news.yahoo.com/search?p=%E6%97%A5%E6%9C%AC%E3%83%8F%E3%83%A0&ei=utf-8']
for url in urls:
yield scrapy.Request(url=url, callback=self.parse)
def parse(self, response):
xlink = LinkExtractor()
for link in xlink.extract_links(response):
print("GETLINK:{0}".format(link)) #OK I can get the link of page.
contents = response.xpath("//div[contains(@class,'contentsWrap')]") # xpath-ed div class contents
for link in xlink.extract_links(contents):
print("GETLINK:{0}".format(link)) # error TypeError: unhashable type: 'SelectorList'
pass
显示TypeError: unhashable type: 'SelectorList'
我理解这个错误的意思,
但是在 xpath-ed 之后有没有办法使用 extract_links?
您正在尝试从选择器中提取链接。
使用 restrict_xpaths 并从响应中提取链接。
(另请注意,您使用了错误的 xpath,没有 'contentsWrap' class...)
import scrapy
from scrapy.linkextractors import LinkExtractor
class YahooSpider(scrapy.Spider):
name = 'yahoo'
allowed_domains = ['news.yahoo.com']
def start_requests(self):
urls = ['https://news.yahoo.com/search?p=%E6%97%A5%E6%9C%AC%E3%83%8F%E3%83%A0&ei=utf-8']
for url in urls:
yield scrapy.Request(url=url, callback=self.parse)
def parse(self, response):
xlink = LinkExtractor()
for link in xlink.extract_links(response):
print("GETLINK:{0}".format(link))
# maybe: ???
#xlink.restrict_xpaths = ['//div[contains(@class, "contents")]']
xlink.restrict_xpaths = ["//div[contains(@class,'contentsWrap')]"] # xpath-ed div class contents
for link in xlink.extract_links(response): # this is empty
print("GETLINK:{0}".format(link))
pass