TypeError: 'instancemethod' object has no attribute '__getitem__' when running scrapy spider
TypeError: 'instancemethod' object has no attribute '__getitem__' when running scrapy spider
我在运行 scrapy 项目时遇到了这个错误。
我的 spider.py 代码是:
import scrapy
import re
from tutorial.items import TutorialItem
# Spider from the question. parse() creates one TutorialItem per node matched by the
# article XPath, then issues a follow-up Request whose callback fills in the rest.
# NOTE: indentation was lost when this snippet was extracted; the code is kept verbatim.
class tutorialSpider(scrapy.Spider):
name="tutorial"
allowed_domain=['examble.com']
start_urls = ["examble.com/something"]
def parse(self, response):
for sel in response.xpath('//*[@id="post-entry"]/div/article'):
item = TutorialItem()
# `.extract` is referenced but never called on the next three lines, so `[0]`
# subscripts the method object itself -- this is what raises the TypeError in the title.
item['Title'] = sel.xpath('div[2]/h2/a/text()').extract[0]
# NOTE(review): the key is spelled 'MainPageUrl' here but 'MianPageUrl' on the
# Request line below -- confirm which spelling the item class declares.
item['MainPageUrl'] = sel.xpath('div[2]/h2/a/@href').extract[0]
item['Author'] = sel.xpath('div[2]/div/span/a/text()').extract[0]
request = scrapy.Request(item['MianPageUrl'], callback=self.parseContentDetails)
# Hand the partially filled item to the callback through the request's meta dict.
request.meta['item'] = item
yield request
def parseContentDetails(self,response):
# Retrieve the item that parse() attached to the request.
item = response.meta['item']
# NOTE(review): xpath() is called with no query on the next two lines --
# presumably the expressions were omitted from the post; confirm.
item['Content'] = response.xpath()
item['Count'] = response.xpath()
# Python 2 print statement (debug output of the item's type).
print type(item)
return item
我的pipeline.py是
# Item pipeline from the question. process_item() iterates over the item's fields and
# passes their values through self.stripHTML before returning the item.
# NOTE: indentation was lost when this snippet was extracted; the code is kept verbatim.
# NOTE(review): stripHTML is not defined anywhere in this snippet -- presumably it
# exists elsewhere in the real file; verify.
class TutorialPipeline(object):
def __init__(self):
# Both setup calls are commented out, leaving __init__ with no statements here.
#self.setupDBCon()
#self.createTables()
def process_item(self, item, spider):
for key, value in item.iteritems():
# List values are cleaned element by element; the nesting of the branches
# below is inferred from statement order -- TODO confirm against the original.
if(isinstance(value, list)):
if value:
templist = []
for obj in value:
temp = self.stripHTML(obj)
templist.append(temp)
item[key] = templist
else:
# Presumably the empty-list case: the field is replaced by an empty string.
item[key] = ""
else:
# Presumably the non-list case: the value is cleaned directly.
item[key] = self.stripHTML(value)
# Python 2 print statement; falls back to '' when 'Title' is absent.
print item.get('Title', '')
return item
我的items.py是
from scrapy.item import Item, Field
# Item definition from the question: declares five fields.
# NOTE: indentation was lost when this snippet was extracted; the code is kept verbatim.
class TutorialItem(Item):
Title=Field()
Author = Field()
# NOTE(review): spelled 'MianPageUrl', while the spider also assigns
# item['MainPageUrl'] -- looks like a typo on one side; confirm which is intended.
MianPageUrl = Field()
Content=Field()
Count=Field()
请告诉我这个错误的解决方法。我搜索了很多网站,但找到的页面只讲了 django 中 instancemethod 对象没有属性的错误,
而我想要 scrapy 的解决方案。
您没有正确调用 extract:对于每个 extract,您必须先实际调用该方法,然后再对结果进行索引:
item['Title'] = sel.xpath('div[2]/h2/a/text()').extract()[0]
^^^
如果你只想要第一个元素,可以使用 extract_first():
item['Title'] = sel.xpath('div[2]/h2/a/text()').extract_first()
我在运行 scrapy 项目时遇到了这个错误。我的 spider.py 代码是:
import scrapy
import re
from tutorial.items import TutorialItem
# Spider from the question. parse() creates one TutorialItem per node matched by the
# article XPath, then issues a follow-up Request whose callback fills in the rest.
# NOTE: indentation was lost when this snippet was extracted; the code is kept verbatim.
class tutorialSpider(scrapy.Spider):
name="tutorial"
allowed_domain=['examble.com']
start_urls = ["examble.com/something"]
def parse(self, response):
for sel in response.xpath('//*[@id="post-entry"]/div/article'):
item = TutorialItem()
# `.extract` is referenced but never called on the next three lines, so `[0]`
# subscripts the method object itself -- this is what raises the TypeError in the title.
item['Title'] = sel.xpath('div[2]/h2/a/text()').extract[0]
# NOTE(review): the key is spelled 'MainPageUrl' here but 'MianPageUrl' on the
# Request line below -- confirm which spelling the item class declares.
item['MainPageUrl'] = sel.xpath('div[2]/h2/a/@href').extract[0]
item['Author'] = sel.xpath('div[2]/div/span/a/text()').extract[0]
request = scrapy.Request(item['MianPageUrl'], callback=self.parseContentDetails)
# Hand the partially filled item to the callback through the request's meta dict.
request.meta['item'] = item
yield request
def parseContentDetails(self,response):
# Retrieve the item that parse() attached to the request.
item = response.meta['item']
# NOTE(review): xpath() is called with no query on the next two lines --
# presumably the expressions were omitted from the post; confirm.
item['Content'] = response.xpath()
item['Count'] = response.xpath()
# Python 2 print statement (debug output of the item's type).
print type(item)
return item
我的pipeline.py是
# Item pipeline from the question. process_item() iterates over the item's fields and
# passes their values through self.stripHTML before returning the item.
# NOTE: indentation was lost when this snippet was extracted; the code is kept verbatim.
# NOTE(review): stripHTML is not defined anywhere in this snippet -- presumably it
# exists elsewhere in the real file; verify.
class TutorialPipeline(object):
def __init__(self):
# Both setup calls are commented out, leaving __init__ with no statements here.
#self.setupDBCon()
#self.createTables()
def process_item(self, item, spider):
for key, value in item.iteritems():
# List values are cleaned element by element; the nesting of the branches
# below is inferred from statement order -- TODO confirm against the original.
if(isinstance(value, list)):
if value:
templist = []
for obj in value:
temp = self.stripHTML(obj)
templist.append(temp)
item[key] = templist
else:
# Presumably the empty-list case: the field is replaced by an empty string.
item[key] = ""
else:
# Presumably the non-list case: the value is cleaned directly.
item[key] = self.stripHTML(value)
# Python 2 print statement; falls back to '' when 'Title' is absent.
print item.get('Title', '')
return item
我的items.py是
from scrapy.item import Item, Field
# Item definition from the question: declares five fields.
# NOTE: indentation was lost when this snippet was extracted; the code is kept verbatim.
class TutorialItem(Item):
Title=Field()
Author = Field()
# NOTE(review): spelled 'MianPageUrl', while the spider also assigns
# item['MainPageUrl'] -- looks like a typo on one side; confirm which is intended.
MianPageUrl = Field()
Content=Field()
Count=Field()
请告诉我这个错误的解决方法。我搜索了很多网站,但找到的页面只讲了 django 中 instancemethod 对象没有属性的错误,而我想要 scrapy 的解决方案。
您没有正确调用 extract:对于每个 extract,您必须先实际调用该方法,然后再对结果进行索引:
item['Title'] = sel.xpath('div[2]/h2/a/text()').extract()[0]
^^^
如果你只想要第一个元素,可以使用 extract_first():
item['Title'] = sel.xpath('div[2]/h2/a/text()').extract_first()