Getting error {KeyError}<Classname spidername at 0x7fb3f6b9c790> from self.files.pop(spider) in scrapy pipeline
When I run scrapy to export multiple CSVs from a single spider, I get a {KeyError} from self.files.pop(spider).
Here is my pipeline.
class PhysiciansPipeline(object):

    def __init__(self, spider):
        self.files = {}
        full_path = result_path(spider.result_path_type, spider.name)
        self.exporter1 = CsvItemExporter(fields_to_export=PhysiciansItem.fields.keys(), file=open(full_path + 'file1.csv', 'wb'))
        self.exporter2 = CsvItemExporter(fields_to_export=SpecialtiesItem.fields.keys(), file=open(full_path + 'file2.csv', 'wb'))
        self.exporter3 = CsvItemExporter(fields_to_export=LocationsItem.fields.keys(), file=open(full_path + 'file3.csv', 'wb'))

    @classmethod
    def from_crawler(cls, crawler):
        spider = crawler.spider
        pipeline = cls(spider)
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        self.exporter1.start_exporting()
        self.exporter2.start_exporting()
        self.exporter3.start_exporting()

    def spider_closed(self, spider):
        self.exporter1.finish_exporting()
        self.exporter2.finish_exporting()
        self.exporter3.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        self.exporter1.export_item(item)
        self.exporter2.export_item(item)
        self.exporter3.export_item(item)
        return item
I also added this to settings.py:

ITEM_PIPELINES = {
    'physicians.pipelines.PhysiciansPipeline': 300,
}
What is wrong with this code?
Thanks.
I don't see any values being added to self.files in the first place. As for the error, it means the spider key does not exist in self.files. I'm guessing you are looking for self.files.pop(spider.name).
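For the pop to work at all, something has to be stored under that key first. A minimal sketch of that idea, reusing the file names and the result_path helper from the question and keying the dict by the spider object so that self.files.pop(spider) finds an entry:

# Sketch only: keep the opened file handles in self.files, keyed by the spider,
# so spider_closed can pop and close them without a KeyError.
def __init__(self, spider):
    self.files = {}
    full_path = result_path(spider.result_path_type, spider.name)  # helper from the question
    f1 = open(full_path + 'file1.csv', 'wb')
    f2 = open(full_path + 'file2.csv', 'wb')
    f3 = open(full_path + 'file3.csv', 'wb')
    self.files[spider] = [f1, f2, f3]  # key matches what spider_closed pops
    self.exporter1 = CsvItemExporter(fields_to_export=PhysiciansItem.fields.keys(), file=f1)
    self.exporter2 = CsvItemExporter(fields_to_export=SpecialtiesItem.fields.keys(), file=f2)
    self.exporter3 = CsvItemExporter(fields_to_export=LocationsItem.fields.keys(), file=f3)

def spider_closed(self, spider):
    self.exporter1.finish_exporting()
    self.exporter2.finish_exporting()
    self.exporter3.finish_exporting()
    for f in self.files.pop(spider):  # the key now exists, so no KeyError
        f.close()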
EDIT:
class PhysiciansPipeline(object):

    def __init__(self, spider):
        # keep every opened file so spider_closed can close them all
        self.files = []
        self.full_path = result_path(spider.result_path_type, spider.name)

        file1 = open(self.full_path + 'physicians.csv', 'wb')
        self.files.append(file1)
        self.exporter1 = CsvItemExporter(fields_to_export=PhysiciansItem.fields.keys(), file=file1)

        file2 = open(self.full_path + 'specialities.csv', 'wb')
        self.files.append(file2)
        self.exporter2 = CsvItemExporter(fields_to_export=SpecialtiesItem.fields.keys(), file=file2)

        file3 = open(self.full_path + 'locations.csv', 'wb')
        self.files.append(file3)
        self.exporter3 = CsvItemExporter(fields_to_export=LocationsItem.fields.keys(), file=file3)

    @classmethod
    def from_crawler(cls, crawler):
        spider = crawler.spider
        pipeline = cls(spider)
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        self.exporter1.start_exporting()
        self.exporter2.start_exporting()
        self.exporter3.start_exporting()

    def spider_closed(self, spider):
        self.exporter1.finish_exporting()
        self.exporter2.finish_exporting()
        self.exporter3.finish_exporting()
        for _file in self.files:
            _file.close()
        clean_csv(self.full_path)

    def process_item(self, item, spider):
        self.exporter1.export_item(item)
        self.exporter2.export_item(item)
        self.exporter3.export_item(item)
        return item
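Note that process_item above still passes every item through all three exporters, so each item ends up in all three CSVs. If the intent is one item type per file, a hedged sketch of per-type dispatch (assuming PhysiciansItem, SpecialtiesItem and LocationsItem are the item classes already used for the exporters) could look like this:

# Sketch only: route each item to the exporter that matches its item class.
def process_item(self, item, spider):
    if isinstance(item, PhysiciansItem):
        self.exporter1.export_item(item)
    elif isinstance(item, SpecialtiesItem):
        self.exporter2.export_item(item)
    elif isinstance(item, LocationsItem):
        self.exporter3.export_item(item)
    return item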