Python scrapy 管道错误

Python scrapy pipeline error

我正在开发用于抓取某些页面的爬虫,但我遇到了 scrapy-pipeline 问题。我不知道为什么。脚本中有什么问题?

# -*- coding: utf-8 -*-

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
from scrapy.exceptions import DropItem
import sqlite3

con = None

class RealtybasePipeline(object):

    def __init__(self):
        self.setupDBCon()
        self.createTables()

    def process_item(self, item, spider):
        self.storeInDb(item)
        return item

    def storeInDb(self, item):
        dealerId = self.cur.lastrowid
        self.storeSrealityInfoInDb(item)
        self.storeDealerInfoInDb(item)

    def storeSrealityInfoInDb(self, item):
        self.cur.execute("INSERT INTO sreality(\
            name, \
            price, \
            url, \
            estateType, \
            adress, \
            createDate, \
            source, \
            dealerName, \
            dealerMail, \
            dealerPhoto, \
            ) \
        VALUES( ?, ?, ?, ?, ?, ?, ?, ?, ? )", \
        ( \
            item.get('name'), 
            item.get('price'), 
            item.get('url'), 
            item.get('estateType'), 
            item.get('adress'), 
            item.get('createDate'), 
            item.get('source'), 
            item.get('dealerName'), 
            item.get('dealerMail'), 
            item.get('dealerPhoto'), 
        ))
        self.con.commit()  

    def storeDealerInfoInDb(self, item):
        self.cur.execute("INSERT INTO Actors(\
            dealerName, \
            dealerMail, \
            dealerPhoto \
            ) \
        VALUES(?,?,?)", 
        (
            item.get('dealerName'), 
            item.get('dealerMail'), 
            item.get('dealerPhoto'), 
            ))
        self.con.commit()  

    def setupDBCon(self):
        self.con = sqlite3.connect('test.db')
        self.cur = self.con.cursor() 


    # this is the class destructor. It will get called automaticly by python's garbage collecter once this class is no longer used. 
    def __del__(self):
        self.closeDB()

    # I'm currently droping the tables if they exist before I run the script each time, so that
    # I don't get duplicate info. 
    def createTables(self):
        #self.dropSrealityTable()
        #self.dropDealersTable()
        self.createSrealityTable()
        self.createDealersTable()


    def createSrealityTable(self):
        self.cur.execute("CREATE TABLE IF NOT EXISTS sreality(name TEXT PRIMARY KEY NOT NULL, \
            price TEXT, \
            url TEXT, \
            photo TEXT, \
            estateType TEXT, \
            adress TEXT, \
            createDate Text, \
            source TEXT, \
            dealerName TEXT, \
            dealerMail TEXT, \
            dealerPhoto TEXT, \
            )")

    def createDealersTable(self):
        self.cur.execute("CREATE TABLE IF NOT EXISTS Actors(dealerName TEXT PRIMARY KEY NOT NULL, \
            dealerMail TEXT, \
            dealerPhoto TEXT )")

    def dropSrealityTable(self):
        self.cur.execute("DROP TABLE IF EXISTS sreality")

    def dropDealersTable(self):
        self.cur.execute("DROP TABLE IF EXISTS dealers")

    def closeDB(self):
        self.con.close()

在我 运行 蜘蛛之后,它抛出一些错误..

  File "/home/pr0n/Dropbox/realtyBase/realtyBase/pipelines.py", line 16, in __init__
    self.createTables()

  File "/home/pr0n/Dropbox/realtyBase/realtyBase/pipelines.py", line 83, in createTables
    self.createSrealityTable()

  File "/home/pr0n/Dropbox/realtyBase/realtyBase/pipelines.py", line 99, in createSrealityTable
    )")
sqlite3.OperationalError: near ")": syntax error

右括号前多了一个逗号:

self.cur.execute("CREATE TABLE IF NOT EXISTS sreality(name TEXT PRIMARY KEY NOT NULL, \
        price TEXT, \
        url TEXT, \
        photo TEXT, \
        estateType TEXT, \
        adress TEXT, \
        createDate Text, \
        source TEXT, \
        dealerName TEXT, \
        dealerMail TEXT, \
        dealerPhoto TEXT, \  # < HERE
        )")

附带一点,为了提高可读性,您可以用三引号将多行查询写成:

self.cur.execute("""
    CREATE TABLE IF NOT EXISTS 
        sreality 
        (name TEXT PRIMARY KEY NOT NULL, 
         price TEXT, 
         url TEXT, 
         photo TEXT, 
         estateType TEXT, 
         adress TEXT, 
         createDate Text, 
         source TEXT, 
         dealerName TEXT, 
         dealerMail TEXT, 
         dealerPhoto TEXT)
""")

你也可以考虑用 ORM 抽象出来,比如 sqlalchemy