使用 Pymssql 将数据插入 MS SQL 数据库时出错
Error inserting data into MS SQL DB Using Pymssql
已修复
对于一个教育项目,我试图将抓取的数据存储在 MS SQL 数据库中。首先,我希望每个独特的项目都放在 products_tb
中。插入唯一产品后,SQL 必须为所述项目生成一个唯一 ID,即 productgroupid
。 products_tb
table 只会产生永远不会改变的产品信息,例如productid, category, name and description
。在第二个 table 中,我将在我完成此工作后创建它,我将存储以下数据:productgroupid, price, timestamp
。这样做的原因是这些可能会不时更改。使用 productgroupid
我可以随时对所有数据进行分组并创建图表等。
问题是我无法让我的 pipelines.py
工作。但是我确实设法使用注释代码块将数据插入我的 SQL 数据库:
# self.cursor.execute("INSERT INTO products_tb(productid, category, name, description, price, timestamp) VALUES (%s, %s, %s, %s, %s, %s)",
# (item['productid'], item['category'], item['name'], item['description'], item['price'], item['timestamp']))
似乎可以使用以下代码
pipelines.py
import pymssql
class KrcPipeline(object):
def __init__(self):
self.conn = pymssql.connect(host='DESKTOP-P1TF28R', user='sa', password='123', database='kaercher')
self.cursor = self.conn.cursor()
def process_item(self, item, spider):
# self.cursor.execute("INSERT INTO products_tb(productid, category, name, description, price, timestamp) VALUES (%s, %s, %s, %s, %s, %s)",
# (item['productid'], item['category'], item['name'], item['description'], item['price'], item['timestamp']))
sql_statement = f'''
BEGIN
IF NOT EXISTS (SELECT * FROM [kaercher].[dbo].[products_tb]
WHERE productid = {item['productid']})
BEGIN
INSERT INTO [kaercher].[dbo].[products_tb] (productid, category, name, description)
OUTPUT (Inserted.productgroupid)
VALUES ({item['productid']}, '{item['category']}', '{item['name']}', '{item['description']}')
END
ELSE
BEGIN
SELECT productgroupid FROM [kaercher].[dbo].[products_tb]
WHERE productid = {item['productid']}
END
END
'''
self.cursor.execute(sql_statement)
self.conn.commit()
return item
items.py
import scrapy
class KrcItem(scrapy.Item):
productid=scrapy.Field()
name=scrapy.Field()
description=scrapy.Field()
price=scrapy.Field()
producttype=scrapy.Field()
timestamp=scrapy.Field()
category=scrapy.Field()
pass
编辑:
我错过了另一个小错误。 "IF NOT EXIST" 需要更改为 "IF NOT EXISTS"。
import pymssql
class KrcPipeline(object):
def __init__(self):
self.conn = pymssql.connect(host='DESKTOP-P1TF28R', user='sa', password='123', database='kaercher')
self.cursor = self.conn.cursor()
def process_item(self, item, spider):
# self.cursor.execute("INSERT INTO products_tb(productid, category, name, description, price, timestamp) VALUES (%s, %s, %s, %s, %s, %s)",
# (item['productid'], item['category'], item['name'], item['description'], item['price'], item['timestamp']))
sql_statement = f'''
BEGIN
IF NOT EXISTS (SELECT * FROM [kaercher].[dbo].[products_tb]
WHERE productid = {item['productid']})
BEGIN
INSERT INTO [kaercher].[dbo].[products_tb] (productid, category, name, description)
OUTPUT (Inserted.productgroupid)
VALUES ({item['productid']}, {item['category']}, {item['name']}, {item['description']})
END
ELSE
BEGIN
SELECT productgroupid FROM [kaercher].[dbo].[products_tb]
WHERE productid = {item['productid']}
END
END
'''
self.cursor.execute(sql_statement)
self.conn.commit()
return item
原文:
您在定义 sql_statement 时没有正确调用项目字典中的值。试试这个:
import pymssql
class KrcPipeline(object):
def __init__(self):
self.conn = pymssql.connect(host='DESKTOP-P1TF28R', user='sa', password='123', database='kaercher')
self.cursor = self.conn.cursor()
def process_item(self, item, spider):
# self.cursor.execute("INSERT INTO products_tb(productid, category, name, description, price, timestamp) VALUES (%s, %s, %s, %s, %s, %s)",
# (item['productid'], item['category'], item['name'], item['description'], item['price'], item['timestamp']))
sql_statement = f'''
BEGIN
IF NOT EXIST (SELECT * FROM [kaercher].[dbo].[products_tb]
WHERE productid = {item['productid']})
BEGIN
INSERT INTO [kaercher].[dbo].[products_tb] (productid, category, name, description)
OUTPUT (Inserted.productgroupid)
VALUES ({item['productid']}, {item['category']}, {item['name']}, {item['description']})
END
ELSE
BEGIN
SELECT productgroupid FROM [kaercher].[dbo].[products_tb]
WHERE productid = {item['productid']}
END
END
'''
self.cursor.execute(sql_statement)
self.conn.commit()
return item
已修复
对于一个教育项目,我试图将抓取的数据存储在 MS SQL 数据库中。首先,我希望每个独特的项目都放在 products_tb
中。插入唯一产品后,SQL 必须为所述项目生成一个唯一 ID,即 productgroupid
。 products_tb
table 只会产生永远不会改变的产品信息,例如productid, category, name and description
。在第二个 table 中,我将在我完成此工作后创建它,我将存储以下数据:productgroupid, price, timestamp
。这样做的原因是这些可能会不时更改。使用 productgroupid
我可以随时对所有数据进行分组并创建图表等。
问题是我无法让我的 pipelines.py
工作。但是我确实设法使用注释代码块将数据插入我的 SQL 数据库:
# self.cursor.execute("INSERT INTO products_tb(productid, category, name, description, price, timestamp) VALUES (%s, %s, %s, %s, %s, %s)",
# (item['productid'], item['category'], item['name'], item['description'], item['price'], item['timestamp']))
似乎可以使用以下代码
pipelines.py
import pymssql
class KrcPipeline(object):
def __init__(self):
self.conn = pymssql.connect(host='DESKTOP-P1TF28R', user='sa', password='123', database='kaercher')
self.cursor = self.conn.cursor()
def process_item(self, item, spider):
# self.cursor.execute("INSERT INTO products_tb(productid, category, name, description, price, timestamp) VALUES (%s, %s, %s, %s, %s, %s)",
# (item['productid'], item['category'], item['name'], item['description'], item['price'], item['timestamp']))
sql_statement = f'''
BEGIN
IF NOT EXISTS (SELECT * FROM [kaercher].[dbo].[products_tb]
WHERE productid = {item['productid']})
BEGIN
INSERT INTO [kaercher].[dbo].[products_tb] (productid, category, name, description)
OUTPUT (Inserted.productgroupid)
VALUES ({item['productid']}, '{item['category']}', '{item['name']}', '{item['description']}')
END
ELSE
BEGIN
SELECT productgroupid FROM [kaercher].[dbo].[products_tb]
WHERE productid = {item['productid']}
END
END
'''
self.cursor.execute(sql_statement)
self.conn.commit()
return item
items.py
import scrapy
class KrcItem(scrapy.Item):
productid=scrapy.Field()
name=scrapy.Field()
description=scrapy.Field()
price=scrapy.Field()
producttype=scrapy.Field()
timestamp=scrapy.Field()
category=scrapy.Field()
pass
编辑:
我错过了另一个小错误。 "IF NOT EXIST" 需要更改为 "IF NOT EXISTS"。
import pymssql
class KrcPipeline(object):
def __init__(self):
self.conn = pymssql.connect(host='DESKTOP-P1TF28R', user='sa', password='123', database='kaercher')
self.cursor = self.conn.cursor()
def process_item(self, item, spider):
# self.cursor.execute("INSERT INTO products_tb(productid, category, name, description, price, timestamp) VALUES (%s, %s, %s, %s, %s, %s)",
# (item['productid'], item['category'], item['name'], item['description'], item['price'], item['timestamp']))
sql_statement = f'''
BEGIN
IF NOT EXISTS (SELECT * FROM [kaercher].[dbo].[products_tb]
WHERE productid = {item['productid']})
BEGIN
INSERT INTO [kaercher].[dbo].[products_tb] (productid, category, name, description)
OUTPUT (Inserted.productgroupid)
VALUES ({item['productid']}, {item['category']}, {item['name']}, {item['description']})
END
ELSE
BEGIN
SELECT productgroupid FROM [kaercher].[dbo].[products_tb]
WHERE productid = {item['productid']}
END
END
'''
self.cursor.execute(sql_statement)
self.conn.commit()
return item
原文:
您在定义 sql_statement 时没有正确调用项目字典中的值。试试这个:
import pymssql
class KrcPipeline(object):
def __init__(self):
self.conn = pymssql.connect(host='DESKTOP-P1TF28R', user='sa', password='123', database='kaercher')
self.cursor = self.conn.cursor()
def process_item(self, item, spider):
# self.cursor.execute("INSERT INTO products_tb(productid, category, name, description, price, timestamp) VALUES (%s, %s, %s, %s, %s, %s)",
# (item['productid'], item['category'], item['name'], item['description'], item['price'], item['timestamp']))
sql_statement = f'''
BEGIN
IF NOT EXIST (SELECT * FROM [kaercher].[dbo].[products_tb]
WHERE productid = {item['productid']})
BEGIN
INSERT INTO [kaercher].[dbo].[products_tb] (productid, category, name, description)
OUTPUT (Inserted.productgroupid)
VALUES ({item['productid']}, {item['category']}, {item['name']}, {item['description']})
END
ELSE
BEGIN
SELECT productgroupid FROM [kaercher].[dbo].[products_tb]
WHERE productid = {item['productid']}
END
END
'''
self.cursor.execute(sql_statement)
self.conn.commit()
return item