以高性能将数据插入psql数据库
Inserting data into psql database with high performance
假设我有一个 Python 程序,其中有一个 Offer 对象:Offer(title='title1', category='cat1', regions=['reg1'])。
我想用尽可能少的查询(出于性能考虑)将此 Offer 添加到 PostgreSQL 数据库中。很少需要插入新的地区和类别(地区和类别的数量有限,且各自唯一),而优惠(Offer)的数量没有上限。
基本上,可以通过如下查询插入 Regions 和 Categories:
-- Add category 'cat1' only when no category with that name is stored yet.
-- Yields the new id when a row is inserted, and no row at all otherwise.
INSERT INTO Categories (name)
SELECT candidate.name
FROM (VALUES ('cat1')) AS candidate (name)
WHERE NOT EXISTS (
    SELECT 1
    FROM Categories AS existing
    WHERE existing.name = candidate.name
)
RETURNING id;
但是,当 region/category 已经存在时,上面的语句不会返回任何行,我需要再执行一次查询才能获取它的 id。我需要这个 id 才能向 Offers 表插入数据:
-- Insert offer 'title1' in category 'cat1' unless that (title, category) pair exists.
-- Offers.category references Categories, so the id is resolved from Categories.
-- Nothing is inserted when category 'cat1' does not exist.
INSERT INTO Offers (title, category)
SELECT 'title1', c.id
FROM Categories AS c
WHERE c.name = 'cat1'
  AND NOT EXISTS (
      SELECT 1
      FROM Offers AS o
      WHERE o.title = 'title1'
        AND o.category = c.id
  );
目前我的代码如下所示:
-- Add category 'cat1' only when no category with that name is stored yet.
INSERT INTO Categories (name)
SELECT candidate.name
FROM (VALUES ('cat1')) AS candidate (name)
WHERE NOT EXISTS (
    SELECT 1
    FROM Categories AS existing
    WHERE existing.name = candidate.name
);
-- Add region 'reg1' only when no region with that name is stored yet.
INSERT INTO Regions (name)
SELECT candidate.name
FROM (VALUES ('reg1')) AS candidate (name)
WHERE NOT EXISTS (
    SELECT 1
    FROM Regions AS existing
    WHERE existing.name = candidate.name
);
-- Insert offer 'title1' in category 'cat1' unless that (title, category) pair exists.
-- Offers.category references Categories, so the id is resolved from Categories.
-- Nothing is inserted when category 'cat1' does not exist.
INSERT INTO Offers (title, category)
SELECT 'title1', c.id
FROM Categories AS c
WHERE c.name = 'cat1'
  AND NOT EXISTS (
      SELECT 1
      FROM Offers AS o
      WHERE o.title = 'title1'
        AND o.category = c.id
  );
-- Link each offer titled 'title1' to region 'reg1', skipping links that already exist.
-- Both ids come from a single join instead of repeated scalar subqueries, so:
--   * nothing is inserted when the offer or the region is missing;
--   * several offers sharing the title do not raise a
--     "more than one row returned by a subquery" error (each one is linked).
INSERT INTO OfferRegions (offer, region)
SELECT o.id, r.id
FROM Offers AS o
CROSS JOIN Regions AS r
WHERE o.title = 'title1'
  AND r.name = 'reg1'
  AND NOT EXISTS (
      SELECT 1
      FROM OfferRegions AS existing
      WHERE existing.offer = o.id
        AND existing.region = r.id
  );
我不知道如何做到既稳健又高效(没有不必要的 SELECT)。我使用的是 Python/psycopg2。
表结构:
-- Lookup table of regions; every region name is stored at most once.
CREATE TABLE IF NOT EXISTS Regions (
    id   SERIAL PRIMARY KEY,
    name TEXT NOT NULL UNIQUE
);
-- Lookup table of categories; every category name is stored at most once.
CREATE TABLE IF NOT EXISTS Categories (
    id   SERIAL PRIMARY KEY,
    name TEXT NOT NULL UNIQUE
);
-- Offers; each offer points at exactly one category.
CREATE TABLE IF NOT EXISTS Offers (
    -- PRIMARY KEY already enforces uniqueness of id; no extra UNIQUE (id) is needed.
    id       SERIAL PRIMARY KEY,
    title    TEXT,
    -- Plain INTEGER rather than SERIAL: a foreign key must hold the id of an
    -- existing category, not a default drawn from a sequence of its own.
    category INTEGER NOT NULL
        REFERENCES Categories (id) ON UPDATE CASCADE ON DELETE CASCADE
);
-- Junction table linking offers to regions; each (offer, region) pair occurs once.
CREATE TABLE IF NOT EXISTS OfferRegions (
    -- Plain INTEGER rather than SERIAL: both columns are foreign keys and must
    -- hold ids of existing rows, not defaults drawn from sequences of their own.
    offer  INTEGER NOT NULL
        REFERENCES Offers (id) ON UPDATE CASCADE ON DELETE CASCADE,
    region INTEGER NOT NULL
        REFERENCES Regions (id) ON UPDATE CASCADE ON DELETE CASCADE,
    UNIQUE (offer, region)
);
您需要的是 INSERT ... ON CONFLICT DO NOTHING。
为此,标识对象的列上必须有唯一约束。
配合 RETURNING 子句可以取回新生成的 id(仅当确实插入了新行时才会返回;发生冲突时不返回任何行),例如:
-- Insert the category unless its name is already taken.
-- The conflict target is written ON CONFLICT (column), without a second ON.
-- RETURNING yields the new id only when a row was actually inserted;
-- on conflict the statement returns no row.
INSERT INTO categories (name)
VALUES ('cat1')
ON CONFLICT (name) DO NOTHING
RETURNING id;
依赖表可以这样填充:使用变量 cat_id,它由上述查询的结果设置(若上述查询没有返回行,则为 NULL):
-- cat_id is a placeholder for the id returned by the category insert; it is
-- expected to be NULL when that statement returned no row, in which case the
-- id of the existing category is looked up by name instead.
INSERT INTO offers (category, title)
SELECT
    COALESCE(cat_id, (SELECT id FROM categories WHERE name = 'cat1')),
    'title1';
当然,这里存在竞争条件:有人可能在您执行第二个 INSERT 之前删除 categories 中的那一行。但也许这已经足够好了。
假设我有一个 Python 程序,其中有一个 Offer 对象:Offer(title='title1', category='cat1', regions=['reg1'])。
我想用尽可能少的查询(出于性能考虑)将此 Offer 添加到 PostgreSQL 数据库中。很少需要插入新的地区和类别(地区和类别的数量有限,且各自唯一),而优惠(Offer)的数量没有上限。
基本上,可以通过如下查询插入 Regions 和 Categories:
-- Add category 'cat1' only when no category with that name is stored yet.
-- Yields the new id when a row is inserted, and no row at all otherwise.
INSERT INTO Categories (name)
SELECT candidate.name
FROM (VALUES ('cat1')) AS candidate (name)
WHERE NOT EXISTS (
    SELECT 1
    FROM Categories AS existing
    WHERE existing.name = candidate.name
)
RETURNING id;
但是,当 region/category 已经存在时,上面的语句不会返回任何行,我需要再执行一次查询才能获取它的 id。我需要这个 id 才能向 Offers 表插入数据:
-- Insert offer 'title1' in category 'cat1' unless that (title, category) pair exists.
-- Offers.category references Categories, so the id is resolved from Categories.
-- Nothing is inserted when category 'cat1' does not exist.
INSERT INTO Offers (title, category)
SELECT 'title1', c.id
FROM Categories AS c
WHERE c.name = 'cat1'
  AND NOT EXISTS (
      SELECT 1
      FROM Offers AS o
      WHERE o.title = 'title1'
        AND o.category = c.id
  );
目前我的代码如下所示:
-- Add category 'cat1' only when no category with that name is stored yet.
INSERT INTO Categories (name)
SELECT candidate.name
FROM (VALUES ('cat1')) AS candidate (name)
WHERE NOT EXISTS (
    SELECT 1
    FROM Categories AS existing
    WHERE existing.name = candidate.name
);
-- Add region 'reg1' only when no region with that name is stored yet.
INSERT INTO Regions (name)
SELECT candidate.name
FROM (VALUES ('reg1')) AS candidate (name)
WHERE NOT EXISTS (
    SELECT 1
    FROM Regions AS existing
    WHERE existing.name = candidate.name
);
-- Insert offer 'title1' in category 'cat1' unless that (title, category) pair exists.
-- Offers.category references Categories, so the id is resolved from Categories.
-- Nothing is inserted when category 'cat1' does not exist.
INSERT INTO Offers (title, category)
SELECT 'title1', c.id
FROM Categories AS c
WHERE c.name = 'cat1'
  AND NOT EXISTS (
      SELECT 1
      FROM Offers AS o
      WHERE o.title = 'title1'
        AND o.category = c.id
  );
-- Link each offer titled 'title1' to region 'reg1', skipping links that already exist.
-- Both ids come from a single join instead of repeated scalar subqueries, so:
--   * nothing is inserted when the offer or the region is missing;
--   * several offers sharing the title do not raise a
--     "more than one row returned by a subquery" error (each one is linked).
INSERT INTO OfferRegions (offer, region)
SELECT o.id, r.id
FROM Offers AS o
CROSS JOIN Regions AS r
WHERE o.title = 'title1'
  AND r.name = 'reg1'
  AND NOT EXISTS (
      SELECT 1
      FROM OfferRegions AS existing
      WHERE existing.offer = o.id
        AND existing.region = r.id
  );
我不知道如何做到既稳健又高效(没有不必要的 SELECT)。我使用的是 Python/psycopg2。
表结构:
-- Lookup table of regions; every region name is stored at most once.
CREATE TABLE IF NOT EXISTS Regions (
    id   SERIAL PRIMARY KEY,
    name TEXT NOT NULL UNIQUE
);
-- Lookup table of categories; every category name is stored at most once.
CREATE TABLE IF NOT EXISTS Categories (
    id   SERIAL PRIMARY KEY,
    name TEXT NOT NULL UNIQUE
);
-- Offers; each offer points at exactly one category.
CREATE TABLE IF NOT EXISTS Offers (
    -- PRIMARY KEY already enforces uniqueness of id; no extra UNIQUE (id) is needed.
    id       SERIAL PRIMARY KEY,
    title    TEXT,
    -- Plain INTEGER rather than SERIAL: a foreign key must hold the id of an
    -- existing category, not a default drawn from a sequence of its own.
    category INTEGER NOT NULL
        REFERENCES Categories (id) ON UPDATE CASCADE ON DELETE CASCADE
);
-- Junction table linking offers to regions; each (offer, region) pair occurs once.
CREATE TABLE IF NOT EXISTS OfferRegions (
    -- Plain INTEGER rather than SERIAL: both columns are foreign keys and must
    -- hold ids of existing rows, not defaults drawn from sequences of their own.
    offer  INTEGER NOT NULL
        REFERENCES Offers (id) ON UPDATE CASCADE ON DELETE CASCADE,
    region INTEGER NOT NULL
        REFERENCES Regions (id) ON UPDATE CASCADE ON DELETE CASCADE,
    UNIQUE (offer, region)
);
您需要的是 INSERT ... ON CONFLICT DO NOTHING。
为此,标识对象的列上必须有唯一约束。
配合 RETURNING 子句可以取回新生成的 id(仅当确实插入了新行时才会返回;发生冲突时不返回任何行),例如:
-- Insert the category unless its name is already taken.
-- The conflict target is written ON CONFLICT (column), without a second ON.
-- RETURNING yields the new id only when a row was actually inserted;
-- on conflict the statement returns no row.
INSERT INTO categories (name)
VALUES ('cat1')
ON CONFLICT (name) DO NOTHING
RETURNING id;
依赖表可以这样填充:使用变量 cat_id,它由上述查询的结果设置(若上述查询没有返回行,则为 NULL):
-- cat_id is a placeholder for the id returned by the category insert; it is
-- expected to be NULL when that statement returned no row, in which case the
-- id of the existing category is looked up by name instead.
INSERT INTO offers (category, title)
SELECT
    COALESCE(cat_id, (SELECT id FROM categories WHERE name = 'cat1')),
    'title1';
当然,这里存在竞争条件:有人可能在您执行第二个 INSERT 之前删除 categories 中的那一行。但也许这已经足够好了。