用于性能的 PostgreSQL CROSS JOIN 索引
PostgreSQL CROSS JOIN indexing for performance
这是我问题的第二部分。
我有如下这张表:
-- Transaction table as quoted in the question. Only the columns and
-- constraints shown in the post are reproduced.
CREATE TABLE public.main_transaction
(
    id integer NOT NULL DEFAULT nextval('main_transaction_id_seq'::regclass),
    profile_id integer NOT NULL,
    request_no character varying(18),
    user_id bigint,
    -- ..... (remaining columns were elided in the original post)
    CONSTRAINT main_transaction_pkey PRIMARY KEY (id),
    CONSTRAINT fk_main_transaction_user_id FOREIGN KEY (user_id)
        REFERENCES public.jhi_user (id) MATCH SIMPLE
        ON UPDATE NO ACTION ON DELETE NO ACTION,
    -- Single foreign key to main_profile. The post also carried a second,
    -- header-less "REFERENCES public.main_profile (id) ..." clause with the
    -- same target and options; it was not valid SQL and added nothing, so
    -- only the named constraint is kept.
    CONSTRAINT main_transaction_profile_id_20_fk_main_profile_id FOREIGN KEY (profile_id)
        REFERENCES public.main_profile (id) MATCH SIMPLE
        ON UPDATE NO ACTION ON DELETE NO ACTION DEFERRABLE INITIALLY DEFERRED
);
这张表中有外键,我使用 CROSS JOIN 通过 id 把各个表关联起来,
这导致了性能下降(数据超过 100 万行时,大约需要 12 秒才能返回结果)。
更准确地说,上面的 main_transaction 表中有一个指向 profile 表的 profile_id 列,
而 profile 表中有一个指向 customer 表的 customer_id 列(在下面的查询中,该关联列写作 p.user_id)。
我的查询如下:
-- Query from the question: the three tables are combined with CROSS JOIN
-- and the join keys are applied afterwards in WHERE.
SELECT *
FROM main_transaction AS t
    CROSS JOIN main_profile AS p
    CROSS JOIN main_customer AS c
WHERE t.profile_id = p.id
    AND p.user_id = c.id
    -- Upper-cased substring match on either the request number or the phone.
    -- NOTE: the first pattern ends with a space and the second does not;
    -- both literals are kept exactly as posted.
    AND (
        upper(t.request_no) LIKE upper(concat('%','0-90-6 12 ','%'))
        OR upper(c.phone) LIKE upper(concat('%','0-90-6 12','%'))
    )
如果需要 profile 和 customer 表的结构,我可以在评论中补充。
EXPLAIN (ANALYZE, BUFFERS) 的输出:
"Merge Join (cost=27220.51..266464.85 rows=218 width=1692) (actual time=9399.370..9399.370 rows=0 loops=1)"
" Merge Cond: (t.profile_id = p.id)"
" Join Filter: ((upper((t.request_no)::text) ~~ upper(concat('%', ' 0-90-6 12 ', '%'))) OR (upper((c.phone)::text) ~~ upper(concat('%', ' 0-90-6 12, '%'))))"
" Rows Removed by Join Filter: 1089489"
" Buffers: shared hit=453158 read=413372, temp read=1560 written=1560"
" -> Index Scan using main_transaction_profile_id_idx on main_transaction t (cost=0.43..198177.36 rows=1089489 width=1455) (actual time=0.004..3913.501 rows=1089489 loops=1)"
" Buffers: shared hit=393656 read=410718"
" -> Materialize (cost=27218.84..27645.70 rows=85372 width=237) (actual time=165.565..239.572 rows=1133650 loops=1)"
" Buffers: shared hit=59502 read=2654, temp read=1560 written=1560"
" -> Sort (cost=27218.84..27432.27 rows=85372 width=237) (actual time=165.560..188.746 rows=85368 loops=1)"
" Sort Key: p.id"
" Sort Method: external sort Disk: 12480kB"
" Buffers: shared hit=59502 read=2654, temp read=1560 written=1560"
" -> Merge Join (cost=0.73..10594.24 rows=85372 width=237) (actual time=0.007..96.133 rows=85372 loops=1)"
" Merge Cond: (p.user_id = c.id)"
" Buffers: shared hit=59502 read=2654"
" -> Index Scan using main_profile_e8701ad4 on main_profile p (cost=0.29..3350.82 rows=85372 width=40) (actual time=0.003..24.892 rows=85372 loops=1)"
" Buffers: shared hit=29016 read=963"
" -> Index Scan using main_customer_pkey on main_customer c (cost=0.29..5963.16 rows=85506 width=197) (actual time=0.003..34.235 rows=85506 loops=1)"
" Buffers: shared hit=30486 read=1691"
"Planning time: 0.850 ms"
"Execution time: 9407.244 ms"
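您的主要问题在于 OR——只要 WHERE 子句中存在这样的 OR,您就永远无法获得良好的性能。
该 OR 条件同时涉及 main_transaction 和 main_customer 两张表,因此只能在连接之后作为 Join Filter 求值(上面的执行计划中由此过滤掉了 1089489 行)。
请将查询重写如下: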
-- Each branch filters on a single table. UNION (rather than UNION ALL)
-- removes duplicates, so a row that satisfies both predicates is
-- returned only once.
SELECT *
FROM main_transaction AS t
INNER JOIN main_profile AS p
    ON t.profile_id = p.id
INNER JOIN main_customer AS c
    ON p.user_id = c.id
WHERE upper(t.request_no) LIKE upper(concat('%','0-90-6 12 ','%'))

UNION

SELECT *
FROM main_transaction AS t
INNER JOIN main_profile AS p
    ON t.profile_id = p.id
INNER JOIN main_customer AS c
    ON p.user_id = c.id
WHERE upper(c.phone) LIKE upper(concat('%','0-90-6 12','%'));
然后确保您已创建以下索引(除了 id 上的索引之外):
-- gin_trgm_ops is provided by the pg_trgm extension; without it the two
-- GIN index definitions below fail because the operator class is unknown.
CREATE EXTENSION IF NOT EXISTS pg_trgm;

-- Indexes on the join keys.
-- NOTE(review): the plan in the question already scans an index named
-- main_transaction_profile_id_idx; skip the first statement if that index
-- is on profile_id alone -- confirm before creating a duplicate.
CREATE INDEX ON main_transaction (profile_id);
CREATE INDEX ON main_profile (user_id);

-- Trigram GIN indexes on the exact expressions used in the WHERE clauses,
-- so the LIKE '%...%' searches on upper(...) can be answered from an index.
CREATE INDEX ON main_transaction USING gin (upper(request_no) gin_trgm_ops);
CREATE INDEX ON main_customer USING gin (upper(phone) gin_trgm_ops);
这样应该会带来明显的改善。
这是我问题的第二部分。我有如下这张表:
-- Transaction table as quoted in the question. Only the columns and
-- constraints shown in the post are reproduced.
CREATE TABLE public.main_transaction
(
    id integer NOT NULL DEFAULT nextval('main_transaction_id_seq'::regclass),
    profile_id integer NOT NULL,
    request_no character varying(18),
    user_id bigint,
    -- ..... (remaining columns were elided in the original post)
    CONSTRAINT main_transaction_pkey PRIMARY KEY (id),
    CONSTRAINT fk_main_transaction_user_id FOREIGN KEY (user_id)
        REFERENCES public.jhi_user (id) MATCH SIMPLE
        ON UPDATE NO ACTION ON DELETE NO ACTION,
    -- Single foreign key to main_profile. The post also carried a second,
    -- header-less "REFERENCES public.main_profile (id) ..." clause with the
    -- same target and options; it was not valid SQL and added nothing, so
    -- only the named constraint is kept.
    CONSTRAINT main_transaction_profile_id_20_fk_main_profile_id FOREIGN KEY (profile_id)
        REFERENCES public.main_profile (id) MATCH SIMPLE
        ON UPDATE NO ACTION ON DELETE NO ACTION DEFERRABLE INITIALLY DEFERRED
);
这张表中有外键,我使用 CROSS JOIN 通过 id 把各个表关联起来,
这导致了性能下降(数据超过 100 万行时,大约需要 12 秒才能返回结果)。
更准确地说,上面的 main_transaction 表中有一个指向 profile 表的 profile_id 列,
而 profile 表中有一个指向 customer 表的 customer_id 列(在下面的查询中,该关联列写作 p.user_id)。
我的查询如下:
-- Query from the question: the three tables are combined with CROSS JOIN
-- and the join keys are applied afterwards in WHERE.
SELECT *
FROM main_transaction AS t
    CROSS JOIN main_profile AS p
    CROSS JOIN main_customer AS c
WHERE t.profile_id = p.id
    AND p.user_id = c.id
    -- Upper-cased substring match on either the request number or the phone.
    -- NOTE: the first pattern ends with a space and the second does not;
    -- both literals are kept exactly as posted.
    AND (
        upper(t.request_no) LIKE upper(concat('%','0-90-6 12 ','%'))
        OR upper(c.phone) LIKE upper(concat('%','0-90-6 12','%'))
    )
如果需要 profile 和 customer 表的结构,我可以在评论中补充。
EXPLAIN (ANALYZE, BUFFERS) 的输出:
"Merge Join (cost=27220.51..266464.85 rows=218 width=1692) (actual time=9399.370..9399.370 rows=0 loops=1)"
" Merge Cond: (t.profile_id = p.id)"
" Join Filter: ((upper((t.request_no)::text) ~~ upper(concat('%', ' 0-90-6 12 ', '%'))) OR (upper((c.phone)::text) ~~ upper(concat('%', ' 0-90-6 12, '%'))))"
" Rows Removed by Join Filter: 1089489"
" Buffers: shared hit=453158 read=413372, temp read=1560 written=1560"
" -> Index Scan using main_transaction_profile_id_idx on main_transaction t (cost=0.43..198177.36 rows=1089489 width=1455) (actual time=0.004..3913.501 rows=1089489 loops=1)"
" Buffers: shared hit=393656 read=410718"
" -> Materialize (cost=27218.84..27645.70 rows=85372 width=237) (actual time=165.565..239.572 rows=1133650 loops=1)"
" Buffers: shared hit=59502 read=2654, temp read=1560 written=1560"
" -> Sort (cost=27218.84..27432.27 rows=85372 width=237) (actual time=165.560..188.746 rows=85368 loops=1)"
" Sort Key: p.id"
" Sort Method: external sort Disk: 12480kB"
" Buffers: shared hit=59502 read=2654, temp read=1560 written=1560"
" -> Merge Join (cost=0.73..10594.24 rows=85372 width=237) (actual time=0.007..96.133 rows=85372 loops=1)"
" Merge Cond: (p.user_id = c.id)"
" Buffers: shared hit=59502 read=2654"
" -> Index Scan using main_profile_e8701ad4 on main_profile p (cost=0.29..3350.82 rows=85372 width=40) (actual time=0.003..24.892 rows=85372 loops=1)"
" Buffers: shared hit=29016 read=963"
" -> Index Scan using main_customer_pkey on main_customer c (cost=0.29..5963.16 rows=85506 width=197) (actual time=0.003..34.235 rows=85506 loops=1)"
" Buffers: shared hit=30486 read=1691"
"Planning time: 0.850 ms"
"Execution time: 9407.244 ms"
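您的主要问题在于 OR——只要 WHERE 子句中存在这样的 OR,您就永远无法获得良好的性能。
该 OR 条件同时涉及 main_transaction 和 main_customer 两张表,因此只能在连接之后作为 Join Filter 求值(上面的执行计划中由此过滤掉了 1089489 行)。
请将查询重写如下: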
-- Each branch filters on a single table. UNION (rather than UNION ALL)
-- removes duplicates, so a row that satisfies both predicates is
-- returned only once.
SELECT *
FROM main_transaction AS t
INNER JOIN main_profile AS p
    ON t.profile_id = p.id
INNER JOIN main_customer AS c
    ON p.user_id = c.id
WHERE upper(t.request_no) LIKE upper(concat('%','0-90-6 12 ','%'))

UNION

SELECT *
FROM main_transaction AS t
INNER JOIN main_profile AS p
    ON t.profile_id = p.id
INNER JOIN main_customer AS c
    ON p.user_id = c.id
WHERE upper(c.phone) LIKE upper(concat('%','0-90-6 12','%'));
然后确保您已创建以下索引(除了 id 上的索引之外):
-- gin_trgm_ops is provided by the pg_trgm extension; without it the two
-- GIN index definitions below fail because the operator class is unknown.
CREATE EXTENSION IF NOT EXISTS pg_trgm;

-- Indexes on the join keys.
-- NOTE(review): the plan in the question already scans an index named
-- main_transaction_profile_id_idx; skip the first statement if that index
-- is on profile_id alone -- confirm before creating a duplicate.
CREATE INDEX ON main_transaction (profile_id);
CREATE INDEX ON main_profile (user_id);

-- Trigram GIN indexes on the exact expressions used in the WHERE clauses,
-- so the LIKE '%...%' searches on upper(...) can be answered from an index.
CREATE INDEX ON main_transaction USING gin (upper(request_no) gin_trgm_ops);
CREATE INDEX ON main_customer USING gin (upper(phone) gin_trgm_ops);
这样应该会带来明显的改善。