将 keep dense_rank 从 Oracle 查询转换为 postgres
Convert keep dense_rank from Oracle query into postgres
我正在尝试将以下 Oracle 查询转换为 Postgres
select
this_.GLOBAL_TRANSACTION_ID as y0_,
this_.BUSINESS_IDENTIFIER as y1_,
this_.ENVIRONMENT as y2_,
count(*) as y3_,
this_.HOST_NAME as y4_,
listagg(process,
', ') within
group (order by
date_time) as process,
min(this_.DATE_TIME) as y6_,
max(this_.DATE_TIME) as y7_,
max(status)keep(dense_rank last
order by
date_time,
decode(status,
'COMPLETED',
'd',
'FAILED',
'c',
'TERMINATED',
'b',
'STARTED',
'a',
'z')) as status
from
ACTIVITY_MONITOR_TRANSACTION this_
where
this_.DATE_TIME between ? and ?
and 1=1
group by
this_.GLOBAL_TRANSACTION_ID,
this_.BUSINESS_IDENTIFIER,
this_.ENVIRONMENT,
this_.HOST_NAME,
global_transaction_id,
business_identifier,
global_transaction_id,
business_identifier
order by
y7_ asc
问题是我不知道如何转换这个块:
max(status)keep(dense_rank last
order by
date_time,
decode(status,
'COMPLETED',
'd',
'FAILED',
'c',
'TERMINATED',
'b',
'STARTED',
'a',
'z')) as status
这个块的目的是获取最新的状态,如果完全相同的时间(这是可能的!)按照上面的顺序分配状态。
This is an example of data:
ID DATA_TIME GLOBAL_TRANSACTION_ID STATUS
===================================================================
54938456;"2015-04-20 09:39:27";"8d276718-eca7-4fd0-a266 ;"STARTED"
54938505;"2015-04-20 09:39:27";"8d276718-eca7-4fd0-a266 ;"COMPLETED"
54938507;"2015-04-20 09:39:27";"8d276718-eca7-4fd0-a266 ;"FAILED"
54938507;"2015-04-20 09:38:25";"8d276718-eca7-4fd0-a266 ;"FAILED"
状态应该是 "COMPLETED" 所以我的查询应该 return,在其他行中,如下:
GLOBAL_TRANSACTION_ID COUNT (...) STATUS
=====================================================
8d276718-eca7-4fd0-a266 4 (...) COMPLETED
我尝试将查询拆分为 2:
select
this_.GLOBAL_TRANSACTION_ID as y0_,
this_.BUSINESS_IDENTIFIER as y1_,
this_.ENVIRONMENT as y2_,
count(*) as y3_,
this_.HOST_NAME as y4_,
array_to_string(array_agg(distinct process),
',') as process,
min(this_.DATE_TIME) as y6_,
max(this_.DATE_TIME) as y7_,
max(this_.STATUS) as y8_
from
ACTIVITY_MONITOR_TRANSACTION this_
where
this_.DATE_TIME between ? and ?
group by
this_.GLOBAL_TRANSACTION_ID,
this_.BUSINESS_IDENTIFIER,
this_.ENVIRONMENT,
this_.HOST_NAME,
global_transaction_id,
business_identifier
order by
y7_ desc limit ?
然后
select
status
from
activity_monitor_transaction
where
GLOBAL_TRANSACTION_ID=?
order by
date_time DESC,
CASE status
WHEN 'COMPLETED'THEN 'd'
WHEN 'FAILED' THEN 'c'
WHEN 'TERMINATED' THEN 'b'
WHEN 'STARTED' THEN 'a'
ELSE 'z'
END DESC LIMIT 1
但这导致了我的性能问题,因为我必须每行执行一次第二个查询。
这是 postgres 的 table 脚本:
CREATE TABLE activity_monitor_transaction
(
id numeric(11,0) NOT NULL,
date_time timestamp(6) without time zone NOT NULL,
global_transaction_id character varying(40) NOT NULL,
repost_flag character(1) NOT NULL DEFAULT 'N'::bpchar,
environment character varying(20),
transaction_mode character varying(20),
status character varying(20),
step character varying(80),
event character varying(20),
event_code character varying(20),
event_subcode character varying(20),
summary character varying(200),
business_identifier character varying(80),
alternate_business_identifier character varying(80),
domain character varying(20),
process character varying(80),
service_name character varying(80),
service_version character varying(20),
detail text,
app_name character varying(80),
app_user character varying(20),
host_name character varying(80),
thread_name character varying(200),
CONSTRAINT activity_monitor_transact_pk PRIMARY KEY (id)
USING INDEX TABLESPACE actmon_data
)
和一些数据:
insert into ACTIVITY_MONITOR_TRANSACTION values
(54938456,'2015-04-20 09:39:27','8d276718-eca7-4fd0-a266-d465181f911a','N','Perf','','STARTED','servicereq.p2p.rso.blaze.dedup.in.channel','PROCESS','','','','3100729','51174628','ERP','servicereq-p2p-rso-blaze','servicereq-p2p-rso-blaze','1.0.0-SNAPSHOT','','servicereq-p2p-rso-blaze','CIC','intintprf20','SimpleAsyncTaskExecutor-88177');
insert into ACTIVITY_MONITOR_TRANSACTION values
(54938505,'2015-04-20 09:45:27','8d276718-eca7-4fd0-a266-d465181f911a','N','Perf','','COMPLETED','servicereq.p2p.rso.blaze.service.out.channel','PROCESS','','','','3100729','51174628','ERP','servicereq-p2p-rso-blaze','servicereq-p2p-rso-blaze','1.0.0-SNAPSHOT','','servicereq-p2p-rso-blaze','CIC','intintprf20','SimpleAsyncTaskExecutor-88177');
insert into ACTIVITY_MONITOR_TRANSACTION values
(54938507,'2015-04-20 09:45:27','8d276718-eca7-4fd0-a266-d465181f911a','N','Perf','','FAILED','inputChannel','PROCESS','','','','3100729','','ERP','servicereq-p2p-rso-blaze','servicereq-p2p-rso-blaze','1.0.0-SNAPSHOT','','servicereq-p2p-rso-blaze','CIC','intintprf20','SimpleAsyncTaskExecutor-88177');
有什么方法可以模仿 keep dense_rank 块到 postgres 以便只有一个查询吗?
您可以使用 PostgreSQL WINDOW FUNCTIONS
-- we only added infos to the activity_monitor_transaction
-- we are free to group by date_time or status
SELECT
first_value(status) OVER w AS global_transaction_status,
count(*) OVER w AS global_transaction_count,
activity_monitor_transaction.*
FROM
activity_monitor_transaction
WINDOW w AS (
PARTITION BY global_transaction_id
ORDER BY date_time DESC, id DESC
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
)
示例代码
CREATE TABLE prices
(
ticker VARCHAR2(3),
pdate DATE,
price FLOAT
);
INSERT INTO prices( ticker, pdate, price) VALUES ('usd', DATE'2016-10-22', 55.11);
INSERT INTO prices( ticker, pdate, price) VALUES ('usd', DATE'2016-10-22', 56.11);
INSERT INTO prices( ticker, pdate, price) VALUES ('usd', DATE'2016-10-25', 57.11);
INSERT INTO prices( ticker, pdate, price) VALUES ('usd', DATE'2016-10-25', 57.22);
INSERT INTO prices( ticker, pdate, price) VALUES ('usd', DATE'2016-10-27', 58.11);
INSERT INTO prices( ticker, pdate, price) VALUES ('usd', DATE'2016-11-29', 57.13);
INSERT INTO prices( ticker, pdate, price) VALUES ('usd', DATE'2016-11-29', 57.20);
INSERT INTO prices( ticker, pdate, price) VALUES ('eur', DATE'2016-11-22', 65.11);
INSERT INTO prices( ticker, pdate, price) VALUES ('eur', DATE'2016-11-22', 65.12);
INSERT INTO prices( ticker, pdate, price) VALUES ('eur', DATE'2016-11-25', 67.11);
INSERT INTO prices( ticker, pdate, price) VALUES ('eur', DATE'2016-11-27', 68.11);
INSERT INTO prices( ticker, pdate, price) VALUES ('chf', DATE'2016-11-22', 88.11);
INSERT INTO prices( ticker, pdate, price) VALUES ('chf', DATE'2016-11-25', 88.33);
INSERT INTO prices( ticker, pdate, price) VALUES ('chf', DATE'2016-11-25', 89.33);
甲骨文代码
SELECT
ticker,
min(price) KEEP (DENSE_RANK FIRST ORDER BY pdate) as minfirstprice,
max(price) KEEP (DENSE_RANK FIRST ORDER BY pdate) as maxfirstprice,
min(price) KEEP (DENSE_RANK LAST ORDER BY pdate) as minlastprice,
max(price) KEEP (DENSE_RANK LAST ORDER BY pdate) as maxlastprice,
sum(price) KEEP (DENSE_RANK FIRST ORDER BY pdate) as sumfirstprice,
sum(price) KEEP (DENSE_RANK LAST ORDER BY pdate) as sumlastprice
FROM prices
GROUP BY ticker
ORDER BY ticker;
Postgresql 代码
with _prepare as (
select ticker, pdate,
min(price) over w as minprice,
max(price) over w as maxprice,
sum(price) over w as sumprice
FROM prices
WINDOW w AS (partition by ticker,pdate)
)
SELECT distinct * from
(
select ticker,
first_value(minprice) over w1 as minfirstprice,
first_value(maxprice) over w1 as maxfirstprice,
first_value(minprice) over w2 as minlastprice,
first_value(maxprice) over w2 as maxlastprice,
first_value(sumprice) over w1 as sumfirstprice,
first_value(sumprice) over w2 as sumlastprice
from _prepare
WINDOW w1 AS (partition by ticker ORDER BY pdate),
w2 AS (partition by ticker ORDER BY pdate desc)
) t2
ORDER BY ticker;
我正在尝试将以下 Oracle 查询转换为 Postgres
select
this_.GLOBAL_TRANSACTION_ID as y0_,
this_.BUSINESS_IDENTIFIER as y1_,
this_.ENVIRONMENT as y2_,
count(*) as y3_,
this_.HOST_NAME as y4_,
listagg(process,
', ') within
group (order by
date_time) as process,
min(this_.DATE_TIME) as y6_,
max(this_.DATE_TIME) as y7_,
max(status)keep(dense_rank last
order by
date_time,
decode(status,
'COMPLETED',
'd',
'FAILED',
'c',
'TERMINATED',
'b',
'STARTED',
'a',
'z')) as status
from
ACTIVITY_MONITOR_TRANSACTION this_
where
this_.DATE_TIME between ? and ?
and 1=1
group by
this_.GLOBAL_TRANSACTION_ID,
this_.BUSINESS_IDENTIFIER,
this_.ENVIRONMENT,
this_.HOST_NAME,
global_transaction_id,
business_identifier,
global_transaction_id,
business_identifier
order by
y7_ asc
问题是我不知道如何转换这个块:
max(status)keep(dense_rank last
order by
date_time,
decode(status,
'COMPLETED',
'd',
'FAILED',
'c',
'TERMINATED',
'b',
'STARTED',
'a',
'z')) as status
这个块的目的是获取最新的状态,如果完全相同的时间(这是可能的!)按照上面的顺序分配状态。
This is an example of data:
ID DATA_TIME GLOBAL_TRANSACTION_ID STATUS
===================================================================
54938456;"2015-04-20 09:39:27";"8d276718-eca7-4fd0-a266 ;"STARTED"
54938505;"2015-04-20 09:39:27";"8d276718-eca7-4fd0-a266 ;"COMPLETED"
54938507;"2015-04-20 09:39:27";"8d276718-eca7-4fd0-a266 ;"FAILED"
54938507;"2015-04-20 09:38:25";"8d276718-eca7-4fd0-a266 ;"FAILED"
状态应该是 "COMPLETED" 所以我的查询应该 return,在其他行中,如下:
GLOBAL_TRANSACTION_ID COUNT (...) STATUS
=====================================================
8d276718-eca7-4fd0-a266 4 (...) COMPLETED
我尝试将查询拆分为 2:
select
this_.GLOBAL_TRANSACTION_ID as y0_,
this_.BUSINESS_IDENTIFIER as y1_,
this_.ENVIRONMENT as y2_,
count(*) as y3_,
this_.HOST_NAME as y4_,
array_to_string(array_agg(distinct process),
',') as process,
min(this_.DATE_TIME) as y6_,
max(this_.DATE_TIME) as y7_,
max(this_.STATUS) as y8_
from
ACTIVITY_MONITOR_TRANSACTION this_
where
this_.DATE_TIME between ? and ?
group by
this_.GLOBAL_TRANSACTION_ID,
this_.BUSINESS_IDENTIFIER,
this_.ENVIRONMENT,
this_.HOST_NAME,
global_transaction_id,
business_identifier
order by
y7_ desc limit ?
然后
select
status
from
activity_monitor_transaction
where
GLOBAL_TRANSACTION_ID=?
order by
date_time DESC,
CASE status
WHEN 'COMPLETED'THEN 'd'
WHEN 'FAILED' THEN 'c'
WHEN 'TERMINATED' THEN 'b'
WHEN 'STARTED' THEN 'a'
ELSE 'z'
END DESC LIMIT 1
但这导致了我的性能问题,因为我必须每行执行一次第二个查询。
这是 postgres 的 table 脚本:
CREATE TABLE activity_monitor_transaction
(
id numeric(11,0) NOT NULL,
date_time timestamp(6) without time zone NOT NULL,
global_transaction_id character varying(40) NOT NULL,
repost_flag character(1) NOT NULL DEFAULT 'N'::bpchar,
environment character varying(20),
transaction_mode character varying(20),
status character varying(20),
step character varying(80),
event character varying(20),
event_code character varying(20),
event_subcode character varying(20),
summary character varying(200),
business_identifier character varying(80),
alternate_business_identifier character varying(80),
domain character varying(20),
process character varying(80),
service_name character varying(80),
service_version character varying(20),
detail text,
app_name character varying(80),
app_user character varying(20),
host_name character varying(80),
thread_name character varying(200),
CONSTRAINT activity_monitor_transact_pk PRIMARY KEY (id)
USING INDEX TABLESPACE actmon_data
)
和一些数据:
insert into ACTIVITY_MONITOR_TRANSACTION values
(54938456,'2015-04-20 09:39:27','8d276718-eca7-4fd0-a266-d465181f911a','N','Perf','','STARTED','servicereq.p2p.rso.blaze.dedup.in.channel','PROCESS','','','','3100729','51174628','ERP','servicereq-p2p-rso-blaze','servicereq-p2p-rso-blaze','1.0.0-SNAPSHOT','','servicereq-p2p-rso-blaze','CIC','intintprf20','SimpleAsyncTaskExecutor-88177');
insert into ACTIVITY_MONITOR_TRANSACTION values
(54938505,'2015-04-20 09:45:27','8d276718-eca7-4fd0-a266-d465181f911a','N','Perf','','COMPLETED','servicereq.p2p.rso.blaze.service.out.channel','PROCESS','','','','3100729','51174628','ERP','servicereq-p2p-rso-blaze','servicereq-p2p-rso-blaze','1.0.0-SNAPSHOT','','servicereq-p2p-rso-blaze','CIC','intintprf20','SimpleAsyncTaskExecutor-88177');
insert into ACTIVITY_MONITOR_TRANSACTION values
(54938507,'2015-04-20 09:45:27','8d276718-eca7-4fd0-a266-d465181f911a','N','Perf','','FAILED','inputChannel','PROCESS','','','','3100729','','ERP','servicereq-p2p-rso-blaze','servicereq-p2p-rso-blaze','1.0.0-SNAPSHOT','','servicereq-p2p-rso-blaze','CIC','intintprf20','SimpleAsyncTaskExecutor-88177');
有什么方法可以模仿 keep dense_rank 块到 postgres 以便只有一个查询吗?
您可以使用 PostgreSQL WINDOW FUNCTIONS
-- we only added infos to the activity_monitor_transaction
-- we are free to group by date_time or status
SELECT
first_value(status) OVER w AS global_transaction_status,
count(*) OVER w AS global_transaction_count,
activity_monitor_transaction.*
FROM
activity_monitor_transaction
WINDOW w AS (
PARTITION BY global_transaction_id
ORDER BY date_time DESC, id DESC
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
)
示例代码
CREATE TABLE prices
(
ticker VARCHAR2(3),
pdate DATE,
price FLOAT
);
INSERT INTO prices( ticker, pdate, price) VALUES ('usd', DATE'2016-10-22', 55.11);
INSERT INTO prices( ticker, pdate, price) VALUES ('usd', DATE'2016-10-22', 56.11);
INSERT INTO prices( ticker, pdate, price) VALUES ('usd', DATE'2016-10-25', 57.11);
INSERT INTO prices( ticker, pdate, price) VALUES ('usd', DATE'2016-10-25', 57.22);
INSERT INTO prices( ticker, pdate, price) VALUES ('usd', DATE'2016-10-27', 58.11);
INSERT INTO prices( ticker, pdate, price) VALUES ('usd', DATE'2016-11-29', 57.13);
INSERT INTO prices( ticker, pdate, price) VALUES ('usd', DATE'2016-11-29', 57.20);
INSERT INTO prices( ticker, pdate, price) VALUES ('eur', DATE'2016-11-22', 65.11);
INSERT INTO prices( ticker, pdate, price) VALUES ('eur', DATE'2016-11-22', 65.12);
INSERT INTO prices( ticker, pdate, price) VALUES ('eur', DATE'2016-11-25', 67.11);
INSERT INTO prices( ticker, pdate, price) VALUES ('eur', DATE'2016-11-27', 68.11);
INSERT INTO prices( ticker, pdate, price) VALUES ('chf', DATE'2016-11-22', 88.11);
INSERT INTO prices( ticker, pdate, price) VALUES ('chf', DATE'2016-11-25', 88.33);
INSERT INTO prices( ticker, pdate, price) VALUES ('chf', DATE'2016-11-25', 89.33);
甲骨文代码
SELECT
ticker,
min(price) KEEP (DENSE_RANK FIRST ORDER BY pdate) as minfirstprice,
max(price) KEEP (DENSE_RANK FIRST ORDER BY pdate) as maxfirstprice,
min(price) KEEP (DENSE_RANK LAST ORDER BY pdate) as minlastprice,
max(price) KEEP (DENSE_RANK LAST ORDER BY pdate) as maxlastprice,
sum(price) KEEP (DENSE_RANK FIRST ORDER BY pdate) as sumfirstprice,
sum(price) KEEP (DENSE_RANK LAST ORDER BY pdate) as sumlastprice
FROM prices
GROUP BY ticker
ORDER BY ticker;
Postgresql 代码
with _prepare as (
select ticker, pdate,
min(price) over w as minprice,
max(price) over w as maxprice,
sum(price) over w as sumprice
FROM prices
WINDOW w AS (partition by ticker,pdate)
)
SELECT distinct * from
(
select ticker,
first_value(minprice) over w1 as minfirstprice,
first_value(maxprice) over w1 as maxfirstprice,
first_value(minprice) over w2 as minlastprice,
first_value(maxprice) over w2 as maxlastprice,
first_value(sumprice) over w1 as sumfirstprice,
first_value(sumprice) over w2 as sumlastprice
from _prepare
WINDOW w1 AS (partition by ticker ORDER BY pdate),
w2 AS (partition by ticker ORDER BY pdate desc)
) t2
ORDER BY ticker;