count(distinct) over (partition by... 在 Oracle SQL 中不起作用
count(distinct) over (partition by... doesn't work in Oracle SQL
我想统计过去 30 天内不重复(distinct)的 day_number 的个数。但是,distinct 不能与 over 一起使用。如果去掉 distinct,得到的是 day_number 的总行数,而 day_number 可能有很多重复值,这正是我要加上 distinct 的原因。
-- Attempted query from the question: for every row, count the distinct day_number
-- values of the same ACCOUNT whose day_number lies between (current day_number - 29)
-- and the current day_number.
-- NOTE(review): per the discussion in this document, Oracle rejects DISTINCT in an
-- analytic function whose OVER clause contains ORDER BY, so this statement is the
-- non-working starting point, not a solution.
select tr.*,
count( distinct day_number) OVER (PARTITION BY ACCOUNT ORDER BY DAY_number range 29 PRECEDING) as result
-- NOTE(review): "table" looks like a placeholder for the real table name -- confirm.
from table tr;
有谁能告诉我,如何在 over (partition by ...) 子句中统计不重复值的个数吗?提前致谢。
您可以先构造一个辅助列,让每个 ID 只在其中出现一次(其余行为 NULL),然后对该列按范围窗口计数,例如:
-- Sample data: seven (id, val) rows in which ids 1, 2 and 4 each occur twice.
with sd as (
    select 1 as id, 10 as val from dual
    union all select 1, 20 from dual
    union all select 2, 30 from dual
    union all select 2, 40 from dual
    union all select 4, 50 from dual
    union all select 4, 60 from dual
    union all select 6, 70 from dual
),
-- Carry the id only on the first row (lowest val) of each id. All other rows get
-- NULL, which COUNT(expr) ignores, so each id is counted at most once.
first_per_id as (
    select
        sd.id,
        sd.val,
        case
            when row_number() over (partition by sd.id order by sd.val) = 1
            then sd.id
        end as id_distinct
    from sd
)
-- For each row, count the ids seen once inside the value range [id - 3, id].
-- The output alias keeps its original spelling so the column heading is unchanged.
select
    f.id,
    f.val,
    count(f.id_distinct) over (
        order by f.id
        range between 3 preceding and current row
    ) as cnt_disinct_ids
from first_per_id f;
ID VAL CNT_DISINCT_IDS
---------- ---------- ---------------
1 10 1
1 20 1
2 30 2
2 40 2
4 50 3
4 60 3
6 70 2
补充(ETA,即后续追加的内容):下面证明上述方法同样适用于您的数据:
-- Sample data shaped like the question: (account_sk, day_sk) rows with repeated days.
with your_table as (
    select 'ABCDE' as account_sk, 23 as day_sk from dual
    union all select 'ABCDE', 23 from dual
    union all select 'ABCDE', 24 from dual
    union all select 'ABCDE', 25 from dual
    union all select 'ABCDE', 53 from dual
    union all select 'ABCDE', 53 from dual
    union all select 'ABCDE', 55 from dual
    union all select 'VWXYZ', 10 from dual
    union all select 'VWXYZ', 12 from dual
    union all select 'VWXYZ', 40 from dual
    union all select 'VWXYZ', 40 from dual
),
-- Carry day_sk on exactly one row per (account_sk, day_sk); duplicates get NULL
-- and are therefore skipped by COUNT(expr).
first_per_day as (
    select
        t.account_sk,
        t.day_sk,
        case
            when row_number() over (partition by t.account_sk, t.day_sk order by t.day_sk) = 1
            then t.day_sk
        end as day_sk_distinct
    from your_table t
)
-- Per account: number of distinct days in the range [day_sk - 29, day_sk].
select
    d.account_sk,
    d.day_sk,
    count(d.day_sk_distinct) over (
        partition by d.account_sk
        order by d.day_sk
        range 29 preceding
    ) as count_distinct_day_sk
from first_per_day d;
ACCOUNT_SK DAY_SK COUNT_DISTINCT_DAY_SK
---------- ---------- ---------------------
ABCDE 23 1
ABCDE 23 1
ABCDE 24 2
ABCDE 25 3
ABCDE 53 3
ABCDE 53 3
ABCDE 55 2
VWXYZ 10 1
VWXYZ 12 2
VWXYZ 40 2
VWXYZ 40 2
count(distinct ...) 本身可以与 over 子句正常配合使用,问题主要出在 order by 上。您不能写 count (distinct ..) over (partition by ... order by ...),因为 Oracle 规定 DISTINCT 函数和 RATIO_TO_REPORT 不能带 ORDER BY。所以我改成了这样:
-- Workaround from this answer: drop ORDER BY from the analytic COUNT(DISTINCT ...)
-- and restrict the input rows in an inline view instead.
-- The inline view numbers each account's rows by ascending day_number; the WHERE
-- clause then keeps only the rows numbered 1..29 of every account.
-- NOTE(review): that is a row-count limit starting from the smallest day_number,
-- not a 30-day RANGE window relative to each row -- confirm it meets the requirement.
-- NOTE(review): "table" looks like a placeholder for the real table name -- confirm.
select tr.*, count (distinct day_number) over (partition by account)
from (select t.*, row_number() over (partition by account order by day_number) row_number from table t) tr
where row_number < 30;
我在 HR 示例模式(schema)的 employees 表上测试过(这个免费的 Oracle 示例模式随处可得)。
我不确定它是否适用于您的表结构,因为我手头没有您的数据;即使不适用,它也应该能给您一些思路:
-- Same technique against the employees table: number each department's rows by
-- employee_id, keep rows 1..29, then count the distinct managers per department.
with numbered as (
    select
        e.*,
        row_number() over (partition by e.department_id order by e.employee_id) as rn
    from employees e
)
select
    -- left unaliased and textually unchanged so the result column heading stays the same
    count (distinct manager_id) over (partition by department_id),
    department_id,
    manager_id
from numbered
where rn <= 29;
您可以通过构造一个标志列来模拟这种效果:在排好序的列表中,每当出现“新的中断”(即某个值第一次出现)时该列为 1,否则为 null。然后只需对这些中断标志进行计数或求和即可,因为 count() 和 sum() 都支持“over(order by...)”。
在您的示例中,写法如下:
-- Sample rows: PK differs on every row; day 23 appears twice for account ABCDE.
with tr as (
    select 1 as pk, 'ABCDE' as account, 23 as day_number from dual union all
    select 2, 'ABCDE', 23 from dual union all
    select 3, 'ABCDE', 24 from dual union all
    select 4, 'ABCDE', 25 from dual
),
-- Flag exactly one row per (account, day_number): the row holding the smallest PK.
-- Rows whose day_number is NULL are never flagged.
flagged as (
    select
        tr.*,
        case
            when min(case when tr.day_number is not null then tr.pk end)
                     over (partition by tr.account, tr.day_number) = tr.pk
            then 1
        end as is_new_break
    from tr
)
select
    f.*,
    -- plain COUNT (DISTINCT is not possible here) counts repeated days more than once
    count(f.day_number) over (
        partition by f.account
        order by f.day_number
        range between 29 preceding and current row
    ) as wrong_result,
    -- counting the flags counts every distinct day exactly once
    count(f.is_new_break) over (
        partition by f.account
        order by f.day_number
        range between 29 preceding and current row
    ) as desired_output
from flagged f;
我想统计过去 30 天内不重复(distinct)的 day_number 的个数。但是,distinct 不能与 over 一起使用。如果去掉 distinct,得到的是 day_number 的总行数,而 day_number 可能有很多重复值,这正是我要加上 distinct 的原因。
-- Attempted query from the question: for every row, count the distinct day_number
-- values of the same ACCOUNT whose day_number lies between (current day_number - 29)
-- and the current day_number.
-- NOTE(review): per the discussion in this document, Oracle rejects DISTINCT in an
-- analytic function whose OVER clause contains ORDER BY, so this statement is the
-- non-working starting point, not a solution.
select tr.*,
count( distinct day_number) OVER (PARTITION BY ACCOUNT ORDER BY DAY_number range 29 PRECEDING) as result
-- NOTE(review): "table" looks like a placeholder for the real table name -- confirm.
from table tr;
有谁能告诉我,如何在 over (partition by ...) 子句中统计不重复值的个数吗?提前致谢。
您可以先构造一个辅助列,让每个 ID 只在其中出现一次(其余行为 NULL),然后对该列按范围窗口计数,例如:
-- Sample data: seven (id, val) rows in which ids 1, 2 and 4 each occur twice.
with sd as (
    select 1 as id, 10 as val from dual
    union all select 1, 20 from dual
    union all select 2, 30 from dual
    union all select 2, 40 from dual
    union all select 4, 50 from dual
    union all select 4, 60 from dual
    union all select 6, 70 from dual
),
-- Carry the id only on the first row (lowest val) of each id. All other rows get
-- NULL, which COUNT(expr) ignores, so each id is counted at most once.
first_per_id as (
    select
        sd.id,
        sd.val,
        case
            when row_number() over (partition by sd.id order by sd.val) = 1
            then sd.id
        end as id_distinct
    from sd
)
-- For each row, count the ids seen once inside the value range [id - 3, id].
-- The output alias keeps its original spelling so the column heading is unchanged.
select
    f.id,
    f.val,
    count(f.id_distinct) over (
        order by f.id
        range between 3 preceding and current row
    ) as cnt_disinct_ids
from first_per_id f;
ID VAL CNT_DISINCT_IDS
---------- ---------- ---------------
1 10 1
1 20 1
2 30 2
2 40 2
4 50 3
4 60 3
6 70 2
补充(ETA,即后续追加的内容):下面证明上述方法同样适用于您的数据:
-- Sample data shaped like the question: (account_sk, day_sk) rows with repeated days.
with your_table as (
    select 'ABCDE' as account_sk, 23 as day_sk from dual
    union all select 'ABCDE', 23 from dual
    union all select 'ABCDE', 24 from dual
    union all select 'ABCDE', 25 from dual
    union all select 'ABCDE', 53 from dual
    union all select 'ABCDE', 53 from dual
    union all select 'ABCDE', 55 from dual
    union all select 'VWXYZ', 10 from dual
    union all select 'VWXYZ', 12 from dual
    union all select 'VWXYZ', 40 from dual
    union all select 'VWXYZ', 40 from dual
),
-- Carry day_sk on exactly one row per (account_sk, day_sk); duplicates get NULL
-- and are therefore skipped by COUNT(expr).
first_per_day as (
    select
        t.account_sk,
        t.day_sk,
        case
            when row_number() over (partition by t.account_sk, t.day_sk order by t.day_sk) = 1
            then t.day_sk
        end as day_sk_distinct
    from your_table t
)
-- Per account: number of distinct days in the range [day_sk - 29, day_sk].
select
    d.account_sk,
    d.day_sk,
    count(d.day_sk_distinct) over (
        partition by d.account_sk
        order by d.day_sk
        range 29 preceding
    ) as count_distinct_day_sk
from first_per_day d;
ACCOUNT_SK DAY_SK COUNT_DISTINCT_DAY_SK
---------- ---------- ---------------------
ABCDE 23 1
ABCDE 23 1
ABCDE 24 2
ABCDE 25 3
ABCDE 53 3
ABCDE 53 3
ABCDE 55 2
VWXYZ 10 1
VWXYZ 12 2
VWXYZ 40 2
VWXYZ 40 2
count(distinct ...) 本身可以与 over 子句正常配合使用,问题主要出在 order by 上。您不能写 count (distinct ..) over (partition by ... order by ...),因为 Oracle 规定 DISTINCT 函数和 RATIO_TO_REPORT 不能带 ORDER BY。所以我改成了这样:
-- Workaround from this answer: drop ORDER BY from the analytic COUNT(DISTINCT ...)
-- and restrict the input rows in an inline view instead.
-- The inline view numbers each account's rows by ascending day_number; the WHERE
-- clause then keeps only the rows numbered 1..29 of every account.
-- NOTE(review): that is a row-count limit starting from the smallest day_number,
-- not a 30-day RANGE window relative to each row -- confirm it meets the requirement.
-- NOTE(review): "table" looks like a placeholder for the real table name -- confirm.
select tr.*, count (distinct day_number) over (partition by account)
from (select t.*, row_number() over (partition by account order by day_number) row_number from table t) tr
where row_number < 30;
我在 HR 示例模式(schema)的 employees 表上测试过(这个免费的 Oracle 示例模式随处可得)。我不确定它是否适用于您的表结构,因为我手头没有您的数据;即使不适用,它也应该能给您一些思路:
-- Same technique against the employees table: number each department's rows by
-- employee_id, keep rows 1..29, then count the distinct managers per department.
with numbered as (
    select
        e.*,
        row_number() over (partition by e.department_id order by e.employee_id) as rn
    from employees e
)
select
    -- left unaliased and textually unchanged so the result column heading stays the same
    count (distinct manager_id) over (partition by department_id),
    department_id,
    manager_id
from numbered
where rn <= 29;
您可以通过构造一个标志列来模拟这种效果:在排好序的列表中,每当出现“新的中断”(即某个值第一次出现)时该列为 1,否则为 null。然后只需对这些中断标志进行计数或求和即可,因为 count() 和 sum() 都支持“over(order by...)”。在您的示例中,写法如下:
-- Sample rows: PK differs on every row; day 23 appears twice for account ABCDE.
with tr as (
    select 1 as pk, 'ABCDE' as account, 23 as day_number from dual union all
    select 2, 'ABCDE', 23 from dual union all
    select 3, 'ABCDE', 24 from dual union all
    select 4, 'ABCDE', 25 from dual
),
-- Flag exactly one row per (account, day_number): the row holding the smallest PK.
-- Rows whose day_number is NULL are never flagged.
flagged as (
    select
        tr.*,
        case
            when min(case when tr.day_number is not null then tr.pk end)
                     over (partition by tr.account, tr.day_number) = tr.pk
            then 1
        end as is_new_break
    from tr
)
select
    f.*,
    -- plain COUNT (DISTINCT is not possible here) counts repeated days more than once
    count(f.day_number) over (
        partition by f.account
        order by f.day_number
        range between 29 preceding and current row
    ) as wrong_result,
    -- counting the flags counts every distinct day exactly once
    count(f.is_new_break) over (
        partition by f.account
        order by f.day_number
        range between 29 preceding and current row
    ) as desired_output
from flagged f;