在 google bigquery 中查询 distinct
Query distinct in google bigquery
我有一个包含四列的 table,如下所示:
id,name, key, date
1,'A' ,'x1','2015-11-11'
2,'A' ,'x1','2015-11-11'
3,'B' ,'x2','2015-11-11'
4,'B' ,'x2','2015-11-11'
5,'A' ,'x1','2015-11-12'
6,'A' ,'x1','2015-11-12'
7,'B' ,'x2','2015-11-12'
8,'B' ,'x2','2015-11-12'
9,'D' ,'x3','2015-11-12'
10,'A' ,'x1','2015-12-11'
11,'A' ,'x1','2015-12-11'
12,'B' ,'x2','2015-12-11'
13,'B' ,'x2','2015-12-11'
14,'A' ,'x1','2015-12-12'
15,'A' ,'x1','2015-12-12'
16,'B' ,'x2','2015-12-12'
17,'B' ,'x2','2015-12-12'
18,'D' ,'x3','2015-12-12'
我想计算每个 date
的不同 new key
-s 的数量:
2015-11-11 2 -- (two distinct keys: x1 and x2)
2015-11-12 1 -- (one new key: x3)
2015-12-11 2 -- (two distinct keys: x1 and x2) - (different month 11)
2015-12-12 1 -- (one new key: x3) - (different month 11)
每个月只不同。
我该怎么做?
以下是我尝试解决的方法:
从日期中提取月份:
select key, date, regexp_extract(date, r'[\d]+-(\d\d)-\d\d') 月
从 t
按月分区,因为我们想从每个月的开始开始计数,并从月初计算不同的键
select 日期、月份、计数(不同键)
over(按月份分区,按日期行排序,在无界的前一行和当前行之间)cd from
(select key, date, regexp_extract(date, r'[\d]+-(\d\d)-\d\d') month from t)
选择每月不同的总计数
select 日期、月份、最大(cd) cd 来自(
select date, month, count(distinct key) over (partition by month order by date rows between unbounded preceding and current row) cd from
(select key, date, regexp_extract(date, r'[\d]+-(\d\d)-\d\d') 月从 t))
按 1、2
分组
对于每个日期,计算从上一日期的月初开始的唯一键总数:
select date, cd, lag(cd, 1) over (partition by month order by date) prev_cd from (
select 日期, 月份, max(cd) cd from (
select date, month, count(distinct key) over (partition by month order by date rows between unbounded preceding and current row) cd from
(select key, date, regexp_extract(date, r'[\d]+-(\d\d)-\d\d') 月从 t))
按 1、2)
分组
从当前日期减去之前的日期 - 这就是答案:
select 日期, cd - prev_cd 来自 (
select date, cd, lag(cd, 1) over (partition by month order by date) prev_cd from (
select 日期, 月份, max(cd) cd from (
select date, month, count(distinct key) over (partition by month order by date rows between unbounded preceding and current row) cd from
(select key, date, regexp_extract(date, r'[\d]+-(\d\d)-\d\d') month from moshap.wd))
按 1、2))
分组
这与您之前的问题完全相同 - 您只需要按月添加额外的 group/partition --> 查看 YearMonth 字段的使用
SELECT DATE, EXACT_COUNT_DISTINCT(key) AS keys
FROM (
SELECT DATE, key, LEAD(DATE) OVER(PARTITION BY key, YearMonth ORDER BY DATE DESC) AS new
FROM (SELECT DATE, LEFT(DATE, 7) AS YearMonth, key FROM YourTable GROUP BY 1, 2, 3)
) WHERE new IS NULL
GROUP BY DATE
ORDER BY DATE
我有一个包含四列的 table,如下所示:
id,name, key, date
1,'A' ,'x1','2015-11-11'
2,'A' ,'x1','2015-11-11'
3,'B' ,'x2','2015-11-11'
4,'B' ,'x2','2015-11-11'
5,'A' ,'x1','2015-11-12'
6,'A' ,'x1','2015-11-12'
7,'B' ,'x2','2015-11-12'
8,'B' ,'x2','2015-11-12'
9,'D' ,'x3','2015-11-12'
10,'A' ,'x1','2015-12-11'
11,'A' ,'x1','2015-12-11'
12,'B' ,'x2','2015-12-11'
13,'B' ,'x2','2015-12-11'
14,'A' ,'x1','2015-12-12'
15,'A' ,'x1','2015-12-12'
16,'B' ,'x2','2015-12-12'
17,'B' ,'x2','2015-12-12'
18,'D' ,'x3','2015-12-12'
我想计算每个 date
的不同 new key
-s 的数量:
2015-11-11 2 -- (two distinct keys: x1 and x2)
2015-11-12 1 -- (one new key: x3)
2015-12-11 2 -- (two distinct keys: x1 and x2) - (different month 11)
2015-12-12 1 -- (one new key: x3) - (different month 11)
每个月只不同。
我该怎么做?
以下是我尝试解决的方法:
从日期中提取月份:
select key, date, regexp_extract(date, r'[\d]+-(\d\d)-\d\d') 月 从 t
按月分区,因为我们想从每个月的开始开始计数,并从月初计算不同的键
select 日期、月份、计数(不同键) over(按月份分区,按日期行排序,在无界的前一行和当前行之间)cd from (select key, date, regexp_extract(date, r'[\d]+-(\d\d)-\d\d') month from t)
选择每月不同的总计数
select 日期、月份、最大(cd) cd 来自( select date, month, count(distinct key) over (partition by month order by date rows between unbounded preceding and current row) cd from (select key, date, regexp_extract(date, r'[\d]+-(\d\d)-\d\d') 月从 t)) 按 1、2
分组
对于每个日期,计算从上一日期的月初开始的唯一键总数:
select date, cd, lag(cd, 1) over (partition by month order by date) prev_cd from ( select 日期, 月份, max(cd) cd from ( select date, month, count(distinct key) over (partition by month order by date rows between unbounded preceding and current row) cd from (select key, date, regexp_extract(date, r'[\d]+-(\d\d)-\d\d') 月从 t)) 按 1、2)
分组
从当前日期减去之前的日期 - 这就是答案:
select 日期, cd - prev_cd 来自 ( select date, cd, lag(cd, 1) over (partition by month order by date) prev_cd from ( select 日期, 月份, max(cd) cd from ( select date, month, count(distinct key) over (partition by month order by date rows between unbounded preceding and current row) cd from (select key, date, regexp_extract(date, r'[\d]+-(\d\d)-\d\d') month from moshap.wd)) 按 1、2))
分组
这与您之前的问题完全相同 - 您只需要按月添加额外的 group/partition --> 查看 YearMonth 字段的使用
SELECT DATE, EXACT_COUNT_DISTINCT(key) AS keys
FROM (
SELECT DATE, key, LEAD(DATE) OVER(PARTITION BY key, YearMonth ORDER BY DATE DESC) AS new
FROM (SELECT DATE, LEFT(DATE, 7) AS YearMonth, key FROM YourTable GROUP BY 1, 2, 3)
) WHERE new IS NULL
GROUP BY DATE
ORDER BY DATE