如果窗口不是 "完整" 的,如何让 OVER() 的结果为 NULL
How to nullify result in OVER() if the interval isn't "full"
假设我有以下数据集和查询:
-- Sample data: one row per team and season holding that season's win count.
WITH results AS (
    SELECT 'DAL' AS team, 2010 AS season, 6 AS wins
    UNION
    SELECT 'DET' AS team, 2010 AS season, 6 AS wins
    UNION
    SELECT 'DET' AS team, 2011 AS season, 10 AS wins
    UNION
    SELECT 'DET' AS team, 2012 AS season, 4 AS wins
    UNION
    SELECT 'DET' AS team, 2013 AS season, 7 AS wins
    UNION
    SELECT 'DET' AS team, 2014 AS season, 11 AS wins
    UNION
    SELECT 'DET' AS team, 2015 AS season, 7 AS wins
    UNION
    SELECT 'DET' AS team, 2016 AS season, 9 AS wins
    UNION
    SELECT 'DET' AS team, 2017 AS season, 9 AS wins
    UNION
    SELECT 'DET' AS team, 2018 AS season, 6 AS wins
    UNION
    SELECT 'DET' AS team, 2019 AS season, 3 AS wins
)
-- Rolling average of wins over the current season plus up to two preceding
-- seasons of the same team. At the start of a partition the frame contains
-- fewer than three rows, so the average is taken over the rows that exist.
SELECT
    team,
    season,
    wins,
    AVG(wins) OVER (
        PARTITION BY team
        ORDER BY season
        ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
    ) AS avg_wins_last_two_seasons
FROM results;
这将返回以下结果:
# team, season, wins, avg_wins_last_two_seasons
DAL, 2010, 6, 6.0000
DET, 2010, 6, 6.0000
DET, 2011, 10, 8.0000
但是,如果 window 的大小不是 'complete'(即前面没有两行),那么我希望结果为 NULL,而不是对缩小后的 window 取平均值。例如,对于 2010,它将 avg 计算为 [6] / 1 = 6,但我希望它计算为 [NULL, NULL, 6] / 3 = NULL。我该怎么做?
您可以使用 row_number() 和 case 表达式:
-- Emit the rolling average only when the current row is at least the third
-- season of its team (i.e. two earlier rows exist in the partition);
-- every other row falls through to the explicit ELSE NULL.
SELECT
    team,
    season,
    wins,
    CASE
        WHEN ROW_NUMBER() OVER (PARTITION BY team ORDER BY season) > 2
            THEN AVG(wins) OVER (
                PARTITION BY team
                ORDER BY season
                ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
            )
        ELSE NULL
    END AS avg_wins_last_two_seasons
FROM results;
这可以确保在计算 window 平均值之前,当前行前面至少已有两行;如果没有,则该表达式返回 NULL。
@GMB 在这里有公认的答案,但这里有一个细微的变化,使用命名的 window 以提高可读性并显示两个选项(null 和允许不完整的 window 大小):
-- Sample data: one row per team and season holding that season's win count.
WITH results AS (
    SELECT 'DAL' AS team, 2010 AS season, 6 AS wins
    UNION
    SELECT 'DET' AS team, 2010 AS season, 6 AS wins
    UNION
    SELECT 'DET' AS team, 2011 AS season, 10 AS wins
    UNION
    SELECT 'DET' AS team, 2012 AS season, 4 AS wins
    UNION
    SELECT 'DET' AS team, 2013 AS season, 7 AS wins
    UNION
    SELECT 'DET' AS team, 2014 AS season, 11 AS wins
    UNION
    SELECT 'DET' AS team, 2015 AS season, 7 AS wins
    UNION
    SELECT 'DET' AS team, 2016 AS season, 9 AS wins
    UNION
    SELECT 'DET' AS team, 2017 AS season, 9 AS wins
    UNION
    SELECT 'DET' AS team, 2018 AS season, 6 AS wins
    UNION
    SELECT 'DET' AS team, 2019 AS season, 3 AS wins
)
SELECT
    team,
    season,
    wins,
    -- Average over whatever rows the frame holds, even a partial frame.
    ROUND(AVG(wins) OVER trailing_2, 1) AS avg_wins_trailing_2,
    -- Same average, but NULL unless the frame holds all three rows
    -- (the current row plus two preceding rows).
    CASE
        WHEN COUNT(*) OVER trailing_2 < 3 THEN NULL
        ELSE ROUND(AVG(wins) OVER trailing_2, 1)
    END AS avg_wins_trailing_2_if_full_window
FROM results
-- Shared frame definition: current row and the two rows before it, per team,
-- ordered by season.
WINDOW trailing_2 AS (
    PARTITION BY team
    ORDER BY season
    ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
);
# team, season, wins, avg_wins_trailing_2, avg_wins_trailing_2_if_full_window
DAL, 2010, 6, 6.0, null
DET, 2010, 6, 6.0, null
DET, 2011, 10, 8.0, null
DET, 2012, 4, 6.7, 6.7
DET, 2013, 7, 7.0, 7.0
假设我有以下数据集和查询:
-- Sample data: one row per team and season holding that season's win count.
WITH results AS (
    SELECT 'DAL' AS team, 2010 AS season, 6 AS wins
    UNION
    SELECT 'DET' AS team, 2010 AS season, 6 AS wins
    UNION
    SELECT 'DET' AS team, 2011 AS season, 10 AS wins
    UNION
    SELECT 'DET' AS team, 2012 AS season, 4 AS wins
    UNION
    SELECT 'DET' AS team, 2013 AS season, 7 AS wins
    UNION
    SELECT 'DET' AS team, 2014 AS season, 11 AS wins
    UNION
    SELECT 'DET' AS team, 2015 AS season, 7 AS wins
    UNION
    SELECT 'DET' AS team, 2016 AS season, 9 AS wins
    UNION
    SELECT 'DET' AS team, 2017 AS season, 9 AS wins
    UNION
    SELECT 'DET' AS team, 2018 AS season, 6 AS wins
    UNION
    SELECT 'DET' AS team, 2019 AS season, 3 AS wins
)
-- Rolling average of wins over the current season plus up to two preceding
-- seasons of the same team. At the start of a partition the frame contains
-- fewer than three rows, so the average is taken over the rows that exist.
SELECT
    team,
    season,
    wins,
    AVG(wins) OVER (
        PARTITION BY team
        ORDER BY season
        ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
    ) AS avg_wins_last_two_seasons
FROM results;
这将返回以下结果:
# team, season, wins, avg_wins_last_two_seasons
DAL, 2010, 6, 6.0000
DET, 2010, 6, 6.0000
DET, 2011, 10, 8.0000
但是,如果 window 的大小不是 'complete'(即前面没有两行),那么我希望结果为 NULL,而不是对缩小后的 window 取平均值。例如,对于 2010,它将 avg 计算为 [6] / 1 = 6,但我希望它计算为 [NULL, NULL, 6] / 3 = NULL。我该怎么做?
您可以使用 row_number() 和 case 表达式:
-- Emit the rolling average only when the current row is at least the third
-- season of its team (i.e. two earlier rows exist in the partition);
-- every other row falls through to the explicit ELSE NULL.
SELECT
    team,
    season,
    wins,
    CASE
        WHEN ROW_NUMBER() OVER (PARTITION BY team ORDER BY season) > 2
            THEN AVG(wins) OVER (
                PARTITION BY team
                ORDER BY season
                ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
            )
        ELSE NULL
    END AS avg_wins_last_two_seasons
FROM results;
这可以确保在计算 window 平均值之前,当前行前面至少已有两行;如果没有,则该表达式返回 NULL。
@GMB 在这里有公认的答案,但这里有一个细微的变化,使用命名的 window 以提高可读性并显示两个选项(null 和允许不完整的 window 大小):
-- Sample data: one row per team and season holding that season's win count.
WITH results AS (
    SELECT 'DAL' AS team, 2010 AS season, 6 AS wins
    UNION
    SELECT 'DET' AS team, 2010 AS season, 6 AS wins
    UNION
    SELECT 'DET' AS team, 2011 AS season, 10 AS wins
    UNION
    SELECT 'DET' AS team, 2012 AS season, 4 AS wins
    UNION
    SELECT 'DET' AS team, 2013 AS season, 7 AS wins
    UNION
    SELECT 'DET' AS team, 2014 AS season, 11 AS wins
    UNION
    SELECT 'DET' AS team, 2015 AS season, 7 AS wins
    UNION
    SELECT 'DET' AS team, 2016 AS season, 9 AS wins
    UNION
    SELECT 'DET' AS team, 2017 AS season, 9 AS wins
    UNION
    SELECT 'DET' AS team, 2018 AS season, 6 AS wins
    UNION
    SELECT 'DET' AS team, 2019 AS season, 3 AS wins
)
SELECT
    team,
    season,
    wins,
    -- Average over whatever rows the frame holds, even a partial frame.
    ROUND(AVG(wins) OVER trailing_2, 1) AS avg_wins_trailing_2,
    -- Same average, but NULL unless the frame holds all three rows
    -- (the current row plus two preceding rows).
    CASE
        WHEN COUNT(*) OVER trailing_2 < 3 THEN NULL
        ELSE ROUND(AVG(wins) OVER trailing_2, 1)
    END AS avg_wins_trailing_2_if_full_window
FROM results
-- Shared frame definition: current row and the two rows before it, per team,
-- ordered by season.
WINDOW trailing_2 AS (
    PARTITION BY team
    ORDER BY season
    ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
);
# team, season, wins, avg_wins_trailing_2, avg_wins_trailing_2_if_full_window
DAL, 2010, 6, 6.0, null
DET, 2010, 6, 6.0, null
DET, 2011, 10, 8.0, null
DET, 2012, 4, 6.7, 6.7
DET, 2013, 7, 7.0, 7.0