选择和使用每个分区组的运算符
Selecting and using operators per partition group
我有下面这张存储体育赛事数据的表。我想找出:当赛事再次在同一国家/地区举办时,哪些赛事的出席人数有所增加。
+----------+------------+-----------+-------------+------------+
| EventId | EventName | Country | Attendance | EventDate |
+----------+------------+-----------+-------------+------------+
| 1 | Soccer1 | Australia | 12000 | 2015-01-01 |
| 2 | Soccer2 | Mexico | 35999 | 2016-02-02 |
| 3 | Soccer3 | Australia | 13999 | 2015-03-22 |
| 4 | Football1 | Japan | 13555 | 2003-11-12 |
| 5 | Football2 | Japan | 12222 | 2004-01-01 |
| 6 | Football3 | Canada | 13444 | 2003-02-23 |
| 7 | Tennis1 | America | 10000 | 2014-01-02 |
| 8 | Tennis2 | America | 12111 | 2015-10-01 |
+----------+------------+-----------+-------------+------------+
-- Sample schema for the question: one row per sporting event.
CREATE TABLE [dbo].[Sports](
[EventId] [int] NULL,
[EventName] [varchar](50) NULL,
[Country] [varchar](50) NULL,
[Attendance] [int] NULL,
[EventDate] [date] NULL
);  -- the column list must be closed before the INSERT statements can parse
-- Load the eight sample events in a single multi-row insert.
INSERT INTO [dbo].[Sports] ([EventId], [EventName], [Country], [Attendance], [EventDate]) VALUES
(1, N'Soccer1', N'Australia', 12000, CAST(N'2015-01-01' AS Date)),
(2, N'Soccer2', N'Mexico', 35999, CAST(N'2016-02-02' AS Date)),
(3, N'Soccer3', N'Australia', 13999, CAST(N'2015-03-22' AS Date)),
(4, N'Football1', N'Japan', 13555, CAST(N'2003-11-12' AS Date)),
(5, N'Football2', N'Japan', 12222, CAST(N'2004-01-01' AS Date)),
(6, N'Football3', N'Canada', 13444, CAST(N'2003-02-23' AS Date)),
(7, N'Tennis1', N'America', 10000, CAST(N'2014-01-02' AS Date)),
(8, N'Tennis2', N'America', 12111, CAST(N'2015-10-01' AS Date));
于是我创建了一个 CTE 来找出多次举办过赛事的国家/地区,然后尝试使用按 EventDate 排序的 LEAD 函数,查看最近一次赛事的出席人数是否更高。
但是当满足这些条件时,我想同时显示出席人数较高的那一行以及上一次赛事的那一行。我的尝试只显示了日期最近的那一行。
-- Asker's attempt: list events whose attendance beats the previous event
-- held in the same country. Only the later (higher) row is returned.
WITH repeat_hosts AS (
    -- Events in countries that appear at least twice; the date is rendered
    -- as a 'dd mon yyyy' string (style 106).
    SELECT
        EventId,
        EventName,
        Country,
        Attendance,
        CONVERT(VARCHAR(11), EventDate, 106) AS [Date]
    FROM Sports
    WHERE Country IN (
        SELECT Country
        FROM Sports
        GROUP BY Country
        HAVING COUNT(*) >= 2
    )
),
with_previous AS (
    -- The window is sorted newest-first (on the string cast back to datetime),
    -- so LEAD picks up the attendance of the chronologically previous event.
    SELECT
        repeat_hosts.*,
        LEAD(Attendance) OVER (
            PARTITION BY Country
            ORDER BY CAST([Date] AS datetime) DESC
        ) AS PrevAttendance
    FROM repeat_hosts
)
SELECT
    EventName,
    Country,
    Attendance,
    [Date]
FROM with_previous
WHERE PrevAttendance < Attendance
所以我还有一个更一般的问题:在 SQL Server 2014 中,通常如何对每个分区(partition/window)内的行执行这类操作?因为在这个例子中,如果每个国家/地区举办过更多赛事,我可能还想检查最近一次的出席人数是否至少高于之前的任意一次赛事,而不仅仅是上一次赛事。
希望这是有道理的,在此先感谢您的帮助。
所以我对此 table 的预期结果如下:
+----------+------------+-----------+-------------+------------+
| EventId | EventName | Country | Attendance | EventDate |
+----------+------------+-----------+-------------+------------+
| 1 | Soccer1 | Australia | 12000 | 2015-01-01 |
| 3 | Soccer3 | Australia | 13999 | 2015-03-22 |
| 7 | Tennis1 | America | 10000 | 2014-01-02 |
| 8 | Tennis2 | America | 12111 | 2015-10-01 |
+----------+------------+-----------+-------------+------------+
-- Suppress the "n rows affected" messages while the fixture is loaded.
SET NOCOUNT ON;

-- Session-scoped copy of the sample table, one row per sporting event.
CREATE TABLE #sports (
    [EventId]    [int]         NULL,
    [EventName]  [varchar](50) NULL,
    [Country]    [varchar](50) NULL,
    [Attendance] [int]         NULL,
    [EventDate]  [date]        NULL
);

-- Same eight sample events, loaded with one multi-row insert.
INSERT INTO #sports ([EventId], [EventName], [Country], [Attendance], [EventDate])
VALUES
    (1, N'Soccer1',   N'Australia', 12000, CAST(N'2015-01-01' AS Date)),
    (2, N'Soccer2',   N'Mexico',    35999, CAST(N'2016-02-02' AS Date)),
    (3, N'Soccer3',   N'Australia', 13999, CAST(N'2015-03-22' AS Date)),
    (4, N'Football1', N'Japan',     13555, CAST(N'2003-11-12' AS Date)),
    (5, N'Football2', N'Japan',     12222, CAST(N'2004-01-01' AS Date)),
    (6, N'Football3', N'Canada',    13444, CAST(N'2003-02-23' AS Date)),
    (7, N'Tennis1',   N'America',   10000, CAST(N'2014-01-02' AS Date)),
    (8, N'Tennis2',   N'America',   12111, CAST(N'2015-10-01' AS Date))
-- Return every event whose attendance rose compared with the previous event
-- held in the same country, together with that previous event.
;WITH cte AS (
    SELECT
        [EventId], [EventName], [Country], [Attendance], [EventDate],
        -- Neighbouring events are found chronologically (EventDate); EventId
        -- only breaks ties between events on the same day, so the result does
        -- not depend on ids having been assigned in date order.
        PrevAttendance = LAG(Attendance)  OVER (PARTITION BY Country ORDER BY EventDate, EventId),
        NextAttendance = LEAD(Attendance) OVER (PARTITION BY Country ORDER BY EventDate, EventId)
    FROM
        #sports
)
SELECT
    [EventId], [EventName], [Country], [Attendance], [EventDate]
FROM
    cte
WHERE
    -- Strict comparisons: equal or NULL attendance is not an increase, and
    -- the first/last event of a country has a NULL neighbour that never matches.
    Attendance > PrevAttendance        -- this event improved on the previous one
    OR NextAttendance > Attendance     -- this event is the one that was improved upon
ORDER BY
    EventId;
DROP TABLE #sports;
结果:
+---------+-----------+-----------+------------+------------+
| EventId | EventName | Country | Attendance | EventDate |
+---------+-----------+-----------+------------+------------+
| 1 | Soccer1 | Australia | 12000 | 2015-01-01 |
| 3 | Soccer3 | Australia | 13999 | 2015-03-22 |
| 7 | Tennis1 | America | 10000 | 2014-01-02 |
| 8 | Tennis2 | America | 12111 | 2015-10-01 |
+---------+-----------+-----------+------------+------------+
使用 ROW_NUMBER 获取每个国家/地区最近的两行。
-- Keep only the two most recent events per country, and only when the most
-- recent one drew a larger crowd than the one before it.
WITH cte AS (
    SELECT *,
        -- Every window is sorted newest-first, so LEAD reads the
        -- chronologically earlier event and LAG the chronologically later one.
        LEAD(Attendance) OVER (PARTITION BY Country ORDER BY EventDate DESC) AS PrevAttendance,
        LAG(Attendance) OVER (PARTITION BY Country ORDER BY EventDate DESC) AS NextAttendance,
        -- rn = 1 is the latest event of the country, rn = 2 the one before it.
        ROW_NUMBER() OVER (PARTITION BY Country ORDER BY EventDate DESC) AS rn
    FROM sports
)
SELECT *
FROM cte
WHERE (rn = 2 AND NextAttendance > Attendance)   -- the earlier event of the pair
   OR (rn = 1 AND PrevAttendance < Attendance)   -- the latest event, if it improved
使用累积 MIN 查找所有先前行的最小值:
MIN(Attendance)
over (partition by Country
order by EventDate desc
-- The window is sorted newest-first, so all chronologically earlier events
-- come AFTER the current row; a PRECEDING frame would be empty for the
-- most recent event. Look at the following rows instead.
rows between 1 following and unbounded following)
我有下面这张存储体育赛事数据的表。我想找出:当赛事再次在同一国家/地区举办时,哪些赛事的出席人数有所增加。
+----------+------------+-----------+-------------+------------+
| EventId | EventName | Country | Attendance | EventDate |
+----------+------------+-----------+-------------+------------+
| 1 | Soccer1 | Australia | 12000 | 2015-01-01 |
| 2 | Soccer2 | Mexico | 35999 | 2016-02-02 |
| 3 | Soccer3 | Australia | 13999 | 2015-03-22 |
| 4 | Football1 | Japan | 13555 | 2003-11-12 |
| 5 | Football2 | Japan | 12222 | 2004-01-01 |
| 6 | Football3 | Canada | 13444 | 2003-02-23 |
| 7 | Tennis1 | America | 10000 | 2014-01-02 |
| 8 | Tennis2 | America | 12111 | 2015-10-01 |
+----------+------------+-----------+-------------+------------+
-- Sample schema for the question: one row per sporting event.
CREATE TABLE [dbo].[Sports](
[EventId] [int] NULL,
[EventName] [varchar](50) NULL,
[Country] [varchar](50) NULL,
[Attendance] [int] NULL,
[EventDate] [date] NULL
);  -- the column list must be closed before the INSERT statements can parse
-- Load the eight sample events in a single multi-row insert.
INSERT INTO [dbo].[Sports] ([EventId], [EventName], [Country], [Attendance], [EventDate]) VALUES
(1, N'Soccer1', N'Australia', 12000, CAST(N'2015-01-01' AS Date)),
(2, N'Soccer2', N'Mexico', 35999, CAST(N'2016-02-02' AS Date)),
(3, N'Soccer3', N'Australia', 13999, CAST(N'2015-03-22' AS Date)),
(4, N'Football1', N'Japan', 13555, CAST(N'2003-11-12' AS Date)),
(5, N'Football2', N'Japan', 12222, CAST(N'2004-01-01' AS Date)),
(6, N'Football3', N'Canada', 13444, CAST(N'2003-02-23' AS Date)),
(7, N'Tennis1', N'America', 10000, CAST(N'2014-01-02' AS Date)),
(8, N'Tennis2', N'America', 12111, CAST(N'2015-10-01' AS Date));
于是我创建了一个 CTE 来找出多次举办过赛事的国家/地区,然后尝试使用按 EventDate 排序的 LEAD 函数,查看最近一次赛事的出席人数是否更高。
但是当满足这些条件时,我想同时显示出席人数较高的那一行以及上一次赛事的那一行。我的尝试只显示了日期最近的那一行。
-- Asker's attempt: list events whose attendance beats the previous event
-- held in the same country. Only the later (higher) row is returned.
WITH repeat_hosts AS (
    -- Events in countries that appear at least twice; the date is rendered
    -- as a 'dd mon yyyy' string (style 106).
    SELECT
        EventId,
        EventName,
        Country,
        Attendance,
        CONVERT(VARCHAR(11), EventDate, 106) AS [Date]
    FROM Sports
    WHERE Country IN (
        SELECT Country
        FROM Sports
        GROUP BY Country
        HAVING COUNT(*) >= 2
    )
),
with_previous AS (
    -- The window is sorted newest-first (on the string cast back to datetime),
    -- so LEAD picks up the attendance of the chronologically previous event.
    SELECT
        repeat_hosts.*,
        LEAD(Attendance) OVER (
            PARTITION BY Country
            ORDER BY CAST([Date] AS datetime) DESC
        ) AS PrevAttendance
    FROM repeat_hosts
)
SELECT
    EventName,
    Country,
    Attendance,
    [Date]
FROM with_previous
WHERE PrevAttendance < Attendance
所以我还有一个更一般的问题:在 SQL Server 2014 中,通常如何对每个分区(partition/window)内的行执行这类操作?因为在这个例子中,如果每个国家/地区举办过更多赛事,我可能还想检查最近一次的出席人数是否至少高于之前的任意一次赛事,而不仅仅是上一次赛事。
希望这是有道理的,在此先感谢您的帮助。
所以我对此 table 的预期结果如下:
+----------+------------+-----------+-------------+------------+
| EventId | EventName | Country | Attendance | EventDate |
+----------+------------+-----------+-------------+------------+
| 1 | Soccer1 | Australia | 12000 | 2015-01-01 |
| 3 | Soccer3 | Australia | 13999 | 2015-03-22 |
| 7 | Tennis1 | America | 10000 | 2014-01-02 |
| 8 | Tennis2 | America | 12111 | 2015-10-01 |
+----------+------------+-----------+-------------+------------+
-- Suppress the "n rows affected" messages while the fixture is loaded.
SET NOCOUNT ON;

-- Session-scoped copy of the sample table, one row per sporting event.
CREATE TABLE #sports (
    [EventId]    [int]         NULL,
    [EventName]  [varchar](50) NULL,
    [Country]    [varchar](50) NULL,
    [Attendance] [int]         NULL,
    [EventDate]  [date]        NULL
);

-- Same eight sample events, loaded with one multi-row insert.
INSERT INTO #sports ([EventId], [EventName], [Country], [Attendance], [EventDate])
VALUES
    (1, N'Soccer1',   N'Australia', 12000, CAST(N'2015-01-01' AS Date)),
    (2, N'Soccer2',   N'Mexico',    35999, CAST(N'2016-02-02' AS Date)),
    (3, N'Soccer3',   N'Australia', 13999, CAST(N'2015-03-22' AS Date)),
    (4, N'Football1', N'Japan',     13555, CAST(N'2003-11-12' AS Date)),
    (5, N'Football2', N'Japan',     12222, CAST(N'2004-01-01' AS Date)),
    (6, N'Football3', N'Canada',    13444, CAST(N'2003-02-23' AS Date)),
    (7, N'Tennis1',   N'America',   10000, CAST(N'2014-01-02' AS Date)),
    (8, N'Tennis2',   N'America',   12111, CAST(N'2015-10-01' AS Date))
-- Return every event whose attendance rose compared with the previous event
-- held in the same country, together with that previous event.
;WITH cte AS (
    SELECT
        [EventId], [EventName], [Country], [Attendance], [EventDate],
        -- Neighbouring events are found chronologically (EventDate); EventId
        -- only breaks ties between events on the same day, so the result does
        -- not depend on ids having been assigned in date order.
        PrevAttendance = LAG(Attendance)  OVER (PARTITION BY Country ORDER BY EventDate, EventId),
        NextAttendance = LEAD(Attendance) OVER (PARTITION BY Country ORDER BY EventDate, EventId)
    FROM
        #sports
)
SELECT
    [EventId], [EventName], [Country], [Attendance], [EventDate]
FROM
    cte
WHERE
    -- Strict comparisons: equal or NULL attendance is not an increase, and
    -- the first/last event of a country has a NULL neighbour that never matches.
    Attendance > PrevAttendance        -- this event improved on the previous one
    OR NextAttendance > Attendance     -- this event is the one that was improved upon
ORDER BY
    EventId;
DROP TABLE #sports;
结果:
+---------+-----------+-----------+------------+------------+
| EventId | EventName | Country | Attendance | EventDate |
+---------+-----------+-----------+------------+------------+
| 1 | Soccer1 | Australia | 12000 | 2015-01-01 |
| 3 | Soccer3 | Australia | 13999 | 2015-03-22 |
| 7 | Tennis1 | America | 10000 | 2014-01-02 |
| 8 | Tennis2 | America | 12111 | 2015-10-01 |
+---------+-----------+-----------+------------+------------+
使用 ROW_NUMBER 获取每个国家/地区最近的两行。
-- Keep only the two most recent events per country, and only when the most
-- recent one drew a larger crowd than the one before it.
WITH cte AS (
    SELECT *,
        -- Every window is sorted newest-first, so LEAD reads the
        -- chronologically earlier event and LAG the chronologically later one.
        LEAD(Attendance) OVER (PARTITION BY Country ORDER BY EventDate DESC) AS PrevAttendance,
        LAG(Attendance) OVER (PARTITION BY Country ORDER BY EventDate DESC) AS NextAttendance,
        -- rn = 1 is the latest event of the country, rn = 2 the one before it.
        ROW_NUMBER() OVER (PARTITION BY Country ORDER BY EventDate DESC) AS rn
    FROM sports
)
SELECT *
FROM cte
WHERE (rn = 2 AND NextAttendance > Attendance)   -- the earlier event of the pair
   OR (rn = 1 AND PrevAttendance < Attendance)   -- the latest event, if it improved
使用累积 MIN 查找所有先前行的最小值:
MIN(Attendance)
over (partition by Country
order by EventDate desc
-- The window is sorted newest-first, so all chronologically earlier events
-- come AFTER the current row; a PRECEDING frame would be empty for the
-- most recent event. Look at the following rows instead.
rows between 1 following and unbounded following)