在 SQL 服务器中添加带有规则集的新列
Adding new column with set of rules in SQL Server
我有一套稍微复杂的规则,需要应用到一张表上。问题如下:我有一张存储医疗记录的表,需要确定每个人在出院日期之后去的第一个站点。出院日期是位置为 'initial' 的那一行的 end_date(该行始终是每个组的第一行)。表按 ID 分组,并按下面所示的格式排序。
有 3 条规则:(1) 在同一个 ID 组内,如果某一行的 begin_date 与第一行的 end_date 相同,则返回该行的位置作为第一个站点(如果有两行都满足此条件,任选其一均算正确,但优先取第一行)。(2) 如果不满足规则 1,但该患者存在位置为 'Health' 的记录,则返回 'Health'。(3) 否则,即规则 1 和规则 2 都不满足时,返回 'Home'。
table
ID color begin_date end_date location
1 red 2017-01-01 2017-01-07 initial
1 green 2017-01-05 2017-01-07 nursing
1 blue 2017-01-07 2017-01-15 rehab
1 red 2017-01-11 2017-01-22 Health
2 red 2017-02-22 2017-02-26 initial
2 green 2017-02-26 2017-02-28 nursing
2 blue 2017-02-26 2017-02-28 rehab
3 red 2017-03-11 2017-03-22 initial
4 red 2017-04-01 2017-04-07 initial
4 green 2017-04-05 2017-04-07 nursing
4 blue 2017-04-10 2017-04-15 Health
最终结果:
ID first_site
1 rehab
2 nursing
3 home
4 Health
在 sql-server 2008 中我的尝试:(旁注:我考虑添加一个辅助列,它是每一行的 'initial' 位置的 end_date,这样更容易在一行内进行比较。不确定是否有必要)。感谢您的指导!
-- Asker's non-working attempt, kept verbatim: this is pseudo-code, not valid T-SQL.
-- Intent: per ID, return the location of a row whose begin_date equals the
-- first row's end_date, else 'Health', else 'Home'.
-- NOTE(review): OVER has to follow a window function and accepts
-- PARTITION BY / ORDER BY rather than a CASE expression, and `end_date[0]`
-- is not T-SQL syntax (columns cannot be indexed like arrays).
SELECT
ID,
OVER( PARTITION ID CASE WHEN end_date[0] = begin_date THEN location
WHEN location = 'Health' THEN 'Health'
ELSE 'Home' end) AS [first_site]
FROM table
在 python 中,我得到了这个答案:
def conditions(x):
    """Return the first site for one ID group of the records frame.

    Parameters
    ----------
    x : pandas.DataFrame
        Rows of a single ID group with 'begin_date', 'end_date' and
        'location' columns. The first row (by position) is treated as the
        discharge row.

    Returns
    -------
    str
        The location of the first row whose begin_date equals the first
        row's end_date; otherwise 'Health' if any row has that location;
        otherwise 'Home'.
    """
    # Discharge date is the end_date of the group's first row (positional).
    discharge_date = x['end_date'].iloc[0]

    # Rule 1: locations of rows that begin on the discharge date.
    val = x.loc[x['begin_date'] == discharge_date, 'location']
    if not val.empty:
        # Several rows may match; the first one in group order wins.
        return val.iloc[0]

    # Rule 2: no same-day row, but the group contains a 'Health' stay.
    if (x['location'] == 'Health').any():
        return 'Health'

    # Rule 3: neither of the above.
    return 'Home'
# Evaluate the rule once per ID group, then flatten the result into a
# two-column frame: ID and first_site.
final = (
    df.groupby('ID')
    .apply(conditions)
    .reset_index(name='first_site')
)
-- Sample fixture for the first answer: one row per stay, keyed by a
-- surrogate identity column (ExampleID).
declare @example table (
    ExampleID int identity(1,1) not null primary key clustered,
    ID        int               not null,
    Color     nvarchar(255)     not null,
    BeginDate date              not null,
    EndDate   date              not null,
    Loc       nvarchar(255)     not null
);

-- Same eleven rows as the question's sample table, in the same order.
insert into @example (ID, Color, BeginDate, EndDate, Loc)
values
    (1, 'red'  , '2017-01-01', '2017-01-07', 'initial'),
    (1, 'green', '2017-01-05', '2017-01-07', 'nursing'),
    (1, 'blue' , '2017-01-07', '2017-01-15', 'rehab'),
    (1, 'red'  , '2017-01-11', '2017-01-22', 'Health'),
    (2, 'red'  , '2017-02-22', '2017-02-26', 'initial'),
    (2, 'green', '2017-02-26', '2017-02-28', 'nursing'),
    (2, 'blue' , '2017-02-26', '2017-02-28', 'rehab'),
    (3, 'red'  , '2017-03-11', '2017-03-22', 'initial'),
    (4, 'red'  , '2017-04-01', '2017-04-07', 'initial'),
    (4, 'green', '2017-04-05', '2017-04-07', 'nursing'),
    (4, 'blue' , '2017-04-10', '2017-04-15', 'Health');
-- Resolve each patient's first site after the 'initial' stay ends.
-- Precedence: (1) a stay of the same patient that begins on the discharge
-- date, (2) 'Health' if the patient has any 'Health' stay, (3) 'Home'.
-- Returns one row per patient that has an 'initial' stay: (ID, Loc).
with initial_stay as (
    -- The discharge row is identified by Loc = 'initial' rather than by
    -- assuming it has the earliest BeginDate.
    select e.ID
         , e.EndDate
    from @example as e
    where e.Loc = 'initial'
)
, same_day_site as (
    -- Stays of the SAME patient that start on the discharge date. Matching
    -- on the date alone would let one patient's stay satisfy another
    -- patient's rule 1. The 'initial' row is excluded so a same-day
    -- initial stay cannot match itself.
    select i.ID
         , n.Loc
           -- row_number (not rank) yields exactly one winner per patient.
           -- Ties go to the lowest ExampleID; this assumes identity values
           -- follow the input row order.
         , row_number() over (partition by i.ID order by n.ExampleID) as MatchOrder
    from initial_stay as i
    inner join @example as n
        on n.ID = i.ID
       and n.BeginDate = i.EndDate
       and n.Loc <> 'initial'
)
, health_stay as (
    -- Collapsed to one row per patient so repeated 'Health' stays cannot
    -- multiply the output rows.
    select e.ID
         , min(e.Loc) as Loc
    from @example as e
    where e.Loc = 'Health'
    group by e.ID
)
select i.ID
     , coalesce(s.Loc, h.Loc, 'Home') as Loc
from initial_stay as i
left join same_day_site as s
    on s.ID = i.ID
   and s.MatchOrder = 1
left join health_stay as h
    on h.ID = i.ID
order by i.ID;
Output:
ID Loc
1 rehab
2 nursing
3 Home
4 Health
这可以通过一个窗口函数来实现:该函数对与 `initial` 结束同一天开始的就诊记录进行排名,然后再做几个简单的 `join`:
-- Sample fixture for the second answer: one row per stay.
declare @t table (
    ID         int,
    color      varchar(20),
    begin_date date,
    end_date   date,
    location   varchar(20)
);

-- Same eleven rows as the question's sample table, in the same order.
insert into @t (ID, color, begin_date, end_date, location)
values
    (1, 'red'  , '20170101', '20170107', 'initial'),
    (1, 'green', '20170105', '20170107', 'nursing'),
    (1, 'blue' , '20170107', '20170115', 'rehab'),
    (1, 'red'  , '20170111', '20170122', 'Health'),
    (2, 'red'  , '20170222', '20170226', 'initial'),
    (2, 'green', '20170226', '20170228', 'nursing'),
    (2, 'blue' , '20170226', '20170228', 'rehab'),
    (3, 'red'  , '20170311', '20170322', 'initial'),
    (4, 'red'  , '20170401', '20170407', 'initial'),
    (4, 'green', '20170405', '20170407', 'nursing'),
    (4, 'blue' , '20170410', '20170415', 'Health');
-- Resolve each patient's first site after the 'initial' stay ends.
-- Precedence: (1) a stay of the same patient that begins on the discharge
-- date, (2) 'Health' if the patient has any 'Health' stay, (3) 'Home'.
-- Returns one row per patient that has an 'initial' stay: (ID, first_site).
with initial_stay as (
    -- The discharge row: the stay whose location is 'initial'.
    select t.ID
          ,t.end_date
    from @t as t
    where t.location = 'initial'
)
,same_day_site as (
    -- Stays of the SAME patient that start on the discharge date. Matching
    -- on the date alone would let one patient's stay satisfy another
    -- patient's rule 1. The 'initial' row is excluded so a same-day
    -- initial stay cannot match itself.
    select i.ID
          ,v.location
           -- @t has no column recording input order, so ties are broken
           -- deterministically by end_date, then location. Add an explicit
           -- ordering column if the true "first" row matters.
          ,row_number() over (partition by i.ID
                              order by v.end_date
                                      ,v.location
                             ) as r
    from initial_stay as i
    inner join @t as v
        on v.ID = i.ID
       and v.begin_date = i.end_date
       and v.location <> 'initial'
)
,health_stay as (
    -- Collapsed to one row per patient so repeated 'Health' stays cannot
    -- multiply the output rows.
    select t.ID
          ,min(t.location) as location
    from @t as t
    where t.location = 'Health'
    group by t.ID
)
select i.ID
      ,coalesce(s.location, h.location, 'Home') as first_site
from initial_stay as i
left join same_day_site as s
    on s.ID = i.ID
   and s.r = 1
left join health_stay as h
    on h.ID = i.ID
order by i.ID
;
输出:
+----+------------+
| ID | first_site |
+----+------------+
| 1 | rehab |
| 2 | nursing |
| 3 | Home |
| 4 | Health |
+----+------------+
我有一套稍微复杂的规则,需要应用到一张表上。问题如下:我有一张存储医疗记录的表,需要确定每个人在出院日期之后去的第一个站点。出院日期是位置为 'initial' 的那一行的 end_date(该行始终是每个组的第一行)。表按 ID 分组,并按下面所示的格式排序。
有 3 条规则:(1) 在同一个 ID 组内,如果某一行的 begin_date 与第一行的 end_date 相同,则返回该行的位置作为第一个站点(如果有两行都满足此条件,任选其一均算正确,但优先取第一行)。(2) 如果不满足规则 1,但该患者存在位置为 'Health' 的记录,则返回 'Health'。(3) 否则,即规则 1 和规则 2 都不满足时,返回 'Home'。
table
ID color begin_date end_date location
1 red 2017-01-01 2017-01-07 initial
1 green 2017-01-05 2017-01-07 nursing
1 blue 2017-01-07 2017-01-15 rehab
1 red 2017-01-11 2017-01-22 Health
2 red 2017-02-22 2017-02-26 initial
2 green 2017-02-26 2017-02-28 nursing
2 blue 2017-02-26 2017-02-28 rehab
3 red 2017-03-11 2017-03-22 initial
4 red 2017-04-01 2017-04-07 initial
4 green 2017-04-05 2017-04-07 nursing
4 blue 2017-04-10 2017-04-15 Health
最终结果:
ID first_site
1 rehab
2 nursing
3 home
4 Health
在 sql-server 2008 中我的尝试:(旁注:我考虑添加一个辅助列,它是每一行的 'initial' 位置的 end_date,这样更容易在一行内进行比较。不确定是否有必要)。感谢您的指导!
-- Asker's non-working attempt, kept verbatim: this is pseudo-code, not valid T-SQL.
-- Intent: per ID, return the location of a row whose begin_date equals the
-- first row's end_date, else 'Health', else 'Home'.
-- NOTE(review): OVER has to follow a window function and accepts
-- PARTITION BY / ORDER BY rather than a CASE expression, and `end_date[0]`
-- is not T-SQL syntax (columns cannot be indexed like arrays).
SELECT
ID,
OVER( PARTITION ID CASE WHEN end_date[0] = begin_date THEN location
WHEN location = 'Health' THEN 'Health'
ELSE 'Home' end) AS [first_site]
FROM table
在 python 中,我得到了这个答案:
def conditions(x):
    """Return the first site for one ID group of the records frame.

    Parameters
    ----------
    x : pandas.DataFrame
        Rows of a single ID group with 'begin_date', 'end_date' and
        'location' columns. The first row (by position) is treated as the
        discharge row.

    Returns
    -------
    str
        The location of the first row whose begin_date equals the first
        row's end_date; otherwise 'Health' if any row has that location;
        otherwise 'Home'.
    """
    # Discharge date is the end_date of the group's first row (positional).
    discharge_date = x['end_date'].iloc[0]

    # Rule 1: locations of rows that begin on the discharge date.
    val = x.loc[x['begin_date'] == discharge_date, 'location']
    if not val.empty:
        # Several rows may match; the first one in group order wins.
        return val.iloc[0]

    # Rule 2: no same-day row, but the group contains a 'Health' stay.
    if (x['location'] == 'Health').any():
        return 'Health'

    # Rule 3: neither of the above.
    return 'Home'
# Evaluate the rule once per ID group, then flatten the result into a
# two-column frame: ID and first_site.
final = (
    df.groupby('ID')
    .apply(conditions)
    .reset_index(name='first_site')
)
-- Sample fixture for the first answer: one row per stay, keyed by a
-- surrogate identity column (ExampleID).
declare @example table (
    ExampleID int identity(1,1) not null primary key clustered,
    ID        int               not null,
    Color     nvarchar(255)     not null,
    BeginDate date              not null,
    EndDate   date              not null,
    Loc       nvarchar(255)     not null
);

-- Same eleven rows as the question's sample table, in the same order.
insert into @example (ID, Color, BeginDate, EndDate, Loc)
values
    (1, 'red'  , '2017-01-01', '2017-01-07', 'initial'),
    (1, 'green', '2017-01-05', '2017-01-07', 'nursing'),
    (1, 'blue' , '2017-01-07', '2017-01-15', 'rehab'),
    (1, 'red'  , '2017-01-11', '2017-01-22', 'Health'),
    (2, 'red'  , '2017-02-22', '2017-02-26', 'initial'),
    (2, 'green', '2017-02-26', '2017-02-28', 'nursing'),
    (2, 'blue' , '2017-02-26', '2017-02-28', 'rehab'),
    (3, 'red'  , '2017-03-11', '2017-03-22', 'initial'),
    (4, 'red'  , '2017-04-01', '2017-04-07', 'initial'),
    (4, 'green', '2017-04-05', '2017-04-07', 'nursing'),
    (4, 'blue' , '2017-04-10', '2017-04-15', 'Health');
-- Resolve each patient's first site after the 'initial' stay ends.
-- Precedence: (1) a stay of the same patient that begins on the discharge
-- date, (2) 'Health' if the patient has any 'Health' stay, (3) 'Home'.
-- Returns one row per patient that has an 'initial' stay: (ID, Loc).
with initial_stay as (
    -- The discharge row is identified by Loc = 'initial' rather than by
    -- assuming it has the earliest BeginDate.
    select e.ID
         , e.EndDate
    from @example as e
    where e.Loc = 'initial'
)
, same_day_site as (
    -- Stays of the SAME patient that start on the discharge date. Matching
    -- on the date alone would let one patient's stay satisfy another
    -- patient's rule 1. The 'initial' row is excluded so a same-day
    -- initial stay cannot match itself.
    select i.ID
         , n.Loc
           -- row_number (not rank) yields exactly one winner per patient.
           -- Ties go to the lowest ExampleID; this assumes identity values
           -- follow the input row order.
         , row_number() over (partition by i.ID order by n.ExampleID) as MatchOrder
    from initial_stay as i
    inner join @example as n
        on n.ID = i.ID
       and n.BeginDate = i.EndDate
       and n.Loc <> 'initial'
)
, health_stay as (
    -- Collapsed to one row per patient so repeated 'Health' stays cannot
    -- multiply the output rows.
    select e.ID
         , min(e.Loc) as Loc
    from @example as e
    where e.Loc = 'Health'
    group by e.ID
)
select i.ID
     , coalesce(s.Loc, h.Loc, 'Home') as Loc
from initial_stay as i
left join same_day_site as s
    on s.ID = i.ID
   and s.MatchOrder = 1
left join health_stay as h
    on h.ID = i.ID
order by i.ID;
Output:
ID Loc
1 rehab
2 nursing
3 Home
4 Health
这可以通过一个窗口函数来实现:该函数对与 `initial` 结束同一天开始的就诊记录进行排名,然后再做几个简单的 `join`:
-- Sample fixture for the second answer: one row per stay.
declare @t table (
    ID         int,
    color      varchar(20),
    begin_date date,
    end_date   date,
    location   varchar(20)
);

-- Same eleven rows as the question's sample table, in the same order.
insert into @t (ID, color, begin_date, end_date, location)
values
    (1, 'red'  , '20170101', '20170107', 'initial'),
    (1, 'green', '20170105', '20170107', 'nursing'),
    (1, 'blue' , '20170107', '20170115', 'rehab'),
    (1, 'red'  , '20170111', '20170122', 'Health'),
    (2, 'red'  , '20170222', '20170226', 'initial'),
    (2, 'green', '20170226', '20170228', 'nursing'),
    (2, 'blue' , '20170226', '20170228', 'rehab'),
    (3, 'red'  , '20170311', '20170322', 'initial'),
    (4, 'red'  , '20170401', '20170407', 'initial'),
    (4, 'green', '20170405', '20170407', 'nursing'),
    (4, 'blue' , '20170410', '20170415', 'Health');
-- Resolve each patient's first site after the 'initial' stay ends.
-- Precedence: (1) a stay of the same patient that begins on the discharge
-- date, (2) 'Health' if the patient has any 'Health' stay, (3) 'Home'.
-- Returns one row per patient that has an 'initial' stay: (ID, first_site).
with initial_stay as (
    -- The discharge row: the stay whose location is 'initial'.
    select t.ID
          ,t.end_date
    from @t as t
    where t.location = 'initial'
)
,same_day_site as (
    -- Stays of the SAME patient that start on the discharge date. Matching
    -- on the date alone would let one patient's stay satisfy another
    -- patient's rule 1. The 'initial' row is excluded so a same-day
    -- initial stay cannot match itself.
    select i.ID
          ,v.location
           -- @t has no column recording input order, so ties are broken
           -- deterministically by end_date, then location. Add an explicit
           -- ordering column if the true "first" row matters.
          ,row_number() over (partition by i.ID
                              order by v.end_date
                                      ,v.location
                             ) as r
    from initial_stay as i
    inner join @t as v
        on v.ID = i.ID
       and v.begin_date = i.end_date
       and v.location <> 'initial'
)
,health_stay as (
    -- Collapsed to one row per patient so repeated 'Health' stays cannot
    -- multiply the output rows.
    select t.ID
          ,min(t.location) as location
    from @t as t
    where t.location = 'Health'
    group by t.ID
)
select i.ID
      ,coalesce(s.location, h.location, 'Home') as first_site
from initial_stay as i
left join same_day_site as s
    on s.ID = i.ID
   and s.r = 1
left join health_stay as h
    on h.ID = i.ID
order by i.ID
;
输出:
+----+------------+
| ID | first_site |
+----+------------+
| 1 | rehab |
| 2 | nursing |
| 3 | Home |
| 4 | Health |
+----+------------+