在 Python Pandas 数据帧中转换区间外连接 SQL
Convert Interval Outer Join SQL in Python Pandas Dataframe
我正在把一个 Oracle SQL 的区间外连接(interval outer join)转换为 Pandas 数据帧操作。下面是 Oracle SQL:
-- Interval outer join: return every point together with the label of the
-- interval (same id) whose inclusive [begin, end] range contains the point's
-- mid value. Points that fall inside no interval are kept with a NULL label.
WITH df_interval AS (
    -- Sample intervals. The column names of a UNION ALL come from its first
    -- branch, so every branch uses the same aliases to avoid confusion.
    SELECT '1' AS id, 'AAA' AS interval, 1000 AS begin, 2000 AS end FROM DUAL
    UNION ALL
    SELECT '1' AS id, 'BBB' AS interval, 2100 AS begin, 3000 AS end FROM DUAL
    UNION ALL
    SELECT '2' AS id, 'CCC' AS interval, 3100 AS begin, 4000 AS end FROM DUAL
    UNION ALL
    SELECT '2' AS id, 'DDD' AS interval, 4100 AS begin, 5000 AS end FROM DUAL
),
df_point AS (
    -- Sample points to be located inside the intervals.
    SELECT '1' AS id, 'X1' AS point, 1100 AS mid FROM DUAL
    UNION ALL
    SELECT '1' AS id, 'X2' AS point, 2050 AS mid FROM DUAL
    UNION ALL
    SELECT '1' AS id, 'X3' AS point, 3200 AS mid FROM DUAL
    UNION ALL
    SELECT '2' AS id, 'X4' AS point, 4200 AS mid FROM DUAL
    UNION ALL
    SELECT '2' AS id, 'X5' AS point, 5500 AS mid FROM DUAL
)
SELECT
    pt.id,
    pt.point,
    pt.mid,
    it.interval
-- The preserved table (points) comes first so a LEFT JOIN expresses the same
-- result as the former RIGHT OUTER JOIN. Table aliases are written without AS
-- because Oracle does not accept AS before a table alias.
FROM df_point pt
LEFT JOIN df_interval it
    ON it.id = pt.id
    -- The range test stays in ON (not WHERE) so unmatched points survive.
    AND pt.mid BETWEEN it.begin AND it.end
我已经创建了下面的数据帧,但不知道如何像上面的 Oracle SQL 那样按区间进行 RIGHT OUTER JOIN:
import pandas as pd
# Sample intervals: one row per labelled [begin, end] range, keyed by ID.
df_interval = pd.DataFrame(
    [
        ('1', 'AAA', 1000, 2000),
        ('1', 'BBB', 2100, 3000),
        ('2', 'CCC', 3100, 4000),
        ('2', 'DDD', 4100, 5000),
    ],
    columns=['ID', 'interval', 'begin', 'end'],
)

# Sample points: each row carries the value (mid) to look up in the intervals.
df_point = pd.DataFrame(
    [
        ('1', 'X1', 1100),
        ('1', 'X2', 2050),
        ('1', 'X3', 3200),
        ('2', 'X4', 4200),
        ('2', 'X5', 5500),
    ],
    columns=['ID', 'point', 'mid'],
)
我希望输出是这样的:
# Desired result: one row per point; 'intrvl' is an empty string when the
# point's mid value lies in no interval.
df_out = pd.DataFrame(
    [
        ('1', 1100, 'AAA'),
        ('1', 2050, ''),
        ('1', 3200, ''),
        ('2', 4200, 'DDD'),
        ('2', 5500, ''),
    ],
    columns=['ID', 'mid', 'intrvl'],
)
有人可以帮助我吗?谢谢!
我觉得 merge_asof 非常适合你的情况。
唯一的区别是需要合并两次:一次按 end 向前(forward)匹配,一次按 begin 向后(backward)匹配;当两次合并得到同一个区间时,该区间就是匹配的区间,否则视为没有匹配。
# Interval join via two as-of merges. merge_asof requires both frames to be
# sorted by their respective "on" keys; the sample frames already are.

# Forward pass: for each point, the nearest interval (same ID) whose end is
# at or after the point's mid.
s1 = pd.merge_asof(df_point, df_interval, by='ID',
                   left_on='mid', right_on='end', direction='forward')
# Backward pass: for each point, the nearest interval (same ID) whose begin is
# at or before the point's mid.
s2 = pd.merge_asof(df_point, df_interval, by='ID',
                   left_on='mid', right_on='begin', direction='backward')
# A point lies inside an interval only when both passes pick the same label;
# otherwise (including either pass finding nothing) the label becomes NaN.
s1['interval'] = s1['interval'].where(s1['interval'] == s2['interval'])
# Name the columns by keyword: pandas 2.0+ no longer accepts axis positionally.
s1.drop(columns=['end', 'begin'], inplace=True)
s1
ID point mid interval
0 1 X1 1100 AAA
1 1 X2 2050 NaN
2 1 X3 3200 NaN
3 2 X4 4200 DDD
4 2 X5 5500 NaN
我正在把一个 Oracle SQL 的区间外连接(interval outer join)转换为 Pandas 数据帧操作。下面是 Oracle SQL:
-- Interval outer join: return every point together with the label of the
-- interval (same id) whose inclusive [begin, end] range contains the point's
-- mid value. Points that fall inside no interval are kept with a NULL label.
WITH df_interval AS (
    -- Sample intervals. The column names of a UNION ALL come from its first
    -- branch, so every branch uses the same aliases to avoid confusion.
    SELECT '1' AS id, 'AAA' AS interval, 1000 AS begin, 2000 AS end FROM DUAL
    UNION ALL
    SELECT '1' AS id, 'BBB' AS interval, 2100 AS begin, 3000 AS end FROM DUAL
    UNION ALL
    SELECT '2' AS id, 'CCC' AS interval, 3100 AS begin, 4000 AS end FROM DUAL
    UNION ALL
    SELECT '2' AS id, 'DDD' AS interval, 4100 AS begin, 5000 AS end FROM DUAL
),
df_point AS (
    -- Sample points to be located inside the intervals.
    SELECT '1' AS id, 'X1' AS point, 1100 AS mid FROM DUAL
    UNION ALL
    SELECT '1' AS id, 'X2' AS point, 2050 AS mid FROM DUAL
    UNION ALL
    SELECT '1' AS id, 'X3' AS point, 3200 AS mid FROM DUAL
    UNION ALL
    SELECT '2' AS id, 'X4' AS point, 4200 AS mid FROM DUAL
    UNION ALL
    SELECT '2' AS id, 'X5' AS point, 5500 AS mid FROM DUAL
)
SELECT
    pt.id,
    pt.point,
    pt.mid,
    it.interval
-- The preserved table (points) comes first so a LEFT JOIN expresses the same
-- result as the former RIGHT OUTER JOIN. Table aliases are written without AS
-- because Oracle does not accept AS before a table alias.
FROM df_point pt
LEFT JOIN df_interval it
    ON it.id = pt.id
    -- The range test stays in ON (not WHERE) so unmatched points survive.
    AND pt.mid BETWEEN it.begin AND it.end
我已经创建了下面的数据帧,但不知道如何像上面的 Oracle SQL 那样按区间进行 RIGHT OUTER JOIN:
import pandas as pd
# Sample intervals: one row per labelled [begin, end] range, keyed by ID.
df_interval = pd.DataFrame(
    [
        ('1', 'AAA', 1000, 2000),
        ('1', 'BBB', 2100, 3000),
        ('2', 'CCC', 3100, 4000),
        ('2', 'DDD', 4100, 5000),
    ],
    columns=['ID', 'interval', 'begin', 'end'],
)

# Sample points: each row carries the value (mid) to look up in the intervals.
df_point = pd.DataFrame(
    [
        ('1', 'X1', 1100),
        ('1', 'X2', 2050),
        ('1', 'X3', 3200),
        ('2', 'X4', 4200),
        ('2', 'X5', 5500),
    ],
    columns=['ID', 'point', 'mid'],
)
我希望输出是这样的:
# Desired result: one row per point; 'intrvl' is an empty string when the
# point's mid value lies in no interval.
df_out = pd.DataFrame(
    [
        ('1', 1100, 'AAA'),
        ('1', 2050, ''),
        ('1', 3200, ''),
        ('2', 4200, 'DDD'),
        ('2', 5500, ''),
    ],
    columns=['ID', 'mid', 'intrvl'],
)
有人可以帮助我吗?谢谢!
我觉得 merge_asof 非常适合你的情况。
唯一的区别是需要合并两次:一次按 end 向前(forward)匹配,一次按 begin 向后(backward)匹配;当两次合并得到同一个区间时,该区间就是匹配的区间,否则视为没有匹配。
# Interval join via two as-of merges. merge_asof requires both frames to be
# sorted by their respective "on" keys; the sample frames already are.

# Forward pass: for each point, the nearest interval (same ID) whose end is
# at or after the point's mid.
s1 = pd.merge_asof(df_point, df_interval, by='ID',
                   left_on='mid', right_on='end', direction='forward')
# Backward pass: for each point, the nearest interval (same ID) whose begin is
# at or before the point's mid.
s2 = pd.merge_asof(df_point, df_interval, by='ID',
                   left_on='mid', right_on='begin', direction='backward')
# A point lies inside an interval only when both passes pick the same label;
# otherwise (including either pass finding nothing) the label becomes NaN.
s1['interval'] = s1['interval'].where(s1['interval'] == s2['interval'])
# Name the columns by keyword: pandas 2.0+ no longer accepts axis positionally.
s1.drop(columns=['end', 'begin'], inplace=True)
s1
ID point mid interval
0 1 X1 1100 AAA
1 1 X2 2050 NaN
2 1 X3 3200 NaN
3 2 X4 4200 DDD
4 2 X5 5500 NaN