将 tibble 与来自同一 tibble 的数据合并
Merge tibble with data from the same tibble
我有一些污染监测站的数据。我想创建一个新的 tibble:当数据满足某些条件时,把同一张表中的数据左连接(left join)进来。需要满足的条件是:水体(water_body)相同,并且被连接进来的观测值的高程(elevation)低于它所连接到的观测值的高程。
因为英语不是我的母语,用文字解释起来可能显得有些笨拙,所以下面给出一个我想要的结果的示例,我想这样就清楚了。
这是数据:
# Sample monitoring data used throughout the question: 12 rows and 5 columns
# (id_emca, water_body, id_estmun, dbo, elevation), built row-wise.
tribble(
~id_emca, ~water_body, ~id_estmun, ~dbo, ~elevation,
"DLAGU20", "RIO CHICALOTE_16", 1005, 364.92, 1865,
"DLAGU20", "RIO CHICALOTE_17", 1005, 485.91, 1865,
"DLAGU20", "RIO CHICALOTE_18", 1005, 270.32, 1865,
"DLBAJ155", "RIO MULEGE_12", 3003, 20.3, 215,
"DLBAJ155", "RIO MULEGE_13", 3003, 14.005, 215,
"DLBAJ157", "RIO CHICALOTE_17", 3007, 315.34, 1677,
"DLBAJ157", "RIO CHICALOTE_18", 3007, 430.21, 1677,
"DLBAJ159", "RIO MULEGE_13", 3010, 17.89, 477,
"DLBAJ159", "RIO MULEGE_14", 3010, 35.98, 477,
"DLCAM80", "RIO CHICALOTE_18", 4012, 157.66, 1003,
"DLCAM85", "RIO CHICALOTE_18", 4009, 323.02, 573,
"DLCAM122", "RIO MULEGE_12", 4001, 37.81, 300
)
id_emca water_body id_estmun dbo elevation
<chr> <chr> <dbl> <dbl> <dbl>
1 DLAGU20 RIO CHICALOTE_16 1005 365. 1865
2 DLAGU20 RIO CHICALOTE_17 1005 486. 1865
3 DLAGU20 RIO CHICALOTE_18 1005 270. 1865
4 DLBAJ155 RIO MULEGE_12 3003 20.3 215
5 DLBAJ155 RIO MULEGE_13 3003 14.0 215
6 DLBAJ157 RIO CHICALOTE_17 3007 315. 1677
7 DLBAJ157 RIO CHICALOTE_18 3007 430. 1677
8 DLBAJ159 RIO MULEGE_13 3010 17.9 477
9 DLBAJ159 RIO MULEGE_14 3010 36.0 477
10 DLCAM80 RIO CHICALOTE_18 4012 158. 1003
11 DLCAM85 RIO CHICALOTE_18 4009 323. 573
12 DLCAM122 RIO MULEGE_12 4001 37.8 300
新的 tibble 应如下所示:
你可以用 data.table 做一个非等值连接(non-equi join):
# Self non-equi join with data.table: pair every row with the rows that share
# its water body and have a strictly lower elevation. Rows without such a
# partner are kept, with NA in all `*_b` columns.
library(data.table)

df <- tibble::tribble(
  ~id_emca,   ~water_body,        ~id_estmun, ~dbo,   ~elevation,
  "DLAGU20",  "RIO CHICALOTE_16", 1005,       364.92, 1865,
  "DLAGU20",  "RIO CHICALOTE_17", 1005,       485.91, 1865,
  "DLAGU20",  "RIO CHICALOTE_18", 1005,       270.32, 1865,
  "DLBAJ155", "RIO MULEGE_12",    3003,       20.3,   215,
  "DLBAJ155", "RIO MULEGE_13",    3003,       14.005, 215,
  "DLBAJ157", "RIO CHICALOTE_17", 3007,       315.34, 1677,
  "DLBAJ157", "RIO CHICALOTE_18", 3007,       430.21, 1677,
  "DLBAJ159", "RIO MULEGE_13",    3010,       17.89,  477,
  "DLBAJ159", "RIO MULEGE_14",    3010,       35.98,  477,
  "DLCAM80",  "RIO CHICALOTE_18", 4012,       157.66, 1003,
  "DLCAM85",  "RIO CHICALOTE_18", 4009,       323.02, 573,
  "DLCAM122", "RIO MULEGE_12",    4001,       37.81,  300
)

# Convert the tibble to a data.table by reference.
setDT(df)

# Join on a helper copy of `elevation` so that the original `elevation`
# columns of both sides stay available in the join result.
df[, elevation_join := elevation]

# In `result`, unprefixed columns come from the matched (lower) row and are NA
# when there is no match; `i.`-prefixed columns come from the row being
# matched. `water_body` is the shared join key.
result <- df[df, on = .(water_body, elevation_join < elevation_join)]

# Rename to the layout shown in the expected output. `water_body_b` repeats
# the join key for matched rows and is NA for unmatched ones; `id_emca` has no
# missing values in the input, so NA there marks an unmatched row.
paired <- result[, .(
  id_emca      = i.id_emca,
  water_body,
  id_estmun    = i.id_estmun,
  dbo          = i.dbo,
  elevation    = i.elevation,
  id_emca_b    = id_emca,
  water_body_b = replace(water_body, is.na(id_emca), NA_character_),
  id_estmun_b  = id_estmun,
  dbo_b        = dbo,
  elevation_b  = elevation
)]
paired
id_emca water_body id_estmun dbo elevation id_emca_b water_body_b id_estmun_b dbo_b elevation_b
1: DLAGU20 RIO CHICALOTE_16 1005 364.920 1865 <NA> <NA> NA NA NA
2: DLAGU20 RIO CHICALOTE_17 1005 485.910 1865 DLBAJ157 RIO CHICALOTE_17 3007 315.340 1677
3: DLAGU20 RIO CHICALOTE_18 1005 270.320 1865 DLBAJ157 RIO CHICALOTE_18 3007 430.210 1677
4: DLAGU20 RIO CHICALOTE_18 1005 270.320 1865 DLCAM80 RIO CHICALOTE_18 4012 157.660 1003
5: DLAGU20 RIO CHICALOTE_18 1005 270.320 1865 DLCAM85 RIO CHICALOTE_18 4009 323.020 573
6: DLBAJ155 RIO MULEGE_12 3003 20.300 215 <NA> <NA> NA NA NA
7: DLBAJ155 RIO MULEGE_13 3003 14.005 215 <NA> <NA> NA NA NA
8: DLBAJ157 RIO CHICALOTE_17 3007 315.340 1677 <NA> <NA> NA NA NA
9: DLBAJ157 RIO CHICALOTE_18 3007 430.210 1677 DLCAM80 RIO CHICALOTE_18 4012 157.660 1003
10: DLBAJ157 RIO CHICALOTE_18 3007 430.210 1677 DLCAM85 RIO CHICALOTE_18 4009 323.020 573
11: DLBAJ159 RIO MULEGE_13 3010 17.890 477 DLBAJ155 RIO MULEGE_13 3003 14.005 215
12: DLBAJ159 RIO MULEGE_14 3010 35.980 477 <NA> <NA> NA NA NA
13: DLCAM80 RIO CHICALOTE_18 4012 157.660 1003 DLCAM85 RIO CHICALOTE_18 4009 323.020 573
14: DLCAM85 RIO CHICALOTE_18 4009 323.020 573 <NA> <NA> NA NA NA
15: DLCAM122 RIO MULEGE_12 4001 37.810 300 DLBAJ155 RIO MULEGE_12 3003 20.300 215
# dplyr alternative: left-join the table to itself on `water_body`, then keep
# only pairs where the right-hand row has a strictly lower elevation.
library(tidyverse)

df <- tribble(
  ~id_emca,   ~water_body,        ~id_estmun, ~dbo,   ~elevation,
  "DLAGU20",  "RIO CHICALOTE_16", 1005,       364.92, 1865,
  "DLAGU20",  "RIO CHICALOTE_17", 1005,       485.91, 1865,
  "DLAGU20",  "RIO CHICALOTE_18", 1005,       270.32, 1865,
  "DLBAJ155", "RIO MULEGE_12",    3003,       20.3,   215,
  "DLBAJ155", "RIO MULEGE_13",    3003,       14.005, 215,
  "DLBAJ157", "RIO CHICALOTE_17", 3007,       315.34, 1677,
  "DLBAJ157", "RIO CHICALOTE_18", 3007,       430.21, 1677,
  "DLBAJ159", "RIO MULEGE_13",    3010,       17.89,  477,
  "DLBAJ159", "RIO MULEGE_14",    3010,       35.98,  477,
  "DLCAM80",  "RIO CHICALOTE_18", 4012,       157.66, 1003,
  "DLCAM85",  "RIO CHICALOTE_18", 4009,       323.02, 573,
  "DLCAM122", "RIO MULEGE_12",    4001,       37.81,  300
)

# Candidate partners: rows whose water body appears more than once.
# Ungroup afterwards so the grouping does not leak into later steps.
temp <- df %>%
  group_by(water_body) %>%
  filter(n() > 1) %>%
  ungroup()

# `keep = TRUE` retains the join key from both sides (water_body.x / .y).
# The filter keeps pairs with a strictly lower right-hand elevation, plus rows
# that found no partner at all (NA on the right-hand side).
# NOTE: a row whose water body has several rows but none with a lower
# elevation matches only itself or higher rows, so it is dropped entirely.
df %>%
  arrange(water_body) %>%
  left_join(temp, by = "water_body", keep = TRUE) %>%
  filter(elevation.x > elevation.y | is.na(elevation.y))
输出:
# A tibble: 11 x 10
id_emca.x water_body.x id_estmun.x dbo.x elevation.x id_emca.y water_body.y id_estmun.y dbo.y elevation.y
<chr> <chr> <dbl> <dbl> <dbl> <chr> <chr> <dbl> <dbl> <dbl>
1 DLAGU20 RIO CHICALOTE_16 1005 365. 1865 NA NA NA NA NA
2 DLAGU20 RIO CHICALOTE_17 1005 486. 1865 DLBAJ157 RIO CHICALOTE_17 3007 315. 1677
3 DLAGU20 RIO CHICALOTE_18 1005 270. 1865 DLBAJ157 RIO CHICALOTE_18 3007 430. 1677
4 DLAGU20 RIO CHICALOTE_18 1005 270. 1865 DLCAM80 RIO CHICALOTE_18 4012 158. 1003
5 DLAGU20 RIO CHICALOTE_18 1005 270. 1865 DLCAM85 RIO CHICALOTE_18 4009 323. 573
6 DLBAJ157 RIO CHICALOTE_18 3007 430. 1677 DLCAM80 RIO CHICALOTE_18 4012 158. 1003
7 DLBAJ157 RIO CHICALOTE_18 3007 430. 1677 DLCAM85 RIO CHICALOTE_18 4009 323. 573
8 DLCAM80 RIO CHICALOTE_18 4012 158. 1003 DLCAM85 RIO CHICALOTE_18 4009 323. 573
9 DLCAM122 RIO MULEGE_12 4001 37.8 300 DLBAJ155 RIO MULEGE_12 3003 20.3 215
10 DLBAJ159 RIO MULEGE_13 3010 17.9 477 DLBAJ155 RIO MULEGE_13 3003 14.0 215
11 DLBAJ159 RIO MULEGE_14 3010 36.0 477 NA NA NA NA NA
我有一些污染监测站的数据。我想创建一个新的 tibble:当数据满足某些条件时,把同一张表中的数据左连接(left join)进来。需要满足的条件是:水体(water_body)相同,并且被连接进来的观测值的高程(elevation)低于它所连接到的观测值的高程。
因为英语不是我的母语,用文字解释起来可能显得有些笨拙,所以下面给出一个我想要的结果的示例,我想这样就清楚了。
这是数据:
# Sample monitoring data used throughout the question: 12 rows and 5 columns
# (id_emca, water_body, id_estmun, dbo, elevation), built row-wise.
tribble(
~id_emca, ~water_body, ~id_estmun, ~dbo, ~elevation,
"DLAGU20", "RIO CHICALOTE_16", 1005, 364.92, 1865,
"DLAGU20", "RIO CHICALOTE_17", 1005, 485.91, 1865,
"DLAGU20", "RIO CHICALOTE_18", 1005, 270.32, 1865,
"DLBAJ155", "RIO MULEGE_12", 3003, 20.3, 215,
"DLBAJ155", "RIO MULEGE_13", 3003, 14.005, 215,
"DLBAJ157", "RIO CHICALOTE_17", 3007, 315.34, 1677,
"DLBAJ157", "RIO CHICALOTE_18", 3007, 430.21, 1677,
"DLBAJ159", "RIO MULEGE_13", 3010, 17.89, 477,
"DLBAJ159", "RIO MULEGE_14", 3010, 35.98, 477,
"DLCAM80", "RIO CHICALOTE_18", 4012, 157.66, 1003,
"DLCAM85", "RIO CHICALOTE_18", 4009, 323.02, 573,
"DLCAM122", "RIO MULEGE_12", 4001, 37.81, 300
)
id_emca water_body id_estmun dbo elevation
<chr> <chr> <dbl> <dbl> <dbl>
1 DLAGU20 RIO CHICALOTE_16 1005 365. 1865
2 DLAGU20 RIO CHICALOTE_17 1005 486. 1865
3 DLAGU20 RIO CHICALOTE_18 1005 270. 1865
4 DLBAJ155 RIO MULEGE_12 3003 20.3 215
5 DLBAJ155 RIO MULEGE_13 3003 14.0 215
6 DLBAJ157 RIO CHICALOTE_17 3007 315. 1677
7 DLBAJ157 RIO CHICALOTE_18 3007 430. 1677
8 DLBAJ159 RIO MULEGE_13 3010 17.9 477
9 DLBAJ159 RIO MULEGE_14 3010 36.0 477
10 DLCAM80 RIO CHICALOTE_18 4012 158. 1003
11 DLCAM85 RIO CHICALOTE_18 4009 323. 573
12 DLCAM122 RIO MULEGE_12 4001 37.8 300
新的 tibble 应如下所示:
你可以用 data.table 做一个非等值连接(non-equi join):
# Self non-equi join with data.table: pair every row with the rows that share
# its water body and have a strictly lower elevation. Rows without such a
# partner are kept, with NA in all `*_b` columns.
library(data.table)

df <- tibble::tribble(
  ~id_emca,   ~water_body,        ~id_estmun, ~dbo,   ~elevation,
  "DLAGU20",  "RIO CHICALOTE_16", 1005,       364.92, 1865,
  "DLAGU20",  "RIO CHICALOTE_17", 1005,       485.91, 1865,
  "DLAGU20",  "RIO CHICALOTE_18", 1005,       270.32, 1865,
  "DLBAJ155", "RIO MULEGE_12",    3003,       20.3,   215,
  "DLBAJ155", "RIO MULEGE_13",    3003,       14.005, 215,
  "DLBAJ157", "RIO CHICALOTE_17", 3007,       315.34, 1677,
  "DLBAJ157", "RIO CHICALOTE_18", 3007,       430.21, 1677,
  "DLBAJ159", "RIO MULEGE_13",    3010,       17.89,  477,
  "DLBAJ159", "RIO MULEGE_14",    3010,       35.98,  477,
  "DLCAM80",  "RIO CHICALOTE_18", 4012,       157.66, 1003,
  "DLCAM85",  "RIO CHICALOTE_18", 4009,       323.02, 573,
  "DLCAM122", "RIO MULEGE_12",    4001,       37.81,  300
)

# Convert the tibble to a data.table by reference.
setDT(df)

# Join on a helper copy of `elevation` so that the original `elevation`
# columns of both sides stay available in the join result.
df[, elevation_join := elevation]

# In `result`, unprefixed columns come from the matched (lower) row and are NA
# when there is no match; `i.`-prefixed columns come from the row being
# matched. `water_body` is the shared join key.
result <- df[df, on = .(water_body, elevation_join < elevation_join)]

# Rename to the layout shown in the expected output. `water_body_b` repeats
# the join key for matched rows and is NA for unmatched ones; `id_emca` has no
# missing values in the input, so NA there marks an unmatched row.
paired <- result[, .(
  id_emca      = i.id_emca,
  water_body,
  id_estmun    = i.id_estmun,
  dbo          = i.dbo,
  elevation    = i.elevation,
  id_emca_b    = id_emca,
  water_body_b = replace(water_body, is.na(id_emca), NA_character_),
  id_estmun_b  = id_estmun,
  dbo_b        = dbo,
  elevation_b  = elevation
)]
paired
id_emca water_body id_estmun dbo elevation id_emca_b water_body_b id_estmun_b dbo_b elevation_b
1: DLAGU20 RIO CHICALOTE_16 1005 364.920 1865 <NA> <NA> NA NA NA
2: DLAGU20 RIO CHICALOTE_17 1005 485.910 1865 DLBAJ157 RIO CHICALOTE_17 3007 315.340 1677
3: DLAGU20 RIO CHICALOTE_18 1005 270.320 1865 DLBAJ157 RIO CHICALOTE_18 3007 430.210 1677
4: DLAGU20 RIO CHICALOTE_18 1005 270.320 1865 DLCAM80 RIO CHICALOTE_18 4012 157.660 1003
5: DLAGU20 RIO CHICALOTE_18 1005 270.320 1865 DLCAM85 RIO CHICALOTE_18 4009 323.020 573
6: DLBAJ155 RIO MULEGE_12 3003 20.300 215 <NA> <NA> NA NA NA
7: DLBAJ155 RIO MULEGE_13 3003 14.005 215 <NA> <NA> NA NA NA
8: DLBAJ157 RIO CHICALOTE_17 3007 315.340 1677 <NA> <NA> NA NA NA
9: DLBAJ157 RIO CHICALOTE_18 3007 430.210 1677 DLCAM80 RIO CHICALOTE_18 4012 157.660 1003
10: DLBAJ157 RIO CHICALOTE_18 3007 430.210 1677 DLCAM85 RIO CHICALOTE_18 4009 323.020 573
11: DLBAJ159 RIO MULEGE_13 3010 17.890 477 DLBAJ155 RIO MULEGE_13 3003 14.005 215
12: DLBAJ159 RIO MULEGE_14 3010 35.980 477 <NA> <NA> NA NA NA
13: DLCAM80 RIO CHICALOTE_18 4012 157.660 1003 DLCAM85 RIO CHICALOTE_18 4009 323.020 573
14: DLCAM85 RIO CHICALOTE_18 4009 323.020 573 <NA> <NA> NA NA NA
15: DLCAM122 RIO MULEGE_12 4001 37.810 300 DLBAJ155 RIO MULEGE_12 3003 20.300 215
# dplyr alternative: left-join the table to itself on `water_body`, then keep
# only pairs where the right-hand row has a strictly lower elevation.
library(tidyverse)

df <- tribble(
  ~id_emca,   ~water_body,        ~id_estmun, ~dbo,   ~elevation,
  "DLAGU20",  "RIO CHICALOTE_16", 1005,       364.92, 1865,
  "DLAGU20",  "RIO CHICALOTE_17", 1005,       485.91, 1865,
  "DLAGU20",  "RIO CHICALOTE_18", 1005,       270.32, 1865,
  "DLBAJ155", "RIO MULEGE_12",    3003,       20.3,   215,
  "DLBAJ155", "RIO MULEGE_13",    3003,       14.005, 215,
  "DLBAJ157", "RIO CHICALOTE_17", 3007,       315.34, 1677,
  "DLBAJ157", "RIO CHICALOTE_18", 3007,       430.21, 1677,
  "DLBAJ159", "RIO MULEGE_13",    3010,       17.89,  477,
  "DLBAJ159", "RIO MULEGE_14",    3010,       35.98,  477,
  "DLCAM80",  "RIO CHICALOTE_18", 4012,       157.66, 1003,
  "DLCAM85",  "RIO CHICALOTE_18", 4009,       323.02, 573,
  "DLCAM122", "RIO MULEGE_12",    4001,       37.81,  300
)

# Candidate partners: rows whose water body appears more than once.
# Ungroup afterwards so the grouping does not leak into later steps.
temp <- df %>%
  group_by(water_body) %>%
  filter(n() > 1) %>%
  ungroup()

# `keep = TRUE` retains the join key from both sides (water_body.x / .y).
# The filter keeps pairs with a strictly lower right-hand elevation, plus rows
# that found no partner at all (NA on the right-hand side).
# NOTE: a row whose water body has several rows but none with a lower
# elevation matches only itself or higher rows, so it is dropped entirely.
df %>%
  arrange(water_body) %>%
  left_join(temp, by = "water_body", keep = TRUE) %>%
  filter(elevation.x > elevation.y | is.na(elevation.y))
输出:
# A tibble: 11 x 10
id_emca.x water_body.x id_estmun.x dbo.x elevation.x id_emca.y water_body.y id_estmun.y dbo.y elevation.y
<chr> <chr> <dbl> <dbl> <dbl> <chr> <chr> <dbl> <dbl> <dbl>
1 DLAGU20 RIO CHICALOTE_16 1005 365. 1865 NA NA NA NA NA
2 DLAGU20 RIO CHICALOTE_17 1005 486. 1865 DLBAJ157 RIO CHICALOTE_17 3007 315. 1677
3 DLAGU20 RIO CHICALOTE_18 1005 270. 1865 DLBAJ157 RIO CHICALOTE_18 3007 430. 1677
4 DLAGU20 RIO CHICALOTE_18 1005 270. 1865 DLCAM80 RIO CHICALOTE_18 4012 158. 1003
5 DLAGU20 RIO CHICALOTE_18 1005 270. 1865 DLCAM85 RIO CHICALOTE_18 4009 323. 573
6 DLBAJ157 RIO CHICALOTE_18 3007 430. 1677 DLCAM80 RIO CHICALOTE_18 4012 158. 1003
7 DLBAJ157 RIO CHICALOTE_18 3007 430. 1677 DLCAM85 RIO CHICALOTE_18 4009 323. 573
8 DLCAM80 RIO CHICALOTE_18 4012 158. 1003 DLCAM85 RIO CHICALOTE_18 4009 323. 573
9 DLCAM122 RIO MULEGE_12 4001 37.8 300 DLBAJ155 RIO MULEGE_12 3003 20.3 215
10 DLBAJ159 RIO MULEGE_13 3010 17.9 477 DLBAJ155 RIO MULEGE_13 3003 14.0 215
11 DLBAJ159 RIO MULEGE_14 3010 36.0 477 NA NA NA NA NA