通过R根据规则在同一组的两行之间执行减法
Perform subtraction between two rows of the same group based on a rule via R
我有一个数据框,其中包含 2005 年和 2018 年的土地使用数据。我想生成一个新的 data.frame,显示每一列的 2005 年和 2018 年之间的差异,以便如果减少,减号(-)。例如,如果变量 Veg
在 2005 年有 1000 公顷,在 2018 年有 8000 公顷,那么 data.frame 应该表示 -200。
data.frame 示例:
df
df<-structure(list(place = c("F01", "F01", "F02", "F02", "F03", "F03",
"F04", "F04", "F05", "F05", "F06", "F06"), year = c(2005, 2018,
2005, 2018, 2005, 2018, 2005, 2018, 2005, 2018, 2005, 2018),
Veg = c(12281.5824712026, 12292.2267477317, 7254.98919713131,
7488.9138055415, 864.182200710528, 941.602680778032, 549.510775817472,
584.104674537216, 5577.10195081334, 5688.28474549675, 1244.96456185886,
1306.41862713264), Agri = c(113.178596532624, 1376.68748390712,
85.2373706436, 1048.71071335262, 0, 46.236076173504, 0, 46.236076173504,
85.2373706436, 1002.47463717912, 1.413692976528, 228.851945376768
), Past = c(9190.16856517738, 7855.55923692456, 5029.33750161394,
3776.9718412309, 983.015569149264, 800.981808818688, 710.255983089744,
572.213021852304, 3726.66100294858, 2700.40306039963, 879.982298683488,
597.410020198656), Urb = c(146.026168634304, 200.910719487744,
146.026168634304, 200.910719487744, 141.119822421648, 194.840155529712,
141.119822421648, 194.840155529712, 4.906346212656, 6.070563958032,
NA, NA), SoloExp = c(61.12143163224, 67.940421283728, 61.12143163224,
62.451966198384, 50.144521461552, 54.801392443056, 49.146620536944,
52.639273773072, 9.895850835696, 7.650573755328, 6.320039189184,
1.164217745376), Hidro = c(9.230583552624, 7.983207396864,
9.230583552624, 7.983207396864, NA, NA, NA, NA, 7.401098524176,
6.320039189184, 5.654771906112, 4.490554160736), total = c(691953.981181971,
691953.981181971, 691953.981181971, 691953.981181971, 691953.981181971,
691953.981181971, 691953.981181971, 691953.981181971, 691953.981181971,
691953.981181971, 691953.981181971, 691953.981181971)), row.names = c(NA,
-12L), class = "data.frame")
我想要这样的输出
这是一个对行顺序具有鲁棒性的 dplyr 管道(例如,如果 2005
曾经出现 after 2018
无论出于何种原因)。
library(dplyr)
library(tidyr) # pivot_*
df %>%
pivot_longer(-c("place", "year")) %>%
pivot_wider(c("place", "name"), names_from = "year", values_from = "value") %>%
mutate(results = coalesce(`2005` - `2018`, 0)) %>%
transmute(place, name, results = dplyr::coalesce(results, 0)) %>%
pivot_wider(place, names_from = "name", values_from = "results")
# # A tibble: 6 x 8
# place Veg Agri Past Urb SoloExp Hidro total
# <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 F01 -10.6 -1264. 1335. -54.9 -6.82 1.25 0
# 2 F02 -234. -963. 1252. -54.9 -1.33 1.25 0
# 3 F03 -77.4 -46.2 182. -53.7 -4.66 0 0
# 4 F04 -34.6 -46.2 138. -53.7 -3.49 0 0
# 5 F05 -111. -917. 1026. -1.16 2.25 1.08 0
# 6 F06 -61.5 -227. 283. 0 5.16 1.16 0
我们可以使用 across
函数来做到这一点:
library(dplyr)
df %>%
mutate(across(-c(place, year), ~ lag(., default = .[1]) - .)) %>%
filter(year==2018) %>%
select(-year)
place Veg Agri Past Urb SoloExp Hidro total
1 F01 -10.64428 -1263.50889 1334.6093 -54.884551 -6.818990 1.247376 0
2 F02 -233.92461 -963.47334 1252.3657 -54.884551 -1.330535 1.247376 0
3 F03 -77.42048 -46.23608 182.0338 -53.720333 -4.656871 NA 0
4 F04 -34.59390 -46.23608 138.0430 -53.720333 -3.492653 NA 0
5 F05 -111.18279 -917.23727 1026.2579 -1.164218 2.245277 1.081059 0
6 F06 -61.45407 -227.43825 282.5723 NA 5.155821 1.164218 0
我有一个数据框,其中包含 2005 年和 2018 年的土地使用数据。我想生成一个新的 data.frame,显示每一列的 2005 年和 2018 年之间的差异,以便如果减少,减号(-)。例如,如果变量 Veg
在 2005 年有 1000 公顷,在 2018 年有 8000 公顷,那么 data.frame 应该表示 -200。
data.frame 示例:
df
df<-structure(list(place = c("F01", "F01", "F02", "F02", "F03", "F03", "F04", "F04", "F05", "F05", "F06", "F06"), year = c(2005, 2018, 2005, 2018, 2005, 2018, 2005, 2018, 2005, 2018, 2005, 2018), Veg = c(12281.5824712026, 12292.2267477317, 7254.98919713131, 7488.9138055415, 864.182200710528, 941.602680778032, 549.510775817472, 584.104674537216, 5577.10195081334, 5688.28474549675, 1244.96456185886, 1306.41862713264), Agri = c(113.178596532624, 1376.68748390712, 85.2373706436, 1048.71071335262, 0, 46.236076173504, 0, 46.236076173504, 85.2373706436, 1002.47463717912, 1.413692976528, 228.851945376768 ), Past = c(9190.16856517738, 7855.55923692456, 5029.33750161394, 3776.9718412309, 983.015569149264, 800.981808818688, 710.255983089744, 572.213021852304, 3726.66100294858, 2700.40306039963, 879.982298683488, 597.410020198656), Urb = c(146.026168634304, 200.910719487744, 146.026168634304, 200.910719487744, 141.119822421648, 194.840155529712, 141.119822421648, 194.840155529712, 4.906346212656, 6.070563958032, NA, NA), SoloExp = c(61.12143163224, 67.940421283728, 61.12143163224, 62.451966198384, 50.144521461552, 54.801392443056, 49.146620536944, 52.639273773072, 9.895850835696, 7.650573755328, 6.320039189184, 1.164217745376), Hidro = c(9.230583552624, 7.983207396864, 9.230583552624, 7.983207396864, NA, NA, NA, NA, 7.401098524176, 6.320039189184, 5.654771906112, 4.490554160736), total = c(691953.981181971, 691953.981181971, 691953.981181971, 691953.981181971, 691953.981181971, 691953.981181971, 691953.981181971, 691953.981181971, 691953.981181971, 691953.981181971, 691953.981181971, 691953.981181971)), row.names = c(NA, -12L), class = "data.frame")
我想要这样的输出
这是一个对行顺序具有鲁棒性的 dplyr 管道(例如,如果 2005
曾经出现 after 2018
无论出于何种原因)。
library(dplyr)
library(tidyr) # pivot_*
df %>%
pivot_longer(-c("place", "year")) %>%
pivot_wider(c("place", "name"), names_from = "year", values_from = "value") %>%
mutate(results = coalesce(`2005` - `2018`, 0)) %>%
transmute(place, name, results = dplyr::coalesce(results, 0)) %>%
pivot_wider(place, names_from = "name", values_from = "results")
# # A tibble: 6 x 8
# place Veg Agri Past Urb SoloExp Hidro total
# <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 F01 -10.6 -1264. 1335. -54.9 -6.82 1.25 0
# 2 F02 -234. -963. 1252. -54.9 -1.33 1.25 0
# 3 F03 -77.4 -46.2 182. -53.7 -4.66 0 0
# 4 F04 -34.6 -46.2 138. -53.7 -3.49 0 0
# 5 F05 -111. -917. 1026. -1.16 2.25 1.08 0
# 6 F06 -61.5 -227. 283. 0 5.16 1.16 0
我们可以使用 across
函数来做到这一点:
library(dplyr)
df %>%
mutate(across(-c(place, year), ~ lag(., default = .[1]) - .)) %>%
filter(year==2018) %>%
select(-year)
place Veg Agri Past Urb SoloExp Hidro total
1 F01 -10.64428 -1263.50889 1334.6093 -54.884551 -6.818990 1.247376 0
2 F02 -233.92461 -963.47334 1252.3657 -54.884551 -1.330535 1.247376 0
3 F03 -77.42048 -46.23608 182.0338 -53.720333 -4.656871 NA 0
4 F04 -34.59390 -46.23608 138.0430 -53.720333 -3.492653 NA 0
5 F05 -111.18279 -917.23727 1026.2579 -1.164218 2.245277 1.081059 0
6 F06 -61.45407 -227.43825 282.5723 NA 5.155821 1.164218 0