如何使用 pivot_longer() 将数据从宽格式重塑为长格式?
How can I reshape my data from wide to long format using pivot_longer()?
我有一个如下所示的数据:
# Example data: two identifier columns plus, for each of two compounds
# (crea, dis24dcp), a measured value column and a matching *_tipo column.
cod_child <- rep(c(1, 2, 7, 9, 11), times = c(3, 3, 2, 1, 3))
redcap_event_name <- paste0(
  "visita_",
  c(15, 16, 7, 10, 11, 8, 11, 14, 12, 11, 12, 8),
  "_arm_1"
)

# Measured values; NA marks a missing measurement.
res_orin_crea <- c(
  88.5, 58.2, 70.2, 62.4, 142.0, 42.9,
  26.9, 17.1, 148.0, 26.1, NA, 33.7
)
res_orin_crea_tipo <- c(rep(1, 10), NA, 1)
res_orin_dis24dcp <- c(
  0.8, NA, 0.9, 0.3, NA, 0.4,
  NA, NA, NA, 0.2, NA, 0.1
)
res_orin_dis24dcp_tipo <- rep(3, 12)

# Column names are taken from the vector names, in the order given.
df <- data.frame(
  cod_child,
  redcap_event_name,
  res_orin_crea,
  res_orin_crea_tipo,
  res_orin_dis24dcp,
  res_orin_dis24dcp_tipo
)
df
cod_child redcap_event_name res_orin_crea res_orin_crea_tipo res_orin_dis24dcp res_orin_dis24dcp_tipo
1 1 visita_15_arm_1 88.5 1 0.8 3
2 1 visita_16_arm_1 58.2 1 NA 3
3 1 visita_7_arm_1 70.2 1 0.9 3
4 2 visita_10_arm_1 62.4 1 0.3 3
5 2 visita_11_arm_1 142.0 1 NA 3
6 2 visita_8_arm_1 42.9 1 0.4 3
7 7 visita_11_arm_1 26.9 1 NA 3
8 7 visita_14_arm_1 17.1 1 NA 3
9 9 visita_12_arm_1 148.0 1 NA 3
10 11 visita_11_arm_1 26.1 1 0.2 3
11 11 visita_12_arm_1 NA NA NA 3
12 11 visita_8_arm_1 33.7 1 0.1 3
我想要这样的数据:
cod_child redcap_event_name compound concentration tipo
1 1 visita_15_arm_1 crea 88.5 1
2 1 visita_15_arm_1 dis24dcp 3.0 3
3 1 visita_16_arm_1 crea 58.2 1
4 1 visita_16_arm_1 dis24dcp 3.0 3
5 1 visita_7_arm_1 crea 70.2 1
6 1 visita_7_arm_1 dis24dcp 3.0 3
7 2 visita_10_arm_1 crea 62.4 1
8 2 visita_10_arm_1 dis24dcp 3.0 3
9 2 visita_11_arm_1 crea 142.0 1
10 2 visita_11_arm_1 dis24dcp 3.0 3
11 2 visita_8_arm_1 crea 42.9 1
12 2 visita_8_arm_1 dis24dcp 3.0 3
13 7 visita_11_arm_1 crea 26.9 1
14 7 visita_11_arm_1 dis24dcp 3.0 3
15 7 visita_14_arm_1 crea 17.1 1
16 7 visita_14_arm_1 dis24dcp 3.0 3
17 9 visita_12_arm_1 crea 148.0 1
18 9 visita_12_arm_1 dis24dcp 3.0 3
19 11 visita_11_arm_1 crea 26.1 1
20 11 visita_11_arm_1 dis24dcp 3.0 3
21 11 visita_12_arm_1 crea NA NA
22 11 visita_12_arm_1 dis24dcp 3.0 3
23 11 visita_8_arm_1 crea 33.7 1
24 11 visita_8_arm_1 dis24dcp 3.0 3
我可以通过这样做来做到这一点:
# Pass 1: keep only the measured-value columns (drop every *_tipo column)
# and stack them into one `concentration` column keyed by `compound`.
A <- df %>%
  select(-contains("_tipo")) %>%
  pivot_longer(
    cols = starts_with("res_orin_"),
    names_to = "compound",
    names_pattern = "res_orin_?(.*)",
    values_to = "concentration"
  )
print(A)

# Pass 2: keep the identifiers plus the *_tipo columns and stack those into
# one `tipo` column. Here `compound` still carries the "_tipo" suffix.
B <- df %>%
  select(cod_child, redcap_event_name, contains("_tipo")) %>%
  pivot_longer(
    cols = starts_with("res_orin_"),
    names_to = "compound",
    names_pattern = "res_orin_?(.*)",
    values_to = "tipo"
  )
print(B)

# Glue the `tipo` column (4th column of B) onto A. This relies on A and B
# having their rows in the same order; no key-based join is performed.
dataf <- cbind(A, B["tipo"])
dataf
但我认为应该有一种可以一步完成的方法。我相信需要在 names_pattern 中使用合适的正则表达式,但我没能弄清楚该怎么写。有人可以帮我吗?
使用 data.table 的 melt() 函数的版本:
library(data.table)
# Example data: two identifier columns plus, for each of two compounds
# (crea, dis24dcp), a measured value column and a matching *_tipo column.
cod_child <- rep(c(1, 2, 7, 9, 11), times = c(3, 3, 2, 1, 3))
redcap_event_name <- paste0(
  "visita_",
  c(15, 16, 7, 10, 11, 8, 11, 14, 12, 11, 12, 8),
  "_arm_1"
)

# Measured values; NA marks a missing measurement.
res_orin_crea <- c(
  88.5, 58.2, 70.2, 62.4, 142.0, 42.9,
  26.9, 17.1, 148.0, 26.1, NA, 33.7
)
res_orin_crea_tipo <- c(rep(1, 10), NA, 1)
res_orin_dis24dcp <- c(
  0.8, NA, 0.9, 0.3, NA, 0.4,
  NA, NA, NA, 0.2, NA, 0.1
)
res_orin_dis24dcp_tipo <- rep(3, 12)

# Column names are taken from the vector names, in the order given.
df <- data.frame(
  cod_child,
  redcap_event_name,
  res_orin_crea,
  res_orin_crea_tipo,
  res_orin_dis24dcp,
  res_orin_dis24dcp_tipo
)
# Reshape wide -> long in one melt() call. Each named pattern selects one
# family of columns and becomes one output column:
#   concentration <- res_orin_crea, res_orin_dis24dcp
#   tipo          <- res_orin_crea_tipo, res_orin_dis24dcp_tipo
# The concentration pattern is anchored and groups the alternation, so it can
# only match the measured-value columns. (An ungrouped alternation such as
# "res_orin_(crea)$|(dis24dcp_tipo)$" selects res_orin_dis24dcp_tipo instead of
# res_orin_dis24dcp and fills `concentration` with tipo codes.)
dt <- melt(
  setDT(df), # converts df to a data.table by reference
  id.vars = 1:2,
  variable.name = "compound",
  measure.vars = patterns(
    concentration = "^res_orin_(crea|dis24dcp)$",
    tipo = "tipo$"
  )
)
# melt() labels the matched column pairs 1, 2, ... in column order, so
# 1 = crea and 2 = dis24dcp. Replace the index with the compound name, then
# group by the id columns so both compounds of a visit sit on adjacent rows.
dt[, compound := fifelse(compound == 1, "crea", "dis24dcp")][
  , .SD, by = .(cod_child, redcap_event_name)
]
#> cod_child redcap_event_name compound concentration tipo
#> 1: 1 visita_15_arm_1 crea 88.5 1
#> 2: 1 visita_15_arm_1 dis24dcp 0.8 3
#> 3: 1 visita_16_arm_1 crea 58.2 1
#> 4: 1 visita_16_arm_1 dis24dcp NA 3
#> 5: 1 visita_7_arm_1 crea 70.2 1
#> 6: 1 visita_7_arm_1 dis24dcp 0.9 3
#> 7: 2 visita_10_arm_1 crea 62.4 1
#> 8: 2 visita_10_arm_1 dis24dcp 0.3 3
#> 9: 2 visita_11_arm_1 crea 142.0 1
#> 10: 2 visita_11_arm_1 dis24dcp NA 3
#> 11: 2 visita_8_arm_1 crea 42.9 1
#> 12: 2 visita_8_arm_1 dis24dcp 0.4 3
#> 13: 7 visita_11_arm_1 crea 26.9 1
#> 14: 7 visita_11_arm_1 dis24dcp NA 3
#> 15: 7 visita_14_arm_1 crea 17.1 1
#> 16: 7 visita_14_arm_1 dis24dcp NA 3
#> 17: 9 visita_12_arm_1 crea 148.0 1
#> 18: 9 visita_12_arm_1 dis24dcp NA 3
#> 19: 11 visita_11_arm_1 crea 26.1 1
#> 20: 11 visita_11_arm_1 dis24dcp 0.2 3
#> 21: 11 visita_12_arm_1 crea NA NA
#> 22: 11 visita_12_arm_1 dis24dcp NA 3
#> 23: 11 visita_8_arm_1 crea 33.7 1
#> 24: 11 visita_8_arm_1 dis24dcp 0.1 3
#> cod_child redcap_event_name compound concentration tipo
由 reprex 包 (v2.0.0) 创建于 2021-04-08
您可以尝试在进行 pivot(重塑)之前先重命名列:
# Give the measured-value columns an explicit "_concentration" suffix so that
# every measure column has the shape res_orin_<compound>_<field>.
df %>%
  rename(
    res_orin_crea_concentration = res_orin_crea,
    res_orin_dis24dcp_concentration = res_orin_dis24dcp
  ) %>%
  # First capture group -> `compound`; the second is bound to the special
  # ".value" name, so each distinct <field> (concentration, tipo) becomes its
  # own output column.
  pivot_longer(
    cols = !c(cod_child, redcap_event_name),
    names_to = c("compound", ".value"),
    names_pattern = "res_orin_(.+)_(.+)"
  )
# A tibble: 24 x 5
cod_child redcap_event_name compound concentration tipo
<dbl> <chr> <chr> <dbl> <dbl>
1 1 visita_15_arm_1 crea 88.5 1
2 1 visita_15_arm_1 dis24dcp 0.8 3
3 1 visita_16_arm_1 crea 58.2 1
4 1 visita_16_arm_1 dis24dcp NA 3
5 1 visita_7_arm_1 crea 70.2 1
6 1 visita_7_arm_1 dis24dcp 0.9 3
7 2 visita_10_arm_1 crea 62.4 1
8 2 visita_10_arm_1 dis24dcp 0.3 3
9 2 visita_11_arm_1 crea 142 1
10 2 visita_11_arm_1 dis24dcp NA 3
# … with 14 more rows
我有一个如下所示的数据:
# Example data: two identifier columns plus, for each of two compounds
# (crea, dis24dcp), a measured value column and a matching *_tipo column.
cod_child <- rep(c(1, 2, 7, 9, 11), times = c(3, 3, 2, 1, 3))
redcap_event_name <- paste0(
  "visita_",
  c(15, 16, 7, 10, 11, 8, 11, 14, 12, 11, 12, 8),
  "_arm_1"
)

# Measured values; NA marks a missing measurement.
res_orin_crea <- c(
  88.5, 58.2, 70.2, 62.4, 142.0, 42.9,
  26.9, 17.1, 148.0, 26.1, NA, 33.7
)
res_orin_crea_tipo <- c(rep(1, 10), NA, 1)
res_orin_dis24dcp <- c(
  0.8, NA, 0.9, 0.3, NA, 0.4,
  NA, NA, NA, 0.2, NA, 0.1
)
res_orin_dis24dcp_tipo <- rep(3, 12)

# Column names are taken from the vector names, in the order given.
df <- data.frame(
  cod_child,
  redcap_event_name,
  res_orin_crea,
  res_orin_crea_tipo,
  res_orin_dis24dcp,
  res_orin_dis24dcp_tipo
)
df
cod_child redcap_event_name res_orin_crea res_orin_crea_tipo res_orin_dis24dcp res_orin_dis24dcp_tipo
1 1 visita_15_arm_1 88.5 1 0.8 3
2 1 visita_16_arm_1 58.2 1 NA 3
3 1 visita_7_arm_1 70.2 1 0.9 3
4 2 visita_10_arm_1 62.4 1 0.3 3
5 2 visita_11_arm_1 142.0 1 NA 3
6 2 visita_8_arm_1 42.9 1 0.4 3
7 7 visita_11_arm_1 26.9 1 NA 3
8 7 visita_14_arm_1 17.1 1 NA 3
9 9 visita_12_arm_1 148.0 1 NA 3
10 11 visita_11_arm_1 26.1 1 0.2 3
11 11 visita_12_arm_1 NA NA NA 3
12 11 visita_8_arm_1 33.7 1 0.1 3
我想要这样的数据:
cod_child redcap_event_name compound concentration tipo
1 1 visita_15_arm_1 crea 88.5 1
2 1 visita_15_arm_1 dis24dcp 3.0 3
3 1 visita_16_arm_1 crea 58.2 1
4 1 visita_16_arm_1 dis24dcp 3.0 3
5 1 visita_7_arm_1 crea 70.2 1
6 1 visita_7_arm_1 dis24dcp 3.0 3
7 2 visita_10_arm_1 crea 62.4 1
8 2 visita_10_arm_1 dis24dcp 3.0 3
9 2 visita_11_arm_1 crea 142.0 1
10 2 visita_11_arm_1 dis24dcp 3.0 3
11 2 visita_8_arm_1 crea 42.9 1
12 2 visita_8_arm_1 dis24dcp 3.0 3
13 7 visita_11_arm_1 crea 26.9 1
14 7 visita_11_arm_1 dis24dcp 3.0 3
15 7 visita_14_arm_1 crea 17.1 1
16 7 visita_14_arm_1 dis24dcp 3.0 3
17 9 visita_12_arm_1 crea 148.0 1
18 9 visita_12_arm_1 dis24dcp 3.0 3
19 11 visita_11_arm_1 crea 26.1 1
20 11 visita_11_arm_1 dis24dcp 3.0 3
21 11 visita_12_arm_1 crea NA NA
22 11 visita_12_arm_1 dis24dcp 3.0 3
23 11 visita_8_arm_1 crea 33.7 1
24 11 visita_8_arm_1 dis24dcp 3.0 3
我可以通过这样做来做到这一点:
# Pass 1: keep only the measured-value columns (drop every *_tipo column)
# and stack them into one `concentration` column keyed by `compound`.
A <- df %>%
  select(-contains("_tipo")) %>%
  pivot_longer(
    cols = starts_with("res_orin_"),
    names_to = "compound",
    names_pattern = "res_orin_?(.*)",
    values_to = "concentration"
  )
print(A)

# Pass 2: keep the identifiers plus the *_tipo columns and stack those into
# one `tipo` column. Here `compound` still carries the "_tipo" suffix.
B <- df %>%
  select(cod_child, redcap_event_name, contains("_tipo")) %>%
  pivot_longer(
    cols = starts_with("res_orin_"),
    names_to = "compound",
    names_pattern = "res_orin_?(.*)",
    values_to = "tipo"
  )
print(B)

# Glue the `tipo` column (4th column of B) onto A. This relies on A and B
# having their rows in the same order; no key-based join is performed.
dataf <- cbind(A, B["tipo"])
dataf
但我认为应该有一种可以一步完成的方法。我相信需要在 names_pattern 中使用合适的正则表达式,但我没能弄清楚该怎么写。有人可以帮我吗?
使用 data.table 的 melt() 函数的版本:
library(data.table)
# Example data: two identifier columns plus, for each of two compounds
# (crea, dis24dcp), a measured value column and a matching *_tipo column.
cod_child <- rep(c(1, 2, 7, 9, 11), times = c(3, 3, 2, 1, 3))
redcap_event_name <- paste0(
  "visita_",
  c(15, 16, 7, 10, 11, 8, 11, 14, 12, 11, 12, 8),
  "_arm_1"
)

# Measured values; NA marks a missing measurement.
res_orin_crea <- c(
  88.5, 58.2, 70.2, 62.4, 142.0, 42.9,
  26.9, 17.1, 148.0, 26.1, NA, 33.7
)
res_orin_crea_tipo <- c(rep(1, 10), NA, 1)
res_orin_dis24dcp <- c(
  0.8, NA, 0.9, 0.3, NA, 0.4,
  NA, NA, NA, 0.2, NA, 0.1
)
res_orin_dis24dcp_tipo <- rep(3, 12)

# Column names are taken from the vector names, in the order given.
df <- data.frame(
  cod_child,
  redcap_event_name,
  res_orin_crea,
  res_orin_crea_tipo,
  res_orin_dis24dcp,
  res_orin_dis24dcp_tipo
)
# Reshape wide -> long in one melt() call. Each named pattern selects one
# family of columns and becomes one output column:
#   concentration <- res_orin_crea, res_orin_dis24dcp
#   tipo          <- res_orin_crea_tipo, res_orin_dis24dcp_tipo
# The concentration pattern is anchored and groups the alternation, so it can
# only match the measured-value columns. (An ungrouped alternation such as
# "res_orin_(crea)$|(dis24dcp_tipo)$" selects res_orin_dis24dcp_tipo instead of
# res_orin_dis24dcp and fills `concentration` with tipo codes.)
dt <- melt(
  setDT(df), # converts df to a data.table by reference
  id.vars = 1:2,
  variable.name = "compound",
  measure.vars = patterns(
    concentration = "^res_orin_(crea|dis24dcp)$",
    tipo = "tipo$"
  )
)
# melt() labels the matched column pairs 1, 2, ... in column order, so
# 1 = crea and 2 = dis24dcp. Replace the index with the compound name, then
# group by the id columns so both compounds of a visit sit on adjacent rows.
dt[, compound := fifelse(compound == 1, "crea", "dis24dcp")][
  , .SD, by = .(cod_child, redcap_event_name)
]
#> cod_child redcap_event_name compound concentration tipo
#> 1: 1 visita_15_arm_1 crea 88.5 1
#> 2: 1 visita_15_arm_1 dis24dcp 0.8 3
#> 3: 1 visita_16_arm_1 crea 58.2 1
#> 4: 1 visita_16_arm_1 dis24dcp NA 3
#> 5: 1 visita_7_arm_1 crea 70.2 1
#> 6: 1 visita_7_arm_1 dis24dcp 0.9 3
#> 7: 2 visita_10_arm_1 crea 62.4 1
#> 8: 2 visita_10_arm_1 dis24dcp 0.3 3
#> 9: 2 visita_11_arm_1 crea 142.0 1
#> 10: 2 visita_11_arm_1 dis24dcp NA 3
#> 11: 2 visita_8_arm_1 crea 42.9 1
#> 12: 2 visita_8_arm_1 dis24dcp 0.4 3
#> 13: 7 visita_11_arm_1 crea 26.9 1
#> 14: 7 visita_11_arm_1 dis24dcp NA 3
#> 15: 7 visita_14_arm_1 crea 17.1 1
#> 16: 7 visita_14_arm_1 dis24dcp NA 3
#> 17: 9 visita_12_arm_1 crea 148.0 1
#> 18: 9 visita_12_arm_1 dis24dcp NA 3
#> 19: 11 visita_11_arm_1 crea 26.1 1
#> 20: 11 visita_11_arm_1 dis24dcp 0.2 3
#> 21: 11 visita_12_arm_1 crea NA NA
#> 22: 11 visita_12_arm_1 dis24dcp NA 3
#> 23: 11 visita_8_arm_1 crea 33.7 1
#> 24: 11 visita_8_arm_1 dis24dcp 0.1 3
#> cod_child redcap_event_name compound concentration tipo
由 reprex 包 (v2.0.0) 创建于 2021-04-08
您可以尝试在进行 pivot(重塑)之前先重命名列:
# Give the measured-value columns an explicit "_concentration" suffix so that
# every measure column has the shape res_orin_<compound>_<field>.
df %>%
  rename(
    res_orin_crea_concentration = res_orin_crea,
    res_orin_dis24dcp_concentration = res_orin_dis24dcp
  ) %>%
  # First capture group -> `compound`; the second is bound to the special
  # ".value" name, so each distinct <field> (concentration, tipo) becomes its
  # own output column.
  pivot_longer(
    cols = !c(cod_child, redcap_event_name),
    names_to = c("compound", ".value"),
    names_pattern = "res_orin_(.+)_(.+)"
  )
# A tibble: 24 x 5
cod_child redcap_event_name compound concentration tipo
<dbl> <chr> <chr> <dbl> <dbl>
1 1 visita_15_arm_1 crea 88.5 1
2 1 visita_15_arm_1 dis24dcp 0.8 3
3 1 visita_16_arm_1 crea 58.2 1
4 1 visita_16_arm_1 dis24dcp NA 3
5 1 visita_7_arm_1 crea 70.2 1
6 1 visita_7_arm_1 dis24dcp 0.9 3
7 2 visita_10_arm_1 crea 62.4 1
8 2 visita_10_arm_1 dis24dcp 0.3 3
9 2 visita_11_arm_1 crea 142 1
10 2 visita_11_arm_1 dis24dcp NA 3
# … with 14 more rows