用 tidyr 收集两组列
gather two groups of columns with tidyr
我有这个示例数据集,包含 3 个处理,两个响应变量 (D, var) for 3 classes (o, l, h):
# fungic D_o Var_o D_l Var_l D_h Var_h
# 1 AZ_BF 343.6 25756 223.8 20127 442.5 22843
# 2 AZ_CIPRO 186.6 25756 58.2 20127 311.7 22843
# 3 AZ_SOLA 382.6 25756 205.6 20127 520.5 22843
d = structure(list(fungic = c("AZ_BF", "AZ_CIPRO", "AZ_SOLA"), D_o = c(343.6, 186.6, 382.6), Var_o = c(25756L, 25756L, 25756L), D_l = c(223.8,
58.2, 205.6), Var_l = c(20127L, 20127L, 20127L), D_h = c(442.5,
311.7, 520.5), Var_h = c(22843L, 22843L, 22843L)), .Names = c("fungic",
"D_o", "Var_o", "D_l", "Var_l", "D_h", "Var_h"), row.names = c(NA,3L), class = "data.frame")
我想收集具有相同列名称第二部分的列,即所有 D 在一起,所有 Var 在一起,保持 class 级别,
像这样:
fungic C D Var
1 AZ_BF o 343.6 25756
4 AZ_BF o 223.8 25756
7 AZ_BF o 442.5 25756
10 AZ_BF l 343.6 20127
13 AZ_BF l 223.8 20127
16 AZ_BF l 442.5 20127
19 AZ_BF h 343.6 22843
22 AZ_BF h 223.8 22843
25 AZ_BF h 442.5 22843
2 AZ_CIPRO o 186.6 25756
5 AZ_CIPRO o 58.2 25756
8 AZ_CIPRO o 311.7 25756
11 AZ_CIPRO l 186.6 20127
14 AZ_CIPRO l 58.2 20127
17 AZ_CIPRO l 311.7 20127
20 AZ_CIPRO h 186.6 22843
23 AZ_CIPRO h 58.2 22843
26 AZ_CIPRO h 311.7 22843
3 AZ_SOLA o 382.6 25756
6 AZ_SOLA o 205.6 25756
9 AZ_SOLA o 520.5 25756
12 AZ_SOLA l 382.6 20127
15 AZ_SOLA l 205.6 20127
18 AZ_SOLA l 520.5 20127
21 AZ_SOLA h 382.6 22843
24 AZ_SOLA h 205.6 22843
27 AZ_SOLA h 520.5 22843
我用这个可怕的代码做到了...
d1 = d %>% gather(var_Class, D, starts_with("D_")) %>%
gather(C, Var, starts_with("Var_"))
d2 = d1[1:27,]
d3 = d2 %>% mutate(C=sapply(strsplit(C, split='_', fixed=TRUE),function(x) (x[2])))
d3 = d3[order(d3$fungic),]
有人可以帮助改进吗?我真的不想陷入 excel 请..
@akrun 你完全正确,但我完全错了...
抱歉,我真的需要这个:
fungic class D var
AZ_BF o 343.6 25756
AZ_CIPRO o 186.6 25756
AZ_SOLA o 382.6 25756
AZ_BF l 223.8 20127
AZ_CIPRO l 58.2 20127
AZ_SOLA l 205.6 20127
AZ_BF h 442.5 22843
AZ_CIPRO h 311.7 22843
AZ_SOLA h 520.5 22843
将第一个gather
、separate
和'var_Class'分成两列,去掉不需要的列,应用第二个gather
,去掉'Var' 和 arrange
通过 'fungic'
gather(d, var_Class, D, starts_with("D")) %>%
separate(var_Class, into = c("Dvar", "YR")) %>%
select(-Dvar) %>%
gather(Var, Val, starts_with("Var")) %>%
select(-Var) %>%
arrange(fungic)
# fungic YR D Val
#1 AZ_BF o 343.6 25756
#2 AZ_BF l 223.8 25756
#3 AZ_BF h 442.5 25756
#4 AZ_BF o 343.6 20127
#5 AZ_BF l 223.8 20127
#6 AZ_BF h 442.5 20127
#7 AZ_BF o 343.6 22843
#8 AZ_BF l 223.8 22843
#9 AZ_BF h 442.5 22843
#10 AZ_CIPRO o 186.6 25756
#11 AZ_CIPRO l 58.2 25756
#12 AZ_CIPRO h 311.7 25756
#13 AZ_CIPRO o 186.6 20127
#14 AZ_CIPRO l 58.2 20127
#15 AZ_CIPRO h 311.7 20127
#16 AZ_CIPRO o 186.6 22843
#17 AZ_CIPRO l 58.2 22843
#18 AZ_CIPRO h 311.7 22843
#19 AZ_SOLA o 382.6 25756
#20 AZ_SOLA l 205.6 25756
#21 AZ_SOLA h 520.5 25756
#22 AZ_SOLA o 382.6 20127
#23 AZ_SOLA l 205.6 20127
#24 AZ_SOLA h 520.5 20127
#25 AZ_SOLA o 382.6 22843
#26 AZ_SOLA l 205.6 22843
#27 AZ_SOLA h 520.5 22843
我们可以先使用gather
创建一个3列的data.frame,一列用于id("fungic"),一列用于其余列的名称,一列用于值。然后,我们为列名的前缀和后缀创建两个变量。最后,我们使用 spread
将数据转换为宽格式。
d %>%
gather(vrb, val, -fungic) %>%
mutate(D_Var = gsub("(.*)\_(.*)", "\1", vrb),
class = gsub("(.*)\_(.*)", "\2", vrb)) %>%
select(-vrb) %>%
spread(D_Var, val) %>%
arrange(desc(class))
输出:
# fungic class D Var
# 1 AZ_BF o 343.6 25756
# 2 AZ_CIPRO o 186.6 25756
# 3 AZ_SOLA o 382.6 25756
# 4 AZ_BF l 223.8 20127
# 5 AZ_CIPRO l 58.2 20127
# 6 AZ_SOLA l 205.6 20127
# 7 AZ_BF h 442.5 22843
# 8 AZ_CIPRO h 311.7 22843
# 9 AZ_SOLA h 520.5 22843
我有这个示例数据集,包含 3 个处理,两个响应变量 (D, var) for 3 classes (o, l, h):
# fungic D_o Var_o D_l Var_l D_h Var_h
# 1 AZ_BF 343.6 25756 223.8 20127 442.5 22843
# 2 AZ_CIPRO 186.6 25756 58.2 20127 311.7 22843
# 3 AZ_SOLA 382.6 25756 205.6 20127 520.5 22843
d = structure(list(fungic = c("AZ_BF", "AZ_CIPRO", "AZ_SOLA"), D_o = c(343.6, 186.6, 382.6), Var_o = c(25756L, 25756L, 25756L), D_l = c(223.8,
58.2, 205.6), Var_l = c(20127L, 20127L, 20127L), D_h = c(442.5,
311.7, 520.5), Var_h = c(22843L, 22843L, 22843L)), .Names = c("fungic",
"D_o", "Var_o", "D_l", "Var_l", "D_h", "Var_h"), row.names = c(NA,3L), class = "data.frame")
我想收集具有相同列名称第二部分的列,即所有 D 在一起,所有 Var 在一起,保持 class 级别, 像这样:
fungic C D Var
1 AZ_BF o 343.6 25756
4 AZ_BF o 223.8 25756
7 AZ_BF o 442.5 25756
10 AZ_BF l 343.6 20127
13 AZ_BF l 223.8 20127
16 AZ_BF l 442.5 20127
19 AZ_BF h 343.6 22843
22 AZ_BF h 223.8 22843
25 AZ_BF h 442.5 22843
2 AZ_CIPRO o 186.6 25756
5 AZ_CIPRO o 58.2 25756
8 AZ_CIPRO o 311.7 25756
11 AZ_CIPRO l 186.6 20127
14 AZ_CIPRO l 58.2 20127
17 AZ_CIPRO l 311.7 20127
20 AZ_CIPRO h 186.6 22843
23 AZ_CIPRO h 58.2 22843
26 AZ_CIPRO h 311.7 22843
3 AZ_SOLA o 382.6 25756
6 AZ_SOLA o 205.6 25756
9 AZ_SOLA o 520.5 25756
12 AZ_SOLA l 382.6 20127
15 AZ_SOLA l 205.6 20127
18 AZ_SOLA l 520.5 20127
21 AZ_SOLA h 382.6 22843
24 AZ_SOLA h 205.6 22843
27 AZ_SOLA h 520.5 22843
我用这个可怕的代码做到了...
d1 = d %>% gather(var_Class, D, starts_with("D_")) %>%
gather(C, Var, starts_with("Var_"))
d2 = d1[1:27,]
d3 = d2 %>% mutate(C=sapply(strsplit(C, split='_', fixed=TRUE),function(x) (x[2])))
d3 = d3[order(d3$fungic),]
有人可以帮助改进吗?我真的不想陷入 excel 请..
@akrun 你完全正确,但我完全错了...
抱歉,我真的需要这个:
fungic class D var
AZ_BF o 343.6 25756
AZ_CIPRO o 186.6 25756
AZ_SOLA o 382.6 25756
AZ_BF l 223.8 20127
AZ_CIPRO l 58.2 20127
AZ_SOLA l 205.6 20127
AZ_BF h 442.5 22843
AZ_CIPRO h 311.7 22843
AZ_SOLA h 520.5 22843
将第一个gather
、separate
和'var_Class'分成两列,去掉不需要的列,应用第二个gather
,去掉'Var' 和 arrange
通过 'fungic'
gather(d, var_Class, D, starts_with("D")) %>%
separate(var_Class, into = c("Dvar", "YR")) %>%
select(-Dvar) %>%
gather(Var, Val, starts_with("Var")) %>%
select(-Var) %>%
arrange(fungic)
# fungic YR D Val
#1 AZ_BF o 343.6 25756
#2 AZ_BF l 223.8 25756
#3 AZ_BF h 442.5 25756
#4 AZ_BF o 343.6 20127
#5 AZ_BF l 223.8 20127
#6 AZ_BF h 442.5 20127
#7 AZ_BF o 343.6 22843
#8 AZ_BF l 223.8 22843
#9 AZ_BF h 442.5 22843
#10 AZ_CIPRO o 186.6 25756
#11 AZ_CIPRO l 58.2 25756
#12 AZ_CIPRO h 311.7 25756
#13 AZ_CIPRO o 186.6 20127
#14 AZ_CIPRO l 58.2 20127
#15 AZ_CIPRO h 311.7 20127
#16 AZ_CIPRO o 186.6 22843
#17 AZ_CIPRO l 58.2 22843
#18 AZ_CIPRO h 311.7 22843
#19 AZ_SOLA o 382.6 25756
#20 AZ_SOLA l 205.6 25756
#21 AZ_SOLA h 520.5 25756
#22 AZ_SOLA o 382.6 20127
#23 AZ_SOLA l 205.6 20127
#24 AZ_SOLA h 520.5 20127
#25 AZ_SOLA o 382.6 22843
#26 AZ_SOLA l 205.6 22843
#27 AZ_SOLA h 520.5 22843
我们可以先使用gather
创建一个3列的data.frame,一列用于id("fungic"),一列用于其余列的名称,一列用于值。然后,我们为列名的前缀和后缀创建两个变量。最后,我们使用 spread
将数据转换为宽格式。
d %>%
gather(vrb, val, -fungic) %>%
mutate(D_Var = gsub("(.*)\_(.*)", "\1", vrb),
class = gsub("(.*)\_(.*)", "\2", vrb)) %>%
select(-vrb) %>%
spread(D_Var, val) %>%
arrange(desc(class))
输出:
# fungic class D Var
# 1 AZ_BF o 343.6 25756
# 2 AZ_CIPRO o 186.6 25756
# 3 AZ_SOLA o 382.6 25756
# 4 AZ_BF l 223.8 20127
# 5 AZ_CIPRO l 58.2 20127
# 6 AZ_SOLA l 205.6 20127
# 7 AZ_BF h 442.5 22843
# 8 AZ_CIPRO h 311.7 22843
# 9 AZ_SOLA h 520.5 22843