先过滤、分组,再按条件重命名因子水平
Rename levels of factor by filtering, grouping then conditional on statement
我有一个这样的数据框:
# Example input: 12 records; species A and C need a morphospecies label,
# species B does not.
data <- data.frame(
  ID = seq(1, 12, by = 1),
  plantfam = c(1, 1, 2, 2, 1, 1, 1, 1, 2, 2, 3, 3),
  lepsp = rep(c("A", "B", "C"), times = c(4, 2, 6)),
  needsmorpho = rep(c("yes", "no", "yes"), times = c(4, 2, 6))
)
我需要先筛选出所有 needsmorpho 为 yes 的行,然后把这些行按 lepsp 与 plantfam 的组合进行分组。对于每一个唯一的 lepsp 与 plantfam 组合,lepsp 都要被赋予一个唯一的形态种(morphospecies)名称。为了构造这个 morphosp 名称,我想在 lepsp 后面粘贴形态种后缀(如下方输出中的 `_morpho`)以及一个根据唯一的 lepsp–plantfam 组合得到的唯一编号。期望的输出是:
# Desired result: every "yes" species gets "_morpho<k>" appended, where k
# numbers the plant families seen for that species; "no" rows stay unchanged.
output <- data.frame(
  ID = seq(1, 12, by = 1),
  plantfam = c(1, 1, 2, 2, 1, 1, 1, 1, 2, 2, 3, 3),
  lepsp = rep(
    c("A_morpho1", "A_morpho2", "B", "C_morpho1", "C_morpho2", "C_morpho3"),
    each = 2
  ),
  needsmorpho = rep(c("yes", "no", "yes"), times = c(4, 2, 6))
)
我试过:
# Attempt from the question: relabel the "yes" rows, then stitch them back
# together with the untouched rows.
# NOTE(review): `file` and `NeedsMorpho` are not defined in the example above,
# which uses `data` and `needsmorpho` - presumably the asker's real object and
# column names; confirm before running.
subset1 <-
file %>%
filter(NeedsMorpho == "yes") %>%
# NOTE(review): lepsp is both the grouping variable and the column assigned in
# mutate() below; dplyr is expected to reject modifying a grouping column -
# verify.
group_by(lepsp) %>%
mutate(lepsp =
# Number each plantfam by order of first appearance within the current group.
# The suffix here is "_morphosp", while the desired output shows "_morpho".
paste0(lepsp,"_morphosp",match(plantfam,unique(plantfam))))
# Rows of `file` that are not in the "yes" subset: because `.` is passed
# explicitly, the call becomes setdiff(file, <filtered rows>).
subset2 <-
file %>%
filter(NeedsMorpho == "yes") %>%
setdiff(file, .)
# Recombine both parts and sort by lepsp.
file<-union(subset1, subset2) %>% arrange(lepsp)
这是否实现了您的目标?
library(data.table)
setDT(data)
# Update by reference only the rows flagged "yes": append "_morpho" (the suffix
# used in the desired output) plus a running number. The number is the position
# of the row's plantfam among the distinct plantfam values of that species, in
# order of first appearance, so labels stay 1, 2, 3, ... even when the plantfam
# codes themselves are not consecutive.
data[
  needsmorpho == "yes",
  lepsp := paste0(
    lepsp, "_morpho",
    ave(plantfam, lepsp, FUN = function(fam) match(fam, unique(fam)))
  )
]
使用 dplyr 中的 case_when,您可以执行以下操作:
library(tidyverse)
# Relabel only the rows flagged "yes": append "_morpho" plus a running number
# that counts the distinct plantfam values of that species in order of first
# appearance. Rows flagged "no" keep their original name.
data %>%
  mutate(
    lepsp = case_when(
      needsmorpho == "yes" ~ paste0(
        lepsp, "_morpho",
        # Grouping by needsmorpho as well keeps "no" rows from using up numbers.
        ave(plantfam, lepsp, needsmorpho,
            FUN = function(fam) match(fam, unique(fam)))
      ),
      TRUE ~ as.character(lepsp)
    )
  )
返回结果为:
ID plantfam lepsp needsmorpho
1 1 1 A_morpho1 yes
2 2 1 A_morpho1 yes
3 3 2 A_morpho2 yes
4 4 2 A_morpho2 yes
5 5 1 B no
6 6 1 B no
7 7 1 C_morpho1 yes
8 8 1 C_morpho1 yes
9 9 2 C_morpho2 yes
10 10 2 C_morpho2 yes
11 11 3 C_morpho3 yes
12 12 3 C_morpho3 yes
我有一个这样的数据框:
# Example input: 12 records; species A and C need a morphospecies label,
# species B does not.
data <- data.frame(
  ID = seq(1, 12, by = 1),
  plantfam = c(1, 1, 2, 2, 1, 1, 1, 1, 2, 2, 3, 3),
  lepsp = rep(c("A", "B", "C"), times = c(4, 2, 6)),
  needsmorpho = rep(c("yes", "no", "yes"), times = c(4, 2, 6))
)
我需要先筛选出所有 needsmorpho 为 yes 的行,然后把这些行按 lepsp 与 plantfam 的组合进行分组。对于每一个唯一的 lepsp 与 plantfam 组合,lepsp 都要被赋予一个唯一的形态种(morphospecies)名称。为了构造这个 morphosp 名称,我想在 lepsp 后面粘贴形态种后缀(如下方输出中的 `_morpho`)以及一个根据唯一的 lepsp–plantfam 组合得到的唯一编号。期望的输出是:
# Desired result: every "yes" species gets "_morpho<k>" appended, where k
# numbers the plant families seen for that species; "no" rows stay unchanged.
output <- data.frame(
  ID = seq(1, 12, by = 1),
  plantfam = c(1, 1, 2, 2, 1, 1, 1, 1, 2, 2, 3, 3),
  lepsp = rep(
    c("A_morpho1", "A_morpho2", "B", "C_morpho1", "C_morpho2", "C_morpho3"),
    each = 2
  ),
  needsmorpho = rep(c("yes", "no", "yes"), times = c(4, 2, 6))
)
我试过:
# Attempt from the question: relabel the "yes" rows, then stitch them back
# together with the untouched rows.
# NOTE(review): `file` and `NeedsMorpho` are not defined in the example above,
# which uses `data` and `needsmorpho` - presumably the asker's real object and
# column names; confirm before running.
subset1 <-
file %>%
filter(NeedsMorpho == "yes") %>%
# NOTE(review): lepsp is both the grouping variable and the column assigned in
# mutate() below; dplyr is expected to reject modifying a grouping column -
# verify.
group_by(lepsp) %>%
mutate(lepsp =
# Number each plantfam by order of first appearance within the current group.
# The suffix here is "_morphosp", while the desired output shows "_morpho".
paste0(lepsp,"_morphosp",match(plantfam,unique(plantfam))))
# Rows of `file` that are not in the "yes" subset: because `.` is passed
# explicitly, the call becomes setdiff(file, <filtered rows>).
subset2 <-
file %>%
filter(NeedsMorpho == "yes") %>%
setdiff(file, .)
# Recombine both parts and sort by lepsp.
file<-union(subset1, subset2) %>% arrange(lepsp)
这是否实现了您的目标?
library(data.table)
setDT(data)
# Update by reference only the rows flagged "yes": append "_morpho" (the suffix
# used in the desired output) plus a running number. The number is the position
# of the row's plantfam among the distinct plantfam values of that species, in
# order of first appearance, so labels stay 1, 2, 3, ... even when the plantfam
# codes themselves are not consecutive.
data[
  needsmorpho == "yes",
  lepsp := paste0(
    lepsp, "_morpho",
    ave(plantfam, lepsp, FUN = function(fam) match(fam, unique(fam)))
  )
]
使用 dplyr 中的 case_when,您可以执行以下操作:
library(tidyverse)
# Relabel only the rows flagged "yes": append "_morpho" plus a running number
# that counts the distinct plantfam values of that species in order of first
# appearance. Rows flagged "no" keep their original name.
data %>%
  mutate(
    lepsp = case_when(
      needsmorpho == "yes" ~ paste0(
        lepsp, "_morpho",
        # Grouping by needsmorpho as well keeps "no" rows from using up numbers.
        ave(plantfam, lepsp, needsmorpho,
            FUN = function(fam) match(fam, unique(fam)))
      ),
      TRUE ~ as.character(lepsp)
    )
  )
返回结果为:
ID plantfam lepsp needsmorpho
1 1 1 A_morpho1 yes
2 2 1 A_morpho1 yes
3 3 2 A_morpho2 yes
4 4 2 A_morpho2 yes
5 5 1 B no
6 6 1 B no
7 7 1 C_morpho1 yes
8 8 1 C_morpho1 yes
9 9 2 C_morpho2 yes
10 10 2 C_morpho2 yes
11 11 3 C_morpho3 yes
12 12 3 C_morpho3 yes