在 Tidyverse 中生成表格时出现隐式 NA
Implicit NA's making a table in the Tidyverse
我在尝试使用 tidyverse
创建表格(table)时收到了一条错误消息。错误消息显示为
"Factor Com.Race contains implicit NA, consider using
forcats::fct_explicit_na".
我是 tidyverse 新手,所以没能尝试太多解决办法。
# Simulated enrollment records: 150 students in one major, with a Y/N
# indicator column per race category plus gender and degree.
Major_A <- rep("Major A", times = 150)
# Fixed seed so the draws below are reproducible; the order of the sample()
# calls matters because they all consume the same random stream.
set.seed(1984)
gender <- sample(
  c("Female", "Male"),
  size = 150, replace = TRUE, prob = c(.95, .05)
)
race.asian <- sample(c("Y", "N"), size = 150, replace = TRUE, prob = c(.01, .99))
race.black <- sample(c("Y", "N"), size = 150, replace = TRUE, prob = c(.1, .9))
race.AmInd <- sample(c("Y", "N"), size = 150, replace = TRUE, prob = c(.01, .99))
race.hawa <- sample(c("Y", "N"), size = 150, replace = TRUE, prob = c(.01, .99))
race.hisp <- sample(c("Y", "N"), size = 150, replace = TRUE, prob = c(.02, .98))
race.white <- sample(c("Y", "N"), size = 150, replace = TRUE, prob = c(.8, .2))
# NOTE(review): these weights sum to 0.99 rather than 1 -- confirm that .98
# (and not .99) is intended.
race.NotR <- sample(c("Y", "N"), size = 150, replace = TRUE, prob = c(.01, .98))
degree <- sample(c("BA", "MAT"), size = 150, replace = TRUE, prob = c(.9, .1))
# Column order matters: later code addresses the race flags by position.
enroll <- data.frame(
  Major_A, gender,
  race.asian, race.black, race.AmInd, race.hawa, race.hisp, race.white,
  race.NotR,
  degree
)
# Add a `multi` column holding, for each row, how many of the columns in
# positions startr..endr equal "Y".
#
# dat:    a data frame whose columns startr..endr hold "Y"/"N" style flags.
# startr: position of the first flag column.
# endr:   position of the last flag column.
# Returns `dat` with the extra `multi` column appended.
multi.race_fun <- function(dat, startr, endr) {
  # drop = FALSE keeps a data frame even when startr == endr; without it the
  # single column collapses to a vector and rowSums() fails.
  flags <- dat[, startr:endr, drop = FALSE] == "Y"
  dat$multi <- rowSums(flags)
  dat
}
# Count the "Y" flags over columns 3 to 9 of enroll (race.asian .. race.NotR).
enroll.multiR <- multi.race_fun(dat = enroll, startr = 3, endr = 9)
# load comrace function
# Derive a single combined race label (`Com.Race`) per row from the Y/N race
# flags and the `multi` count. Rules are tried in priority order: a "Y" in
# race.hisp always wins; the single-race labels require multi == 1; rows with
# multi >= 2 become "Two or More Races"; everything else is "Not Reported".
# Returns `dat` with the `Com.Race` column added.
com_race.fun <- function(dat) {
  single <- dat$multi == 1

  # Highest priority first.
  rules <- list(
    list(hit = dat$race.hisp == "Y", label = "Hispanic"),
    list(hit = dat$race.black == "Y" & single, label = "African Am"),
    list(hit = dat$race.AmInd == "Y" & single, label = "Native Am"),
    list(hit = dat$race.asian == "Y" & single, label = "Asian"),
    list(hit = dat$race.hawa == "Y" & single, label = "Hawaiian"),
    list(hit = dat$race.white == "Y" & single, label = "Caucasian")
  )

  # Start from the fallback and layer the rules on from lowest to highest
  # priority, so a higher-priority rule overrides anything set before it.
  combined <- ifelse(dat$multi >= 2, "Two or More Races", "Not Reported")
  for (rule in rev(rules)) {
    combined <- ifelse(rule$hit, rule$label, combined)
  }

  dat$Com.Race <- combined
  dat
}
# run comrace function
enroll.comR <- com_race.fun(enroll.multiR)
enroll.comR$gender <- factor(enroll.comR$gender, levels = c("Female", "Male"))
# The levels must match the labels produced by com_race.fun() exactly: any
# value not listed in `levels` is turned into NA by factor(), which is what
# the "Factor Com.Race contains implicit NA" message refers to. The previous
# list misspelled "Hispanic" and omitted "Native Am".
enroll.comR$Com.Race <- factor(
  enroll.comR$Com.Race,
  levels = c(
    "African Am", "Asian", "Caucasian", "Hawaiian", "Hispanic",
    "Native Am", "Two or More Races", "Not Reported"
  )
)
library(tidyverse)
# Build a one-row-per-gender table with a count column and a percent column
# for every Com.Race level. The intermediate steps live inside local() so
# that only gen_race.tbl is created in the calling environment.
gen_race.tbl <- local({
  # One row per Com.Race x gender combination; .drop = FALSE keeps
  # combinations that have no rows.
  cell_counts <- enroll.comR %>%
    group_by(Com.Race, gender, .drop = FALSE) %>%
    summarise(count = n()) %>%
    ungroup()

  # Percent of the overall total (the data is ungrouped at this point).
  cell_stats <- mutate(cell_counts, perc = (count / sum(count) * 100))

  # Stack count/perc into key/value pairs, fuse the race label with the
  # statistic name, then spread those fused names out as columns.
  stacked <- gather(cell_stats, key, value, -gender, -Com.Race)
  labelled <- unite(stacked, Com.Race, Com.Race, key)
  spread(labelled, Com.Race, value)
})
我希望代码生成一个表格,其中包含 gender
和 Com.Race
这两个变量所有水平(level)的计数和百分比。
我建议先使用 tidyr
中的 gather()
将宽格式(wide-format)数据重组为长格式,然后就可以按性别和各种族变量的每个水平汇总计数和百分比。最后使用 reshape2::dcast()
即可得到您想要的输出,也可以改用 spread()
。
# toy data set: a gender column plus four Y/N ethnicity indicator columns,
# 100 rows each (no seed is set, so the values differ between runs)
df <- data.frame(
  gender = sample(c("M", "F"), size = 100, replace = TRUE, prob = c(0.9, 0.1)),
  ethn.a = sample(c("Y", "N"), size = 100, replace = TRUE, prob = c(0.8, 0.2)),
  ethn.b = sample(c("Y", "N"), size = 100, replace = TRUE, prob = c(0.7, 0.3)),
  ethn.c = sample(c("Y", "N"), size = 100, replace = TRUE, prob = c(0.25, 0.75)),
  ethn.d = sample(c("Y", "N"), size = 100, replace = TRUE, prob = c(0.95, 0.05))
)
# Gather the wide data into key/value pairs, count every
# gender/ethnicity/answer combination, format each count as "n (pct%)", and
# reshape back to wide format with reshape2::dcast().
df %>%
  gather(k, v, -gender) %>%
  group_by(gender, k, v) %>%
  summarise(n = n()) %>%
  # perc is computed on the grouped result of summarise(), so sum(n) is a
  # per-group total rather than the grand total
  mutate(
    perc = round((n / sum(n)) * 100, 2),
    cell = paste0(n, " (", sprintf("%.1f", perc), "%)")
  ) %>%
  select(-n, -perc) %>%
  # keep only the "Y" answers
  filter(v == "Y") %>%
  reshape2::dcast(v ~ k + gender, value.var = "cell")
v ethn.a_F ethn.a_M ethn.b_F ethn.b_M ethn.c_F ethn.c_M ethn.d_F ethn.d_M
1 Y 11 (84.6%) 69 (79.3%) 10 (76.9%) 66 (75.9%) 3 (23.1%) 28 (32.2%) 12 (92.3%) 87 (100.0%)
# using spread(): same summary as the dcast() version, but with one row per
# gender and one column per ethnicity variable
df %>%
  gather(k, v, -gender) %>%
  group_by(gender, k, v) %>%
  summarise(n = n()) %>%
  # perc is computed on the grouped result of summarise(), so sum(n) is a
  # per-group total rather than the grand total
  mutate(
    perc = round((n / sum(n)) * 100, 2),
    cell = paste0(n, " (", sprintf("%.1f", perc), "%)")
  ) %>%
  select(-n, -perc) %>%
  # keep only the "Y" answers
  filter(v == "Y") %>%
  spread(k, cell, fill = 0)
# A tibble: 2 x 6
# Groups: gender [2]
gender v ethn.a ethn.b ethn.c ethn.d
<fct> <chr> <chr> <chr> <chr> <chr>
1 F Y 11 (84.6%) 10 (76.9%) 3 (23.1%) 12 (92.3%)
2 M Y 69 (79.3%) 66 (75.9%) 28 (32.2%) 87 (100.0%)
我在尝试使用 tidyverse
创建表格(table)时收到了一条错误消息。错误消息显示为
"Factor Com.Race contains implicit NA, consider using forcats::fct_explicit_na".
我是 tidyverse 新手,所以没能尝试太多解决办法。
# Simulated enrollment records: 150 students in one major, with a Y/N
# indicator column per race category plus gender and degree.
Major_A <- rep("Major A", times = 150)
# Fixed seed so the draws below are reproducible; the order of the sample()
# calls matters because they all consume the same random stream.
set.seed(1984)
gender <- sample(
  c("Female", "Male"),
  size = 150, replace = TRUE, prob = c(.95, .05)
)
race.asian <- sample(c("Y", "N"), size = 150, replace = TRUE, prob = c(.01, .99))
race.black <- sample(c("Y", "N"), size = 150, replace = TRUE, prob = c(.1, .9))
race.AmInd <- sample(c("Y", "N"), size = 150, replace = TRUE, prob = c(.01, .99))
race.hawa <- sample(c("Y", "N"), size = 150, replace = TRUE, prob = c(.01, .99))
race.hisp <- sample(c("Y", "N"), size = 150, replace = TRUE, prob = c(.02, .98))
race.white <- sample(c("Y", "N"), size = 150, replace = TRUE, prob = c(.8, .2))
# NOTE(review): these weights sum to 0.99 rather than 1 -- confirm that .98
# (and not .99) is intended.
race.NotR <- sample(c("Y", "N"), size = 150, replace = TRUE, prob = c(.01, .98))
degree <- sample(c("BA", "MAT"), size = 150, replace = TRUE, prob = c(.9, .1))
# Column order matters: later code addresses the race flags by position.
enroll <- data.frame(
  Major_A, gender,
  race.asian, race.black, race.AmInd, race.hawa, race.hisp, race.white,
  race.NotR,
  degree
)
# Add a `multi` column holding, for each row, how many of the columns in
# positions startr..endr equal "Y".
#
# dat:    a data frame whose columns startr..endr hold "Y"/"N" style flags.
# startr: position of the first flag column.
# endr:   position of the last flag column.
# Returns `dat` with the extra `multi` column appended.
multi.race_fun <- function(dat, startr, endr) {
  # drop = FALSE keeps a data frame even when startr == endr; without it the
  # single column collapses to a vector and rowSums() fails.
  flags <- dat[, startr:endr, drop = FALSE] == "Y"
  dat$multi <- rowSums(flags)
  dat
}
# Count the "Y" flags over columns 3 to 9 of enroll (race.asian .. race.NotR).
enroll.multiR <- multi.race_fun(dat = enroll, startr = 3, endr = 9)
# load comrace function
# Derive a single combined race label (`Com.Race`) per row from the Y/N race
# flags and the `multi` count. Rules are tried in priority order: a "Y" in
# race.hisp always wins; the single-race labels require multi == 1; rows with
# multi >= 2 become "Two or More Races"; everything else is "Not Reported".
# Returns `dat` with the `Com.Race` column added.
com_race.fun <- function(dat) {
  single <- dat$multi == 1

  # Highest priority first.
  rules <- list(
    list(hit = dat$race.hisp == "Y", label = "Hispanic"),
    list(hit = dat$race.black == "Y" & single, label = "African Am"),
    list(hit = dat$race.AmInd == "Y" & single, label = "Native Am"),
    list(hit = dat$race.asian == "Y" & single, label = "Asian"),
    list(hit = dat$race.hawa == "Y" & single, label = "Hawaiian"),
    list(hit = dat$race.white == "Y" & single, label = "Caucasian")
  )

  # Start from the fallback and layer the rules on from lowest to highest
  # priority, so a higher-priority rule overrides anything set before it.
  combined <- ifelse(dat$multi >= 2, "Two or More Races", "Not Reported")
  for (rule in rev(rules)) {
    combined <- ifelse(rule$hit, rule$label, combined)
  }

  dat$Com.Race <- combined
  dat
}
# run comrace function
enroll.comR <- com_race.fun(enroll.multiR)
enroll.comR$gender <- factor(enroll.comR$gender, levels = c("Female", "Male"))
# The levels must match the labels produced by com_race.fun() exactly: any
# value not listed in `levels` is turned into NA by factor(), which is what
# the "Factor Com.Race contains implicit NA" message refers to. The previous
# list misspelled "Hispanic" and omitted "Native Am".
enroll.comR$Com.Race <- factor(
  enroll.comR$Com.Race,
  levels = c(
    "African Am", "Asian", "Caucasian", "Hawaiian", "Hispanic",
    "Native Am", "Two or More Races", "Not Reported"
  )
)
library(tidyverse)
# Build a one-row-per-gender table with a count column and a percent column
# for every Com.Race level. The intermediate steps live inside local() so
# that only gen_race.tbl is created in the calling environment.
gen_race.tbl <- local({
  # One row per Com.Race x gender combination; .drop = FALSE keeps
  # combinations that have no rows.
  cell_counts <- enroll.comR %>%
    group_by(Com.Race, gender, .drop = FALSE) %>%
    summarise(count = n()) %>%
    ungroup()

  # Percent of the overall total (the data is ungrouped at this point).
  cell_stats <- mutate(cell_counts, perc = (count / sum(count) * 100))

  # Stack count/perc into key/value pairs, fuse the race label with the
  # statistic name, then spread those fused names out as columns.
  stacked <- gather(cell_stats, key, value, -gender, -Com.Race)
  labelled <- unite(stacked, Com.Race, Com.Race, key)
  spread(labelled, Com.Race, value)
})
我希望代码生成一个表格,其中包含 gender
和 Com.Race
这两个变量所有水平(level)的计数和百分比。
我建议先使用 tidyr
中的 gather()
将宽格式(wide-format)数据重组为长格式,然后就可以按性别和各种族变量的每个水平汇总计数和百分比。最后使用 reshape2::dcast()
即可得到您想要的输出,也可以改用 spread()
。
# toy data set: a gender column plus four Y/N ethnicity indicator columns,
# 100 rows each (no seed is set, so the values differ between runs)
df <- data.frame(
  gender = sample(c("M", "F"), size = 100, replace = TRUE, prob = c(0.9, 0.1)),
  ethn.a = sample(c("Y", "N"), size = 100, replace = TRUE, prob = c(0.8, 0.2)),
  ethn.b = sample(c("Y", "N"), size = 100, replace = TRUE, prob = c(0.7, 0.3)),
  ethn.c = sample(c("Y", "N"), size = 100, replace = TRUE, prob = c(0.25, 0.75)),
  ethn.d = sample(c("Y", "N"), size = 100, replace = TRUE, prob = c(0.95, 0.05))
)
# Gather the wide data into key/value pairs, count every
# gender/ethnicity/answer combination, format each count as "n (pct%)", and
# reshape back to wide format with reshape2::dcast().
df %>%
  gather(k, v, -gender) %>%
  group_by(gender, k, v) %>%
  summarise(n = n()) %>%
  # perc is computed on the grouped result of summarise(), so sum(n) is a
  # per-group total rather than the grand total
  mutate(
    perc = round((n / sum(n)) * 100, 2),
    cell = paste0(n, " (", sprintf("%.1f", perc), "%)")
  ) %>%
  select(-n, -perc) %>%
  # keep only the "Y" answers
  filter(v == "Y") %>%
  reshape2::dcast(v ~ k + gender, value.var = "cell")
v ethn.a_F ethn.a_M ethn.b_F ethn.b_M ethn.c_F ethn.c_M ethn.d_F ethn.d_M
1 Y 11 (84.6%) 69 (79.3%) 10 (76.9%) 66 (75.9%) 3 (23.1%) 28 (32.2%) 12 (92.3%) 87 (100.0%)
# using spread(): same summary as the dcast() version, but with one row per
# gender and one column per ethnicity variable
df %>%
  gather(k, v, -gender) %>%
  group_by(gender, k, v) %>%
  summarise(n = n()) %>%
  # perc is computed on the grouped result of summarise(), so sum(n) is a
  # per-group total rather than the grand total
  mutate(
    perc = round((n / sum(n)) * 100, 2),
    cell = paste0(n, " (", sprintf("%.1f", perc), "%)")
  ) %>%
  select(-n, -perc) %>%
  # keep only the "Y" answers
  filter(v == "Y") %>%
  spread(k, cell, fill = 0)
# A tibble: 2 x 6
# Groups: gender [2]
gender v ethn.a ethn.b ethn.c ethn.d
<fct> <chr> <chr> <chr> <chr> <chr>
1 F Y 11 (84.6%) 10 (76.9%) 3 (23.1%) 12 (92.3%)
2 M Y 69 (79.3%) 66 (75.9%) 28 (32.2%) 87 (100.0%)