在使用 complete 的同时为变量命名
Naming a variable while using complete()
我想得到一个按年龄段分类的小计。示例数据和代码为:
# Reproducible example: count people per age band and keep empty bands as 0.
# dplyr provides %>%, group_by(), summarise() and n(); tidyr provides complete().
library(dplyr)
library(tidyr)

set.seed(12345)
# Create a numeric variable AGE: 100 integer ages sampled from 0..110
AGE <- sample(0:110, 100, replace = TRUE)
# Create the data frame
Sample.data <- data.frame(AGE)
summary_data <- Sample.data %>%
  group_by(grp = cut(
    AGE,
    breaks = c(-Inf, 0, 0.001, 0.083, 2, 13, 65, 1000),
    right = TRUE,
    # NOTE(review): the breaks have no cut point at 18, so the seven labels are
    # applied positionally and do not line up with the ranges they name (e.g.
    # the interval (0.083, 2] gets "Child(2-12.999 yrs)") -- confirm the breaks.
    labels = c(
      "Foetus(0 yr)", "Neonate (0.001 - 0.082 yr)",
      "Infant(0.083-1.999 yrs)", "Child(2-12.999 yrs)",
      "Adolescent(13-17.999 yrs)", "Adult(18-64.999 yrs.)",
      "Elderly(65-199 yrs)"
    )
  )) %>%
  summarise("Total People" = n()) %>%
  # Fix: the original wrote list("Total People = 0) with an unterminated
  # string, which is a syntax error; the closing quote is restored here.
  complete(grp = levels(grp), fill = list("Total People" = 0))
给出的结果如下所示:
是否可以将 grp 标记为“年龄段”?并按年龄排序?
我试着按照下面的方式定义名称,但结果很奇怪。我做错了什么?
# Second attempt from the question: the grouping column is named "Age Group"
# instead of grp. This is the version that misbehaves; it is kept unchanged.
summary_data<- Sample.data %>%
group_by("Age Group" = cut(
AGE,
breaks=c(-Inf, 0,0.001, 0.083, 2, 13, 65,1000),
right=TRUE,
labels = c("Foetus(0 yr)","Neonate (0.001 - 0.082 yr)","Infant(0.083-1.999 yrs)","Child(2-12.999 yrs)", "Adolescent(13-17.999 yrs)","Adult(18-64.999 yrs.)","Elderly(65-199 yrs)")
# In the complete() call below, levels("Age Group") is applied to the string
# literal "Age Group" rather than to the column, so the factor levels of the
# column are never handed to complete().
)) %>% summarise("Total People" = n())%>% complete("Age Group" = levels("Age Group"), fill = list("Total People" = 0))
新的结果是:
变量现在有了名称“Age Group”,但是表格没有为没有观测值的年龄组填 0。我该怎么办?
理想的结果应该是这样的:
我们可以把 levels() 中的双引号改为反引号,这样 complete() 才会对该列求值,而不是把它当作字符串。
该列名是非标准的,因为它包含空格。
# Elided pipeline (the `...` stands for the earlier steps). The backticks make
# `Age Group` refer to the column, so levels() receives the factor itself.
# levels() yields a character vector, which is why the completed column prints
# as <chr> in the tibble shown below.
...
%>%
complete("Age Group" = levels(`Age Group`), fill = list("Total People" = 0))
# A tibble: 7 x 2
# `Age Group` `Total People`
# <chr> <dbl>
#1 Adolescent(13-17.999 yrs) 14
#2 Adult(18-64.999 yrs.) 37
#3 Child(2-12.999 yrs) 2
#4 Elderly(65-199 yrs) 46
#5 Foetus(0 yr) 1
#6 Infant(0.083-1.999 yrs) 0
#7 Neonate (0.001 - 0.082 yr) 0
如果我们还想用 arrange 对结果排序:
# Count people per "Age Group", add the unobserved groups with a count of 0,
# then sort the rows by the count column.
Sample.data %>%
  group_by(
    "Age Group" = cut(
      AGE,
      breaks = c(-Inf, 0, 0.001, 0.083, 2, 13, 65, 1000),
      right = TRUE,
      labels = c(
        "Foetus(0 yr)",
        "Neonate (0.001 - 0.082 yr)",
        "Infant(0.083-1.999 yrs)",
        "Child(2-12.999 yrs)",
        "Adolescent(13-17.999 yrs)",
        "Adult(18-64.999 yrs.)",
        "Elderly(65-199 yrs)"
      )
    )
  ) %>%
  summarise("Total People" = n()) %>%
  # Backticks: levels() must see the column, not the string "Age Group".
  complete(
    "Age Group" = levels(`Age Group`),
    fill = list("Total People" = 0)
  ) %>%
  # Ascending order of the count, not of age.
  arrange(`Total People`)
我想得到一个按年龄段分类的小计。示例数据和代码为:
# Reproducible example: count people per age band and keep empty bands as 0.
# dplyr provides %>%, group_by(), summarise() and n(); tidyr provides complete().
library(dplyr)
library(tidyr)

set.seed(12345)
# Create a numeric variable AGE: 100 integer ages sampled from 0..110
AGE <- sample(0:110, 100, replace = TRUE)
# Create the data frame
Sample.data <- data.frame(AGE)
summary_data <- Sample.data %>%
  group_by(grp = cut(
    AGE,
    breaks = c(-Inf, 0, 0.001, 0.083, 2, 13, 65, 1000),
    right = TRUE,
    # NOTE(review): the breaks have no cut point at 18, so the seven labels are
    # applied positionally and do not line up with the ranges they name (e.g.
    # the interval (0.083, 2] gets "Child(2-12.999 yrs)") -- confirm the breaks.
    labels = c(
      "Foetus(0 yr)", "Neonate (0.001 - 0.082 yr)",
      "Infant(0.083-1.999 yrs)", "Child(2-12.999 yrs)",
      "Adolescent(13-17.999 yrs)", "Adult(18-64.999 yrs.)",
      "Elderly(65-199 yrs)"
    )
  )) %>%
  summarise("Total People" = n()) %>%
  # Fix: the original wrote list("Total People = 0) with an unterminated
  # string, which is a syntax error; the closing quote is restored here.
  complete(grp = levels(grp), fill = list("Total People" = 0))
给出的结果如下所示:
是否可以将 grp 标记为“年龄段”?并按年龄排序?
我试着按照下面的方式定义名称,但结果很奇怪。我做错了什么?
# Second attempt from the question: the grouping column is named "Age Group"
# instead of grp. This is the version that misbehaves; it is kept unchanged.
summary_data<- Sample.data %>%
group_by("Age Group" = cut(
AGE,
breaks=c(-Inf, 0,0.001, 0.083, 2, 13, 65,1000),
right=TRUE,
labels = c("Foetus(0 yr)","Neonate (0.001 - 0.082 yr)","Infant(0.083-1.999 yrs)","Child(2-12.999 yrs)", "Adolescent(13-17.999 yrs)","Adult(18-64.999 yrs.)","Elderly(65-199 yrs)")
# In the complete() call below, levels("Age Group") is applied to the string
# literal "Age Group" rather than to the column, so the factor levels of the
# column are never handed to complete().
)) %>% summarise("Total People" = n())%>% complete("Age Group" = levels("Age Group"), fill = list("Total People" = 0))
新的结果是:
变量现在有了名称“Age Group”,但是表格没有为没有观测值的年龄组填 0。我该怎么办?
理想的结果应该是这样的:
我们可以把 levels() 中的双引号改为反引号,这样 complete() 才会对该列求值,而不是把它当作字符串。
该列名是非标准的,因为它包含空格。
# Elided pipeline (the `...` stands for the earlier steps). The backticks make
# `Age Group` refer to the column, so levels() receives the factor itself.
# levels() yields a character vector, which is why the completed column prints
# as <chr> in the tibble shown below.
...
%>%
complete("Age Group" = levels(`Age Group`), fill = list("Total People" = 0))
# A tibble: 7 x 2
# `Age Group` `Total People`
# <chr> <dbl>
#1 Adolescent(13-17.999 yrs) 14
#2 Adult(18-64.999 yrs.) 37
#3 Child(2-12.999 yrs) 2
#4 Elderly(65-199 yrs) 46
#5 Foetus(0 yr) 1
#6 Infant(0.083-1.999 yrs) 0
#7 Neonate (0.001 - 0.082 yr) 0
如果我们还想用 arrange 对结果排序:
# Count people per "Age Group", add the unobserved groups with a count of 0,
# then sort the rows by the count column.
Sample.data %>%
  group_by(
    "Age Group" = cut(
      AGE,
      breaks = c(-Inf, 0, 0.001, 0.083, 2, 13, 65, 1000),
      right = TRUE,
      labels = c(
        "Foetus(0 yr)",
        "Neonate (0.001 - 0.082 yr)",
        "Infant(0.083-1.999 yrs)",
        "Child(2-12.999 yrs)",
        "Adolescent(13-17.999 yrs)",
        "Adult(18-64.999 yrs.)",
        "Elderly(65-199 yrs)"
      )
    )
  ) %>%
  summarise("Total People" = n()) %>%
  # Backticks: levels() must see the column, not the string "Age Group".
  complete(
    "Age Group" = levels(`Age Group`),
    fill = list("Total People" = 0)
  ) %>%
  # Ascending order of the count, not of age.
  arrange(`Total People`)