在 R 中使用 ggplot 绘制分组频数条形图
Grouped Frequency Bars in R using ggplot
我正在尝试生成一个包含多个分组频数的条形图。我尝试使用 geom_bar(),但总是遇到错误 "Error: stat_count() must not be used with a y aesthetic."。数据中每个参与者占一行,包含年龄(2 类)、条件(2 类)和他们的表现(0 或 1)。根据我在手册和网上几乎所有地方读到的内容,如果我使用
# Reproduces the reported error: no `stat` is given, so geom_bar() counts rows
# (stat_count) and rejects the `performance` mapping on the y aesthetic.
bar<-ggplot(data, aes(age, performance, fill = condition)) + geom_bar(position = "dodge")
我应该能得到我想要的结果(即这张图),但实际上却得到了上述错误,而且我想不出自己遗漏了什么。geom_bar() 默认不是应该给出计数吗?当我使用 stat="identity" 时,得到的是满格的条形,如下所示:实际效果图。
请帮忙!任何建议将不胜感激。
已编辑:
这是我的实际数据:
structure(list(ageyears = c(4L, 4L, 5L, 5L, 5L, 4L, 5L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 4L, 5L, 4L, 5L, 4L, 5L,
4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 4L, 4L, 4L, 4L, 5L, 4L,
5L, 5L, 4L, 4L, 4L, 5L, 4L, 4L, 5L, 4L, 5L, 4L, 4L, 5L, 5L, 4L,
4L, 5L, 4L, 5L, 4L, 5L, 4L, 4L, 5L, 4L, 5L, 4L, 5L, 4L, 5L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 5L, 4L, 4L, 5L, 5L, 4L, 5L, 5L, 4L, 4L,
5L, 5L, 5L, 4L, 5L, 5L, 4L, 5L, 5L, 4L, 4L, 5L, 4L, 5L, 5L, 4L,
5L, 4L, 4L, 5L, 5L, 4L, 5L, 5L, 5L, 4L, 5L, 4L, 5L, 4L, 5L, 4L,
5L, 5L, 5L, 4L, 5L, 5L, 4L, 5L, 5L, 5L, 4L, 5L, 4L, 5L, 4L, 5L,
4L, 5L, 4L, 5L, 4L, 5L, 4L, 5L, 4L, 5L, 4L, 5L, 4L, 5L, 5L, 5L,
5L, 5L, 4L, 4L, 4L, 5L, 4L), MatrixLabels = structure(c(2L, 2L,
1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L,
1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L,
2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L,
1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L,
2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L,
2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L,
2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L,
2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L,
2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("0",
"1"), class = "factor"), Mat_sort_pass_fail = c(0L, 0L, 1L, 1L,
0L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L,
1L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L,
0L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 0L,
1L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L,
1L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 1L,
0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L,
1L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 0L)), .Names = c("ageyears",
"MatrixLabels", "Mat_sort_pass_fail"), row.names = c(1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 11L, 12L, 13L, 15L, 16L, 17L, 18L, 19L,
20L, 21L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L,
34L, 35L, 36L, 37L, 38L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 48L,
49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 60L, 61L, 62L,
63L, 64L, 65L, 66L, 67L, 68L, 69L, 70L, 71L, 72L, 74L, 75L, 76L,
77L, 78L, 79L, 80L, 82L, 83L, 85L, 86L, 87L, 88L, 89L, 90L, 91L,
92L, 93L, 94L, 95L, 96L, 97L, 98L, 99L, 100L, 101L, 102L, 103L,
104L, 105L, 106L, 107L, 108L, 109L, 110L, 111L, 112L, 113L, 114L,
115L, 116L, 117L, 118L, 119L, 120L, 121L, 122L, 123L, 124L, 125L,
126L, 127L, 128L, 129L, 130L, 131L, 132L, 133L, 134L, 135L, 136L,
137L, 138L, 139L, 140L, 141L, 142L, 143L, 144L, 145L, 146L, 147L,
148L, 149L, 150L, 151L, 152L, 153L, 154L, 155L, 156L, 157L, 158L,
159L, 160L, 197L, 198L, 200L, 201L, 202L, 203L, 204L, 205L, 206L,
207L), class = "data.frame")
通常它会根据您的数据计算频率。如果您的数据已经分组,请尝试以下操作:
# Fragment: add this layer to an existing ggplot() call. stat = "identity"
# uses the y values as bar heights instead of counting rows.
+ geom_bar(stat="identity",position = "dodge")
您可以使用 geom_col(),它是 geom_bar(stat = "identity") 的别名。
另外,我认为你的 aes 映射也有误。
我根据您发布的图表模拟了一些数据:
library(ggplot2)

# Mock summary: one row per age group x labelling condition, with the bar
# height already stored in `performance`.
df <- data.frame(
  age = factor(rep(4:5, each = 2), labels = c("4-Years-Olds", "5-Years-Olds")),
  performance = c(48, 37, 65, 65),
  condition = factor(c(1, 2, 1, 2), labels = c("No Label", "Label"))
)

# Dodged bars: conditions along x, one bar per age group within each condition.
ggplot(df, aes(x = condition, y = performance, fill = age)) +
  geom_col(position = "dodge") +
  scale_fill_manual(values = c("skyblue", "darkolivegreen1"))
来自 geom_bar 的文档:
By default, geom_bar uses stat="count" which makes the height of the
bar proportional to the number of cases in each group (or if the weight
aesthetic is supplied, the sum of the weights). If you want the heights
of the bars to represent values in the data, use stat="identity" and
map a variable to the y aesthetic.
在你的情况下,条形的高度应当是表现(performance)的总和;由于你使用的是已汇总的数据,所以 ggplot 应该使用 stat = "identity"。
编辑 OP 粘贴 dput 后:
你需要先对数据进行汇总。我假设 df 是你的数据框;汇总可以用任何工具完成,这里我给出 base R 的 aggregate 和 data.table 两种方式,任选其一,按如下步骤操作即可:
### Summarise the raw data (one row per participant) into one row per
### age x condition cell, then plot the summary as dodged bars.
### Options 1 and 2 are alternatives; each leaves the same columns in `df`.

# Keep the raw data under its own name so neither option overwrites the
# other's input: both read `df1` and write the summary to `df`.
if (!exists("df1")) {
  df1 <- df
}

###1. base R aggregate
df <- aggregate(
  Mat_sort_pass_fail ~ ageyears + MatrixLabels,
  data = df1,
  FUN = sum
)
# Share of all passes contributed by each cell (shares sum to 1).
df$perc <- df$Mat_sort_pass_fail / sum(df$Mat_sort_pass_fail)
names(df) <- c("age", "condition", "performance", "percentage")

###2. summarization using data.table
library(data.table)
# as.data.table() works on a copy, so `df1` stays a plain data.frame.
dt <- as.data.table(df1)
dt1 <- dt[
  ,
  list(Performance = sum(Mat_sort_pass_fail)),
  by = c("ageyears", "MatrixLabels")
]
dt1[, perc := Performance / sum(Performance)] ## share of all passes
df <- data.frame(dt1)
names(df) <- c("age", "condition", "performance", "percentage")

library(ggplot2)
library(RColorBrewer)
# geom_col() is geom_bar(stat = "identity"): bar height is the y value itself.
ggplot(df, aes(x = condition, y = performance)) +
  geom_col(aes(fill = factor(age)), position = "dodge") +
  ggtitle("Matrix Sort Performance") +
  scale_fill_brewer(palette = "Dark2")

###In case you need the percentage run the below code:
ggplot(df, aes(x = condition, y = percentage)) +
  geom_col(aes(fill = factor(age)), position = "dodge") +
  ggtitle("Matrix Sort Performance") +
  scale_fill_brewer(palette = "Dark2")
我正在尝试生成一个包含多个分组频数的条形图。我尝试使用 geom_bar(),但总是遇到错误 "Error: stat_count() must not be used with a y aesthetic."。数据中每个参与者占一行,包含年龄(2 类)、条件(2 类)和他们的表现(0 或 1)。根据我在手册和网上几乎所有地方读到的内容,如果我使用
# Reproduces the reported error: no `stat` is given, so geom_bar() counts rows
# (stat_count) and rejects the `performance` mapping on the y aesthetic.
bar<-ggplot(data, aes(age, performance, fill = condition)) + geom_bar(position = "dodge")
我应该能得到我想要的结果(即这张图),但实际上却得到了上述错误,而且我想不出自己遗漏了什么。geom_bar() 默认不是应该给出计数吗?当我使用 stat="identity" 时,得到的是满格的条形,如下所示:实际效果图。请帮忙!任何建议都将不胜感激。
已编辑: 这是我的实际数据:
structure(list(ageyears = c(4L, 4L, 5L, 5L, 5L, 4L, 5L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 4L, 5L, 4L, 5L, 4L, 5L,
4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 4L, 4L, 4L, 4L, 5L, 4L,
5L, 5L, 4L, 4L, 4L, 5L, 4L, 4L, 5L, 4L, 5L, 4L, 4L, 5L, 5L, 4L,
4L, 5L, 4L, 5L, 4L, 5L, 4L, 4L, 5L, 4L, 5L, 4L, 5L, 4L, 5L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 5L, 4L, 4L, 5L, 5L, 4L, 5L, 5L, 4L, 4L,
5L, 5L, 5L, 4L, 5L, 5L, 4L, 5L, 5L, 4L, 4L, 5L, 4L, 5L, 5L, 4L,
5L, 4L, 4L, 5L, 5L, 4L, 5L, 5L, 5L, 4L, 5L, 4L, 5L, 4L, 5L, 4L,
5L, 5L, 5L, 4L, 5L, 5L, 4L, 5L, 5L, 5L, 4L, 5L, 4L, 5L, 4L, 5L,
4L, 5L, 4L, 5L, 4L, 5L, 4L, 5L, 4L, 5L, 4L, 5L, 4L, 5L, 5L, 5L,
5L, 5L, 4L, 4L, 4L, 5L, 4L), MatrixLabels = structure(c(2L, 2L,
1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L,
1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L,
2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L,
1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L,
2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L,
2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L,
2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L,
2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L,
2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("0",
"1"), class = "factor"), Mat_sort_pass_fail = c(0L, 0L, 1L, 1L,
0L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L,
1L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L,
0L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 0L,
1L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L,
1L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 1L,
0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L,
1L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 0L)), .Names = c("ageyears",
"MatrixLabels", "Mat_sort_pass_fail"), row.names = c(1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 11L, 12L, 13L, 15L, 16L, 17L, 18L, 19L,
20L, 21L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L,
34L, 35L, 36L, 37L, 38L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 48L,
49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 60L, 61L, 62L,
63L, 64L, 65L, 66L, 67L, 68L, 69L, 70L, 71L, 72L, 74L, 75L, 76L,
77L, 78L, 79L, 80L, 82L, 83L, 85L, 86L, 87L, 88L, 89L, 90L, 91L,
92L, 93L, 94L, 95L, 96L, 97L, 98L, 99L, 100L, 101L, 102L, 103L,
104L, 105L, 106L, 107L, 108L, 109L, 110L, 111L, 112L, 113L, 114L,
115L, 116L, 117L, 118L, 119L, 120L, 121L, 122L, 123L, 124L, 125L,
126L, 127L, 128L, 129L, 130L, 131L, 132L, 133L, 134L, 135L, 136L,
137L, 138L, 139L, 140L, 141L, 142L, 143L, 144L, 145L, 146L, 147L,
148L, 149L, 150L, 151L, 152L, 153L, 154L, 155L, 156L, 157L, 158L,
159L, 160L, 197L, 198L, 200L, 201L, 202L, 203L, 204L, 205L, 206L,
207L), class = "data.frame")
通常它会根据您的数据计算频率。如果您的数据已经分组,请尝试以下操作:
# Fragment: add this layer to an existing ggplot() call. stat = "identity"
# uses the y values as bar heights instead of counting rows.
+ geom_bar(stat="identity",position = "dodge")
您可以使用 geom_col(),它是 geom_bar(stat = "identity") 的别名。
另外,我认为你的 aes 映射也有误。
我根据您发布的图表模拟了一些数据:
library(ggplot2)

# Mock summary: one row per age group x labelling condition, with the bar
# height already stored in `performance`.
df <- data.frame(
  age = factor(rep(4:5, each = 2), labels = c("4-Years-Olds", "5-Years-Olds")),
  performance = c(48, 37, 65, 65),
  condition = factor(c(1, 2, 1, 2), labels = c("No Label", "Label"))
)

# Dodged bars: conditions along x, one bar per age group within each condition.
ggplot(df, aes(x = condition, y = performance, fill = age)) +
  geom_col(position = "dodge") +
  scale_fill_manual(values = c("skyblue", "darkolivegreen1"))
来自 geom_bar 的文档:
By default, geom_bar uses stat="count" which makes the height of the bar proportional to the number of cases in each group (or if the weight aesthetic is supplied, the sum of the weights). If you want the heights of the bars to represent values in the data, use stat="identity" and map a variable to the y aesthetic.
在你的情况下,条形的高度应当是表现(performance)的总和;由于你使用的是已汇总的数据,所以 ggplot 应该使用 stat = "identity"。
编辑 OP 粘贴 dput 后:
你需要先对数据进行汇总。我假设 df 是你的数据框;汇总可以用任何工具完成,这里我给出 base R 的 aggregate 和 data.table 两种方式,任选其一,按如下步骤操作即可:
### Summarise the raw data (one row per participant) into one row per
### age x condition cell, then plot the summary as dodged bars.
### Options 1 and 2 are alternatives; each leaves the same columns in `df`.

# Keep the raw data under its own name so neither option overwrites the
# other's input: both read `df1` and write the summary to `df`.
if (!exists("df1")) {
  df1 <- df
}

###1. base R aggregate
df <- aggregate(
  Mat_sort_pass_fail ~ ageyears + MatrixLabels,
  data = df1,
  FUN = sum
)
# Share of all passes contributed by each cell (shares sum to 1).
df$perc <- df$Mat_sort_pass_fail / sum(df$Mat_sort_pass_fail)
names(df) <- c("age", "condition", "performance", "percentage")

###2. summarization using data.table
library(data.table)
# as.data.table() works on a copy, so `df1` stays a plain data.frame.
dt <- as.data.table(df1)
dt1 <- dt[
  ,
  list(Performance = sum(Mat_sort_pass_fail)),
  by = c("ageyears", "MatrixLabels")
]
dt1[, perc := Performance / sum(Performance)] ## share of all passes
df <- data.frame(dt1)
names(df) <- c("age", "condition", "performance", "percentage")

library(ggplot2)
library(RColorBrewer)
# geom_col() is geom_bar(stat = "identity"): bar height is the y value itself.
ggplot(df, aes(x = condition, y = performance)) +
  geom_col(aes(fill = factor(age)), position = "dodge") +
  ggtitle("Matrix Sort Performance") +
  scale_fill_brewer(palette = "Dark2")

###In case you need the percentage run the below code:
ggplot(df, aes(x = condition, y = percentage)) +
  geom_col(aes(fill = factor(age)), position = "dodge") +
  ggtitle("Matrix Sort Performance") +
  scale_fill_brewer(palette = "Dark2")