条形图 + geom_jitter 以及条形图填充内的抖动点
Bar chart + geom_jitter with the points of the jitter inside the fills of the bar chart
我想要一个我正在研究的人口分布的条形图,这个条形图的 x 轴是年龄(计数是 y),填充是种族。我想用 geom_scatter 覆盖相应组内的一些主题。但是它会创建自己的轴。
我无法共享数据,但这里有一个虚拟的 tibble
df = tribble(
~id, ~agegroup, ~ethnicity,
#--|--|----
"a", "20s", "African Descent",
"b", "30s", "White",
"c", "50s", "White",
"d", "40s", "Hispanic",
"e", "20s", "White",
"f", "30s", "Hispanic",
"g", "20s", "Hispanic",
"h", "30s", "White",
"i", "20s", "African Descent",
"j", "30s", "White",
"k", "50s", "White",
"l", "20s", "White",
"m", "30s", "Hispanic",
"n", "20s", "Hispanic",
"o", "30s", "White",
)
df
dmplot <- ggplot(df, aes(x = agegroup, fill = ethnicity )) +
geom_bar(stat = "count")+
labs(
x = "Age Group",
y = paste0("Population (total = ", df %>% nrow(), ")"))+
geom_jitter(df,
aes(x = agegroup,
y = ethnicity)) # this is where I would need to retrieve the geom_bar fill location
dmplot
这有点老套,但我认为这样就可以了。不幸的是,您似乎无法将抖动高度分配给美学,但您可以找到另一种方法使高度依赖于矩形的高度。
df = tribble(
~id, ~agegroup, ~ethnicity,
#--|--|----
"a", "20s", "African Descent",
"b", "30s", "White",
"c", "50s", "White",
"d", "40s", "Hispanic",
"e", "20s", "White",
"f", "30s", "Hispanic",
"g", "20s", "Hispanic",
"h", "30s", "White",
"i", "20s", "African Descent",
"j", "30s", "White",
"k", "50s", "White",
"l", "20s", "White",
"m", "30s", "Hispanic",
"n", "20s", "Hispanic",
"o", "30s", "White",
)
df_2 <- df %>%
count(agegroup, ethnicity) %>%
group_by(agegroup ) %>%
mutate(top_rect = cumsum(n),
bottom_rect = lag(top_rect, default = 0))
df_2_uncounted <- df_2 %>%
ungroup() %>%
uncount(n)
ggplot(df_2) +
geom_rect( aes(xmin = as.numeric(as.factor(agegroup)) - .45,
xmax= as.numeric(as.factor(agegroup)) + .45,
ymin = bottom_rect,
ymax = top_rect,
fill = ethnicity )) +
geom_jitter(data = df_2_uncounted,
aes(x = as.numeric(as.factor(agegroup)),
y = (bottom_rect + top_rect)/2),
width = .3,
height = .5) +
scale_x_continuous(breaks = unique(as.numeric(as.factor(df_2$agegroup))),
labels = levels(as.factor(df_2$agegroup))) +
labs(
x = "Age Group",
y = paste0("Population (total = ", df_2_uncounted %>% nrow(), ")"))
更新
现在有标签
df_2_uncounted <- df_2 %>%
ungroup() %>%
uncount(n)%>%
arrange(agegroup, ethnicity) %>%
group_by(agegroup, ethnicity) %>%
mutate(id2 = 1:n()) %>%
left_join(df %>%
arrange(agegroup, ethnicity) %>%
group_by(agegroup, ethnicity) %>%
mutate(id2 = 1:n()),
by = c("agegroup", "ethnicity", "id2"))
ggplot(df_2) +
geom_rect( aes(xmin = as.numeric(as.factor(agegroup)) - .45,
xmax= as.numeric(as.factor(agegroup)) + .45,
ymin = bottom_rect,
ymax = top_rect,
fill = ethnicity )) +
geom_jitter(data = df_2_uncounted,
aes(x = as.numeric(as.factor(agegroup)),
y = (bottom_rect + top_rect)/2),
position = position_jitter(seed = 1, height =0.5)) +
geom_text(data = df_2_uncounted,
aes(x = as.numeric(as.factor(agegroup)),
y = (bottom_rect + top_rect)/2,
label = id),
position = position_jitter(seed = 1, height =0.5))+
scale_x_continuous(breaks = unique(as.numeric(as.factor(df_2$agegroup))),
labels = levels(as.factor(df_2$agegroup))) +
labs(
x = "Age Group",
y = paste0("Population (total = ", df_2_uncounted %>% nrow(), ")"))
我想要一个我正在研究的人口分布的条形图,这个条形图的 x 轴是年龄(计数是 y),填充是种族。我想用 geom_scatter 覆盖相应组内的一些主题。但是它会创建自己的轴。 我无法共享数据,但这里有一个虚拟的 tibble
df = tribble(
~id, ~agegroup, ~ethnicity,
#--|--|----
"a", "20s", "African Descent",
"b", "30s", "White",
"c", "50s", "White",
"d", "40s", "Hispanic",
"e", "20s", "White",
"f", "30s", "Hispanic",
"g", "20s", "Hispanic",
"h", "30s", "White",
"i", "20s", "African Descent",
"j", "30s", "White",
"k", "50s", "White",
"l", "20s", "White",
"m", "30s", "Hispanic",
"n", "20s", "Hispanic",
"o", "30s", "White",
)
df
dmplot <- ggplot(df, aes(x = agegroup, fill = ethnicity )) +
geom_bar(stat = "count")+
labs(
x = "Age Group",
y = paste0("Population (total = ", df %>% nrow(), ")"))+
geom_jitter(df,
aes(x = agegroup,
y = ethnicity)) # this is where I would need to retrieve the geom_bar fill location
dmplot
这有点老套,但我认为这样就可以了。不幸的是,您似乎无法将抖动高度分配给美学,但您可以找到另一种方法使高度依赖于矩形的高度。
df = tribble(
~id, ~agegroup, ~ethnicity,
#--|--|----
"a", "20s", "African Descent",
"b", "30s", "White",
"c", "50s", "White",
"d", "40s", "Hispanic",
"e", "20s", "White",
"f", "30s", "Hispanic",
"g", "20s", "Hispanic",
"h", "30s", "White",
"i", "20s", "African Descent",
"j", "30s", "White",
"k", "50s", "White",
"l", "20s", "White",
"m", "30s", "Hispanic",
"n", "20s", "Hispanic",
"o", "30s", "White",
)
df_2 <- df %>%
count(agegroup, ethnicity) %>%
group_by(agegroup ) %>%
mutate(top_rect = cumsum(n),
bottom_rect = lag(top_rect, default = 0))
df_2_uncounted <- df_2 %>%
ungroup() %>%
uncount(n)
ggplot(df_2) +
geom_rect( aes(xmin = as.numeric(as.factor(agegroup)) - .45,
xmax= as.numeric(as.factor(agegroup)) + .45,
ymin = bottom_rect,
ymax = top_rect,
fill = ethnicity )) +
geom_jitter(data = df_2_uncounted,
aes(x = as.numeric(as.factor(agegroup)),
y = (bottom_rect + top_rect)/2),
width = .3,
height = .5) +
scale_x_continuous(breaks = unique(as.numeric(as.factor(df_2$agegroup))),
labels = levels(as.factor(df_2$agegroup))) +
labs(
x = "Age Group",
y = paste0("Population (total = ", df_2_uncounted %>% nrow(), ")"))
更新
现在有标签
df_2_uncounted <- df_2 %>%
ungroup() %>%
uncount(n)%>%
arrange(agegroup, ethnicity) %>%
group_by(agegroup, ethnicity) %>%
mutate(id2 = 1:n()) %>%
left_join(df %>%
arrange(agegroup, ethnicity) %>%
group_by(agegroup, ethnicity) %>%
mutate(id2 = 1:n()),
by = c("agegroup", "ethnicity", "id2"))
ggplot(df_2) +
geom_rect( aes(xmin = as.numeric(as.factor(agegroup)) - .45,
xmax= as.numeric(as.factor(agegroup)) + .45,
ymin = bottom_rect,
ymax = top_rect,
fill = ethnicity )) +
geom_jitter(data = df_2_uncounted,
aes(x = as.numeric(as.factor(agegroup)),
y = (bottom_rect + top_rect)/2),
position = position_jitter(seed = 1, height =0.5)) +
geom_text(data = df_2_uncounted,
aes(x = as.numeric(as.factor(agegroup)),
y = (bottom_rect + top_rect)/2,
label = id),
position = position_jitter(seed = 1, height =0.5))+
scale_x_continuous(breaks = unique(as.numeric(as.factor(df_2$agegroup))),
labels = levels(as.factor(df_2$agegroup))) +
labs(
x = "Age Group",
y = paste0("Population (total = ", df_2_uncounted %>% nrow(), ")"))