因子的排序:ggplot2 与 plyr,在条形图上固定标签位置时遇到的问题
Ordering of factors: ggplot2 vs plyr, problems fixing label position to barplot
我正在尝试绘制一个在各条形分段上带有标签的堆叠条形图。
ggplot2 能够识别因子变量所设定的水平顺序并按该顺序绘图,但 plyr 似乎并不遵守这一顺序;而尝试用 order_by 强制排序又会得到缺失值,这可能是公式本身造成的。
# Attempted workaround from the question: wrap the label-position formula in
# order_by(), keyed on Segment (this is the call reported to give missing values).
pos = order_by(Segment, 1-((cumsum(antal) - (0.5 * antal))/sum(antal) ) )
如果我把因子水平 1000+ 改为 999,结果就符合预期,因为此时字典序恰好与期望的顺序一致。
用于重现问题的数据和代码:
# Example data: one row per (Segment, last_Year) pair with its count `antal`.
# Segment keeps the alphabetical level set, in which "1000+" sorts before
# "200-300" and an unused "Error" level is present.
tblVector_Value <- structure(
  list(
    antal = c(190L, 21L, 33L, 18L, 241L, 77L, 102L, 42L, 45L, 32L),
    last_Year = rep(c(2020L, 2021L), times = 5L),
    Segment = factor(
      rep(c("0-100", "1000+", "100-200", "200-300", "300-400"), each = 2L),
      levels = c("0-100", "100-200", "1000+", "200-300", "300-400", "Error")
    )
  ),
  row.names = c(5L, 6L, 11L, 12L, 18L, 19L, 24L, 25L, 30L, 31L),
  class = "data.frame"
)
library(ggplot2)
library(plyr)
## Re-level Segment so that "1000+" comes last instead of sorting
## alphabetically; the unused "Error" level is dropped.
tblVector_Value$Segment <- factor(
  tblVector_Value$Segment,
  levels = c("0-100", "100-200", "200-300", "300-400", "1000+")
)
## Per year, compute each label's y position on the 0-1 "fill" scale as
## 1 minus the cumulative share up to the middle of the segment.
## NOTE: cumsum() runs over the rows in the order they appear within each
## year, not in factor-level order, so `pos` matches the drawn stack only if
## the rows are already sorted by Segment. This is the mismatch being asked
## about and is left as-is to reproduce it.
tblVector_Value <- ddply(
  tblVector_Value, .(last_Year), transform,
  pos = 1 - ((cumsum(antal) - (0.5 * antal)) / sum(antal))
)
ggplot(
  tblVector_Value[order(tblVector_Value$Segment, decreasing = TRUE), ],
  aes(x = last_Year, y = antal, fill = Segment)
) +
  geom_bar(position = "fill", stat = "identity") +
  # percent_format() belongs to the scales package, which is not attached
  # here, so it is called through its namespace.
  scale_y_continuous(labels = scales::percent_format()) +
  labs(title = "% segment") +
  geom_text(aes(label = antal, y = pos), size = 3)
在 ggplot2 的较新版本中,`geom_text()` 也可以正确放置堆叠条形图和填充条形图的标签。因此,如果可以接受标签没有精确地位于各条形分段的正中,就无需使用 plyr 或任何其他包来显式计算标签位置。
library(ggplot2) # CRAN version 2.2.1 used

# Order the segments numerically rather than alphabetically ("1000+" last).
tblVector_Value[["Segment"]] <- factor(
  tblVector_Value[["Segment"]],
  levels = c("0-100", "100-200", "200-300", "300-400", "1000+")
)
# A factor on x gives a discrete axis instead of a continuous year scale.
tblVector_Value[["last_Year"]] <- factor(tblVector_Value[["last_Year"]])

ggplot(tblVector_Value, aes(x = last_Year, y = antal, fill = Segment)) +
  geom_col(position = "fill") +
  # Labels use the same "fill" position as the bars; vjust nudges the text
  # down from the top edge of each segment.
  geom_text(aes(label = antal), position = "fill", vjust = 2, size = 3) +
  scale_y_continuous(labels = scales::percent) +
  labs(title = "% segment")
请注意,`vjust` 可用于微调文本标签的位置。可以试试 `vjust = 1.5` 或 `vjust = 1.0`。
我正在尝试绘制一个在各条形分段上带有标签的堆叠条形图。 ggplot2 能够识别因子变量所设定的水平顺序并按该顺序绘图,但 plyr 似乎并不遵守这一顺序;而尝试用 order_by 强制排序又会得到缺失值,这可能是公式本身造成的。
# Attempted workaround from the question: wrap the label-position formula in
# order_by(), keyed on Segment (this is the call reported to give missing values).
pos = order_by(Segment, 1-((cumsum(antal) - (0.5 * antal))/sum(antal) ) )
如果我把因子水平 1000+ 改为 999,结果就符合预期,因为此时字典序恰好与期望的顺序一致。
用于重现问题的数据和代码:
# Example data: one row per (Segment, last_Year) pair with its count `antal`.
# Segment keeps the alphabetical level set, in which "1000+" sorts before
# "200-300" and an unused "Error" level is present.
tblVector_Value <- structure(
  list(
    antal = c(190L, 21L, 33L, 18L, 241L, 77L, 102L, 42L, 45L, 32L),
    last_Year = rep(c(2020L, 2021L), times = 5L),
    Segment = factor(
      rep(c("0-100", "1000+", "100-200", "200-300", "300-400"), each = 2L),
      levels = c("0-100", "100-200", "1000+", "200-300", "300-400", "Error")
    )
  ),
  row.names = c(5L, 6L, 11L, 12L, 18L, 19L, 24L, 25L, 30L, 31L),
  class = "data.frame"
)
library(ggplot2)
library(plyr)
## Re-level Segment so that "1000+" comes last instead of sorting
## alphabetically; the unused "Error" level is dropped.
tblVector_Value$Segment <- factor(
  tblVector_Value$Segment,
  levels = c("0-100", "100-200", "200-300", "300-400", "1000+")
)
## Per year, compute each label's y position on the 0-1 "fill" scale as
## 1 minus the cumulative share up to the middle of the segment.
## NOTE: cumsum() runs over the rows in the order they appear within each
## year, not in factor-level order, so `pos` matches the drawn stack only if
## the rows are already sorted by Segment. This is the mismatch being asked
## about and is left as-is to reproduce it.
tblVector_Value <- ddply(
  tblVector_Value, .(last_Year), transform,
  pos = 1 - ((cumsum(antal) - (0.5 * antal)) / sum(antal))
)
ggplot(
  tblVector_Value[order(tblVector_Value$Segment, decreasing = TRUE), ],
  aes(x = last_Year, y = antal, fill = Segment)
) +
  geom_bar(position = "fill", stat = "identity") +
  # percent_format() belongs to the scales package, which is not attached
  # here, so it is called through its namespace.
  scale_y_continuous(labels = scales::percent_format()) +
  labs(title = "% segment") +
  geom_text(aes(label = antal, y = pos), size = 3)
在 ggplot2 的较新版本中,`geom_text()` 也可以正确放置堆叠条形图和填充条形图的标签。因此,如果可以接受标签没有精确地位于各条形分段的正中,就无需使用 plyr 或任何其他包来显式计算标签位置。
library(ggplot2) # CRAN version 2.2.1 used

# Order the segments numerically rather than alphabetically ("1000+" last).
tblVector_Value[["Segment"]] <- factor(
  tblVector_Value[["Segment"]],
  levels = c("0-100", "100-200", "200-300", "300-400", "1000+")
)
# A factor on x gives a discrete axis instead of a continuous year scale.
tblVector_Value[["last_Year"]] <- factor(tblVector_Value[["last_Year"]])

ggplot(tblVector_Value, aes(x = last_Year, y = antal, fill = Segment)) +
  geom_col(position = "fill") +
  # Labels use the same "fill" position as the bars; vjust nudges the text
  # down from the top edge of each segment.
  geom_text(aes(label = antal), position = "fill", vjust = 2, size = 3) +
  scale_y_continuous(labels = scales::percent) +
  labs(title = "% segment")
请注意,`vjust` 可用于微调文本标签的位置。可以试试 `vjust = 1.5` 或 `vjust = 1.0`。