R - stackplot/barplot 随时间变化的比例
R - stackplot/barplot proportion over time
我正在尝试实现这样的分布图
对于每个时期,我需要绘制 "stacked proportion" 图表。
数据基本上是这样的:
400 401 402 403 404 ...
1013662 7 7 7 7 7
1024583 2 2 2 2 2
1024812 6 27 27 27 27
1025491 48 48 48 48 48
1036642 56 56 56 56 56
....
我想到了
# Relative frequency of each distinct value in `x`: tabulate the values,
# then divide every count by the total so the shares sum to 1.
ft <- function(x) {
  counts <- table(x)
  counts / sum(counts)
}
# Run ft on every column of dta (MARGIN = 2 selects columns).
apply(X = dta, MARGIN = 2, FUN = ft)
然后我会得到每个时间段的比例列表
$`400`
2 6 7 19 24 30 42 46 48 56 67
0.05 0.05 0.45 0.05 0.10 0.05 0.05 0.05 0.05 0.05 0.05
我不确定下一步是什么。
我不知道如何正确 unlist
并存储在数据框中?
有什么想法吗?
(我试过最基本的 barplot(as.matrix(dta)),
但结果似乎不太对。)
这是我的数据
# Sample data (dput output): a 20 x 51 integer matrix.
# Row names are 20 identifier strings; column names are the periods
# "400" through "450". The values below are listed column by column.
dta = structure(c(7L, 2L, 6L, 48L, 56L, 7L, 7L, 7L, 46L, 7L, 7L, 24L,
7L, 19L, 7L, 30L, 7L, 24L, 42L, 67L, 7L, 2L, 27L, 48L, 56L, 7L,
7L, 7L, 67L, 7L, 7L, 24L, 7L, 20L, 7L, 30L, 7L, 67L, 42L, 23L,
7L, 2L, 27L, 48L, 56L, 7L, 7L, 7L, 67L, 7L, 7L, 24L, 7L, 20L,
7L, 30L, 7L, 67L, 42L, 23L, 7L, 2L, 27L, 48L, 56L, 7L, 7L, 7L,
67L, 7L, 7L, 24L, 7L, 20L, 7L, 30L, 7L, 67L, 42L, 23L, 7L, 2L,
27L, 48L, 56L, 7L, 7L, 7L, 67L, 7L, 7L, 24L, 7L, 20L, 7L, 30L,
7L, 67L, 42L, 23L, 7L, 2L, 27L, 48L, 56L, 7L, 7L, 7L, 67L, 7L,
7L, 24L, 7L, 20L, 7L, 30L, 7L, 67L, 42L, 23L, 7L, 2L, 27L, 48L,
56L, 7L, 7L, 7L, 67L, 7L, 7L, 24L, 7L, 20L, 7L, 30L, 7L, 67L,
42L, 23L, 7L, 2L, 27L, 48L, 56L, 7L, 7L, 7L, 67L, 7L, 7L, 24L,
7L, 20L, 7L, 30L, 7L, 67L, 42L, 23L, 7L, 2L, 27L, 48L, 56L, 7L,
7L, 7L, 67L, 7L, 7L, 24L, 7L, 20L, 7L, 30L, 7L, 67L, 42L, 23L,
7L, 2L, 27L, 48L, 56L, 7L, 7L, 7L, 67L, 7L, 7L, 24L, 7L, 20L,
7L, 30L, 7L, 67L, 42L, 23L, 7L, 2L, 27L, 48L, 56L, 7L, 7L, 7L,
67L, 7L, 7L, 24L, 7L, 20L, 7L, 30L, 7L, 67L, 42L, 23L, 7L, 2L,
49L, 48L, 56L, 7L, 7L, 7L, 67L, 7L, 7L, 67L, 7L, 20L, 7L, 30L,
7L, 24L, 42L, 23L, 7L, 2L, 49L, 48L, 56L, 7L, 7L, 7L, 67L, 7L,
7L, 67L, 7L, 20L, 7L, 30L, 7L, 24L, 42L, 23L, 7L, 2L, 49L, 48L,
56L, 7L, 7L, 7L, 67L, 7L, 7L, 67L, 7L, 20L, 7L, 30L, 7L, 24L,
42L, 23L, 7L, 2L, 49L, 48L, 56L, 7L, 7L, 7L, 67L, 7L, 7L, 67L,
7L, 20L, 7L, 30L, 7L, 24L, 42L, 23L, 7L, 2L, 49L, 48L, 56L, 7L,
7L, 7L, 67L, 7L, 7L, 67L, 7L, 20L, 7L, 30L, 7L, 24L, 42L, 23L,
7L, 2L, 49L, 48L, 56L, 7L, 7L, 7L, 67L, 7L, 7L, 67L, 7L, 20L,
7L, 30L, 7L, 24L, 42L, 23L, 7L, 2L, 49L, 48L, 56L, 7L, 7L, 7L,
67L, 7L, 7L, 67L, 7L, 20L, 7L, 30L, 7L, 24L, 42L, 23L, 7L, 2L,
49L, 48L, 56L, 7L, 7L, 7L, 67L, 7L, 7L, 67L, 7L, 20L, 7L, 30L,
7L, 24L, 42L, 23L, 7L, 2L, 49L, 48L, 56L, 7L, 7L, 7L, 67L, 7L,
7L, 67L, 7L, 20L, 7L, 30L, 7L, 24L, 42L, 23L, 7L, 2L, 49L, 48L,
56L, 7L, 7L, 7L, 67L, 7L, 7L, 67L, 7L, 20L, 7L, 30L, 7L, 24L,
42L, 23L, 7L, 2L, 49L, 48L, 67L, 7L, 7L, 7L, 24L, 7L, 7L, 22L,
7L, 20L, 7L, 30L, 7L, 24L, 42L, 68L, 7L, 2L, 49L, 48L, 67L, 7L,
7L, 7L, 24L, 7L, 7L, 22L, 7L, 20L, 7L, 30L, 7L, 24L, 42L, 68L,
7L, 2L, 49L, 48L, 67L, 7L, 7L, 7L, 24L, 7L, 7L, 22L, 7L, 20L,
7L, 30L, 7L, 24L, 42L, 68L, 7L, 2L, 49L, 48L, 67L, 7L, 7L, 7L,
24L, 7L, 7L, 22L, 7L, 20L, 7L, 30L, 7L, 24L, 42L, 68L, 7L, 2L,
49L, 48L, 67L, 7L, 7L, 7L, 24L, 7L, 7L, 22L, 7L, 20L, 7L, 30L,
7L, 24L, 42L, 68L, 7L, 2L, 49L, 48L, 67L, 7L, 7L, 7L, 24L, 7L,
7L, 22L, 7L, 20L, 7L, 30L, 7L, 24L, 42L, 68L, 7L, 2L, 49L, 48L,
67L, 7L, 7L, 7L, 24L, 7L, 7L, 22L, 7L, 20L, 7L, 30L, 7L, 24L,
42L, 68L, 7L, 2L, 49L, 48L, 67L, 7L, 7L, 7L, 24L, 7L, 7L, 22L,
7L, 20L, 7L, 30L, 7L, 24L, 42L, 68L, 7L, 2L, 49L, 48L, 67L, 7L,
7L, 7L, 24L, 7L, 7L, 22L, 7L, 20L, 7L, 30L, 7L, 24L, 42L, 68L,
7L, 2L, 49L, 48L, 67L, 7L, 7L, 7L, 24L, 7L, 7L, 22L, 7L, 20L,
7L, 30L, 7L, 24L, 42L, 68L, 7L, 2L, 49L, 48L, 67L, 7L, 7L, 7L,
24L, 7L, 7L, 22L, 7L, 21L, 7L, 30L, 7L, 24L, 42L, 68L, 7L, 2L,
49L, 48L, 67L, 7L, 7L, 7L, 24L, 7L, 7L, 22L, 7L, 21L, 7L, 30L,
7L, 24L, 42L, 68L, 7L, 2L, 49L, 48L, 67L, 7L, 7L, 7L, 24L, 7L,
7L, 22L, 7L, 21L, 7L, 30L, 7L, 24L, 42L, 68L, 7L, 2L, 49L, 48L,
67L, 7L, 7L, 7L, 24L, 7L, 7L, 22L, 7L, 21L, 7L, 30L, 7L, 24L,
42L, 68L, 7L, 2L, 49L, 48L, 67L, 7L, 7L, 7L, 24L, 7L, 7L, 22L,
7L, 21L, 7L, 30L, 7L, 24L, 42L, 68L, 7L, 2L, 49L, 48L, 67L, 7L,
7L, 7L, 24L, 7L, 7L, 22L, 7L, 21L, 7L, 30L, 7L, 24L, 42L, 68L,
7L, 2L, 49L, 48L, 67L, 7L, 7L, 7L, 24L, 7L, 7L, 22L, 7L, 21L,
7L, 30L, 7L, 24L, 42L, 68L, 7L, 2L, 49L, 48L, 67L, 7L, 7L, 7L,
24L, 7L, 7L, 22L, 7L, 21L, 7L, 30L, 7L, 24L, 42L, 68L, 7L, 2L,
49L, 48L, 67L, 7L, 7L, 7L, 24L, 7L, 7L, 22L, 7L, 21L, 7L, 30L,
7L, 24L, 42L, 68L, 7L, 2L, 49L, 48L, 67L, 7L, 7L, 7L, 24L, 7L,
7L, 22L, 7L, 21L, 7L, 30L, 7L, 24L, 42L, 68L, 7L, 2L, 4L, 48L,
67L, 7L, 7L, 7L, 20L, 7L, 7L, 22L, 7L, 18L, 7L, 30L, 7L, 24L,
42L, 68L, 7L, 2L, 4L, 48L, 67L, 7L, 7L, 7L, 20L, 7L, 7L, 22L,
7L, 18L, 7L, 30L, 7L, 24L, 42L, 68L, 7L, 2L, 4L, 48L, 67L, 7L,
7L, 7L, 20L, 7L, 7L, 22L, 7L, 18L, 7L, 30L, 7L, 24L, 42L, 68L,
7L, 2L, 4L, 48L, 67L, 7L, 7L, 7L, 20L, 7L, 7L, 22L, 7L, 18L,
7L, 30L, 7L, 24L, 42L, 68L, 7L, 2L, 4L, 48L, 67L, 7L, 7L, 7L,
20L, 7L, 7L, 22L, 7L, 18L, 7L, 30L, 7L, 24L, 42L, 68L, 7L, 2L,
4L, 48L, 67L, 7L, 7L, 7L, 20L, 7L, 7L, 22L, 7L, 18L, 7L, 30L,
7L, 24L, 42L, 68L, 7L, 2L, 4L, 48L, 67L, 7L, 7L, 7L, 20L, 7L,
7L, 22L, 7L, 18L, 7L, 30L, 7L, 24L, 42L, 68L, 7L, 2L, 4L, 48L,
67L, 7L, 7L, 7L, 20L, 7L, 7L, 22L, 7L, 18L, 7L, 30L, 7L, 24L,
42L, 68L, 7L, 2L, 4L, 48L, 67L, 7L, 7L, 7L, 20L, 7L, 7L, 22L,
7L, 18L, 7L, 30L, 7L, 24L, 42L, 68L, 7L, 2L, 4L, 48L, 67L, 7L,
7L, 7L, 20L, 7L, 7L, 22L, 7L, 18L, 7L, 30L, 7L, 24L, 42L, 68L
), .Dim = c(20L, 51L), .Dimnames = list(c("1013662", "1024583",
"1024812", "1025491", "1036642", "1037551", "1037552", "1037991",
"1037992", "1040291", "1040292", "1041101", "1041212", "1041651",
"104221", "104222", "104331", "104332", "104992", "1052571"),
c("400", "401", "402", "403", "404", "405", "406", "407",
"408", "409", "410", "411", "412", "413", "414", "415", "416",
"417", "418", "419", "420", "421", "422", "423", "424", "425",
"426", "427", "428", "429", "430", "431", "432", "433", "434",
"435", "436", "437", "438", "439", "440", "441", "442", "443",
"444", "445", "446", "447", "448", "449", "450")))
在仔细阅读(而不是匆匆浏览)这个帖子之后,我明白您想要绘制的是各个值出现频率的比例。数值本身的大小并不重要(如果需要,可以把它们视为因子,或强制转换为因子 factor)。
然后,一种方法是使用 table
进行堆叠和计数。
# Stack the columns into long form and cross-tabulate value x period, then
# divide by the number of rows of dta so each bar holds proportions
# (they sum to 1 per period when dta contains no NA).
prop_tab <- table(stack(as.data.frame(dta))) / nrow(dta)
# barplot() stacks one segment per row of the table, i.e. per distinct
# value, so size the palette by the table rather than by nrow(dta).
barplot(prop_tab, space = 0, col = rainbow(nrow(prop_tab)))
使用 ggplot2、dplyr 和 reshape2:
library(ggplot2)
library(reshape2)
library(dplyr)
# Reshape the matrix to long format (one row per cell: Var1 = row name,
# Var2 = period, value = cell value), then compute, for every period, the
# share of rows holding each distinct value.
dta2 <- dta %>%
  melt() %>%
  group_by(Var2, value) %>%
  summarise(count = n() / nrow(dta)) %>%
  ungroup()  # summarise() only peels off the last grouping level

# One full-width stacked bar per period, one segment per distinct value.
# `count` holds a proportion, so label the axis accordingly.
ggplot(dta2, aes(x = Var2, y = count, fill = factor(value))) +
  geom_bar(stat = "identity", width = 1, colour = "black") +
  labs(y = "proportion", fill = "value")
我正在尝试实现这样的分布图
对于每个时期,我需要绘制 "stacked proportion" 图表。
数据基本上是这样的:
400 401 402 403 404 ...
1013662 7 7 7 7 7
1024583 2 2 2 2 2
1024812 6 27 27 27 27
1025491 48 48 48 48 48
1036642 56 56 56 56 56
....
我想到了
# Relative frequency of each distinct value in `x`: tabulate the values,
# then divide every count by the total so the shares sum to 1.
ft <- function(x) {
  counts <- table(x)
  counts / sum(counts)
}
# Run ft on every column of dta (MARGIN = 2 selects columns).
apply(X = dta, MARGIN = 2, FUN = ft)
然后我会得到每个时间段的比例列表
$`400`
2 6 7 19 24 30 42 46 48 56 67
0.05 0.05 0.45 0.05 0.10 0.05 0.05 0.05 0.05 0.05 0.05
我不确定下一步是什么。
我不知道如何正确 unlist
并存储在数据框中?
有什么想法吗?
(我试过最基本的 barplot(as.matrix(dta)),
但结果似乎不太对。)
这是我的数据
# Sample data (dput output): a 20 x 51 integer matrix.
# Row names are 20 identifier strings; column names are the periods
# "400" through "450". The values below are listed column by column.
dta = structure(c(7L, 2L, 6L, 48L, 56L, 7L, 7L, 7L, 46L, 7L, 7L, 24L,
7L, 19L, 7L, 30L, 7L, 24L, 42L, 67L, 7L, 2L, 27L, 48L, 56L, 7L,
7L, 7L, 67L, 7L, 7L, 24L, 7L, 20L, 7L, 30L, 7L, 67L, 42L, 23L,
7L, 2L, 27L, 48L, 56L, 7L, 7L, 7L, 67L, 7L, 7L, 24L, 7L, 20L,
7L, 30L, 7L, 67L, 42L, 23L, 7L, 2L, 27L, 48L, 56L, 7L, 7L, 7L,
67L, 7L, 7L, 24L, 7L, 20L, 7L, 30L, 7L, 67L, 42L, 23L, 7L, 2L,
27L, 48L, 56L, 7L, 7L, 7L, 67L, 7L, 7L, 24L, 7L, 20L, 7L, 30L,
7L, 67L, 42L, 23L, 7L, 2L, 27L, 48L, 56L, 7L, 7L, 7L, 67L, 7L,
7L, 24L, 7L, 20L, 7L, 30L, 7L, 67L, 42L, 23L, 7L, 2L, 27L, 48L,
56L, 7L, 7L, 7L, 67L, 7L, 7L, 24L, 7L, 20L, 7L, 30L, 7L, 67L,
42L, 23L, 7L, 2L, 27L, 48L, 56L, 7L, 7L, 7L, 67L, 7L, 7L, 24L,
7L, 20L, 7L, 30L, 7L, 67L, 42L, 23L, 7L, 2L, 27L, 48L, 56L, 7L,
7L, 7L, 67L, 7L, 7L, 24L, 7L, 20L, 7L, 30L, 7L, 67L, 42L, 23L,
7L, 2L, 27L, 48L, 56L, 7L, 7L, 7L, 67L, 7L, 7L, 24L, 7L, 20L,
7L, 30L, 7L, 67L, 42L, 23L, 7L, 2L, 27L, 48L, 56L, 7L, 7L, 7L,
67L, 7L, 7L, 24L, 7L, 20L, 7L, 30L, 7L, 67L, 42L, 23L, 7L, 2L,
49L, 48L, 56L, 7L, 7L, 7L, 67L, 7L, 7L, 67L, 7L, 20L, 7L, 30L,
7L, 24L, 42L, 23L, 7L, 2L, 49L, 48L, 56L, 7L, 7L, 7L, 67L, 7L,
7L, 67L, 7L, 20L, 7L, 30L, 7L, 24L, 42L, 23L, 7L, 2L, 49L, 48L,
56L, 7L, 7L, 7L, 67L, 7L, 7L, 67L, 7L, 20L, 7L, 30L, 7L, 24L,
42L, 23L, 7L, 2L, 49L, 48L, 56L, 7L, 7L, 7L, 67L, 7L, 7L, 67L,
7L, 20L, 7L, 30L, 7L, 24L, 42L, 23L, 7L, 2L, 49L, 48L, 56L, 7L,
7L, 7L, 67L, 7L, 7L, 67L, 7L, 20L, 7L, 30L, 7L, 24L, 42L, 23L,
7L, 2L, 49L, 48L, 56L, 7L, 7L, 7L, 67L, 7L, 7L, 67L, 7L, 20L,
7L, 30L, 7L, 24L, 42L, 23L, 7L, 2L, 49L, 48L, 56L, 7L, 7L, 7L,
67L, 7L, 7L, 67L, 7L, 20L, 7L, 30L, 7L, 24L, 42L, 23L, 7L, 2L,
49L, 48L, 56L, 7L, 7L, 7L, 67L, 7L, 7L, 67L, 7L, 20L, 7L, 30L,
7L, 24L, 42L, 23L, 7L, 2L, 49L, 48L, 56L, 7L, 7L, 7L, 67L, 7L,
7L, 67L, 7L, 20L, 7L, 30L, 7L, 24L, 42L, 23L, 7L, 2L, 49L, 48L,
56L, 7L, 7L, 7L, 67L, 7L, 7L, 67L, 7L, 20L, 7L, 30L, 7L, 24L,
42L, 23L, 7L, 2L, 49L, 48L, 67L, 7L, 7L, 7L, 24L, 7L, 7L, 22L,
7L, 20L, 7L, 30L, 7L, 24L, 42L, 68L, 7L, 2L, 49L, 48L, 67L, 7L,
7L, 7L, 24L, 7L, 7L, 22L, 7L, 20L, 7L, 30L, 7L, 24L, 42L, 68L,
7L, 2L, 49L, 48L, 67L, 7L, 7L, 7L, 24L, 7L, 7L, 22L, 7L, 20L,
7L, 30L, 7L, 24L, 42L, 68L, 7L, 2L, 49L, 48L, 67L, 7L, 7L, 7L,
24L, 7L, 7L, 22L, 7L, 20L, 7L, 30L, 7L, 24L, 42L, 68L, 7L, 2L,
49L, 48L, 67L, 7L, 7L, 7L, 24L, 7L, 7L, 22L, 7L, 20L, 7L, 30L,
7L, 24L, 42L, 68L, 7L, 2L, 49L, 48L, 67L, 7L, 7L, 7L, 24L, 7L,
7L, 22L, 7L, 20L, 7L, 30L, 7L, 24L, 42L, 68L, 7L, 2L, 49L, 48L,
67L, 7L, 7L, 7L, 24L, 7L, 7L, 22L, 7L, 20L, 7L, 30L, 7L, 24L,
42L, 68L, 7L, 2L, 49L, 48L, 67L, 7L, 7L, 7L, 24L, 7L, 7L, 22L,
7L, 20L, 7L, 30L, 7L, 24L, 42L, 68L, 7L, 2L, 49L, 48L, 67L, 7L,
7L, 7L, 24L, 7L, 7L, 22L, 7L, 20L, 7L, 30L, 7L, 24L, 42L, 68L,
7L, 2L, 49L, 48L, 67L, 7L, 7L, 7L, 24L, 7L, 7L, 22L, 7L, 20L,
7L, 30L, 7L, 24L, 42L, 68L, 7L, 2L, 49L, 48L, 67L, 7L, 7L, 7L,
24L, 7L, 7L, 22L, 7L, 21L, 7L, 30L, 7L, 24L, 42L, 68L, 7L, 2L,
49L, 48L, 67L, 7L, 7L, 7L, 24L, 7L, 7L, 22L, 7L, 21L, 7L, 30L,
7L, 24L, 42L, 68L, 7L, 2L, 49L, 48L, 67L, 7L, 7L, 7L, 24L, 7L,
7L, 22L, 7L, 21L, 7L, 30L, 7L, 24L, 42L, 68L, 7L, 2L, 49L, 48L,
67L, 7L, 7L, 7L, 24L, 7L, 7L, 22L, 7L, 21L, 7L, 30L, 7L, 24L,
42L, 68L, 7L, 2L, 49L, 48L, 67L, 7L, 7L, 7L, 24L, 7L, 7L, 22L,
7L, 21L, 7L, 30L, 7L, 24L, 42L, 68L, 7L, 2L, 49L, 48L, 67L, 7L,
7L, 7L, 24L, 7L, 7L, 22L, 7L, 21L, 7L, 30L, 7L, 24L, 42L, 68L,
7L, 2L, 49L, 48L, 67L, 7L, 7L, 7L, 24L, 7L, 7L, 22L, 7L, 21L,
7L, 30L, 7L, 24L, 42L, 68L, 7L, 2L, 49L, 48L, 67L, 7L, 7L, 7L,
24L, 7L, 7L, 22L, 7L, 21L, 7L, 30L, 7L, 24L, 42L, 68L, 7L, 2L,
49L, 48L, 67L, 7L, 7L, 7L, 24L, 7L, 7L, 22L, 7L, 21L, 7L, 30L,
7L, 24L, 42L, 68L, 7L, 2L, 49L, 48L, 67L, 7L, 7L, 7L, 24L, 7L,
7L, 22L, 7L, 21L, 7L, 30L, 7L, 24L, 42L, 68L, 7L, 2L, 4L, 48L,
67L, 7L, 7L, 7L, 20L, 7L, 7L, 22L, 7L, 18L, 7L, 30L, 7L, 24L,
42L, 68L, 7L, 2L, 4L, 48L, 67L, 7L, 7L, 7L, 20L, 7L, 7L, 22L,
7L, 18L, 7L, 30L, 7L, 24L, 42L, 68L, 7L, 2L, 4L, 48L, 67L, 7L,
7L, 7L, 20L, 7L, 7L, 22L, 7L, 18L, 7L, 30L, 7L, 24L, 42L, 68L,
7L, 2L, 4L, 48L, 67L, 7L, 7L, 7L, 20L, 7L, 7L, 22L, 7L, 18L,
7L, 30L, 7L, 24L, 42L, 68L, 7L, 2L, 4L, 48L, 67L, 7L, 7L, 7L,
20L, 7L, 7L, 22L, 7L, 18L, 7L, 30L, 7L, 24L, 42L, 68L, 7L, 2L,
4L, 48L, 67L, 7L, 7L, 7L, 20L, 7L, 7L, 22L, 7L, 18L, 7L, 30L,
7L, 24L, 42L, 68L, 7L, 2L, 4L, 48L, 67L, 7L, 7L, 7L, 20L, 7L,
7L, 22L, 7L, 18L, 7L, 30L, 7L, 24L, 42L, 68L, 7L, 2L, 4L, 48L,
67L, 7L, 7L, 7L, 20L, 7L, 7L, 22L, 7L, 18L, 7L, 30L, 7L, 24L,
42L, 68L, 7L, 2L, 4L, 48L, 67L, 7L, 7L, 7L, 20L, 7L, 7L, 22L,
7L, 18L, 7L, 30L, 7L, 24L, 42L, 68L, 7L, 2L, 4L, 48L, 67L, 7L,
7L, 7L, 20L, 7L, 7L, 22L, 7L, 18L, 7L, 30L, 7L, 24L, 42L, 68L
), .Dim = c(20L, 51L), .Dimnames = list(c("1013662", "1024583",
"1024812", "1025491", "1036642", "1037551", "1037552", "1037991",
"1037992", "1040291", "1040292", "1041101", "1041212", "1041651",
"104221", "104222", "104331", "104332", "104992", "1052571"),
c("400", "401", "402", "403", "404", "405", "406", "407",
"408", "409", "410", "411", "412", "413", "414", "415", "416",
"417", "418", "419", "420", "421", "422", "423", "424", "425",
"426", "427", "428", "429", "430", "431", "432", "433", "434",
"435", "436", "437", "438", "439", "440", "441", "442", "443",
"444", "445", "446", "447", "448", "449", "450")))
在仔细阅读(而不是匆匆浏览)这个帖子之后,我明白您想要绘制的是各个值出现频率的比例。数值本身的大小并不重要(如果需要,可以把它们视为因子,或强制转换为因子 factor)。
然后,一种方法是使用 table
进行堆叠和计数。
# Stack the columns into long form and cross-tabulate value x period, then
# divide by the number of rows of dta so each bar holds proportions
# (they sum to 1 per period when dta contains no NA).
prop_tab <- table(stack(as.data.frame(dta))) / nrow(dta)
# barplot() stacks one segment per row of the table, i.e. per distinct
# value, so size the palette by the table rather than by nrow(dta).
barplot(prop_tab, space = 0, col = rainbow(nrow(prop_tab)))
使用 ggplot2、dplyr 和 reshape2:
library(ggplot2)
library(reshape2)
library(dplyr)
# Reshape the matrix to long format (one row per cell: Var1 = row name,
# Var2 = period, value = cell value), then compute, for every period, the
# share of rows holding each distinct value.
dta2 <- dta %>%
  melt() %>%
  group_by(Var2, value) %>%
  summarise(count = n() / nrow(dta)) %>%
  ungroup()  # summarise() only peels off the last grouping level

# One full-width stacked bar per period, one segment per distinct value.
# `count` holds a proportion, so label the axis accordingly.
ggplot(dta2, aes(x = Var2, y = count, fill = factor(value))) +
  geom_bar(stat = "identity", width = 1, colour = "black") +
  labs(y = "proportion", fill = "value")