来自 SQL 查询的堆积密度图
Stacked density chart from SQL query
我有一个 SQL 查询的结果,看起来像这样(一个没有特定含义的测试查询):
week cash ccard fcard mobile total
9 3.45 0.00 0.00 0.00 3.45
10 13.02 17.18 4.32 21.24 55.76
11 47.61 24.52 12.32 32.18 116.63
12 21.32 61.96 17.32 1.40 102.00
13 181.80 1.70 275.20 3.50 462.20
14 390.14 191.80 10.08 100.40 692.42
15 102.40 207.80 101.40 0.00 411.60
这个查询的结果进入一个数据框,我想将其绘制为堆叠密度图,其中 'week' 应该在 X 轴上,Y 轴应该是分数 'cash/total','ccard/total' 等等。我该怎么做?我用谷歌搜索,但到目前为止我发现的所有示例似乎都不适用于 SQL 输出。
提前致谢...
一般来说,ggplot2 更喜欢“长”格式的数据,而目前的数据是“宽”格式。用 SQL 的术语来说,这种转换属于 PIVOT 操作,不过我发现 tidyr::pivot_*(以及 data.table::melt 和 data.table::dcast)比 SQL 的写法更容易使用。
我的意思是:
library(dplyr)
library(tidyr) # just for pivot_longer
# Reshape the wide payment columns (cash through mobile) into long form:
# pivot_longer() stores the former column names in `name` and the amounts in
# `value`; `pct` is then each amount's share of that row's `total`.
dat <- dat %>%
  pivot_longer(cash:mobile) %>%
  mutate(pct = value / total)
# Print the reshaped data to inspect it.
dat
# # A tibble: 28 x 5
# week total name value pct
# <int> <dbl> <chr> <dbl> <dbl>
# 1 9 3.45 cash 3.45 1
# 2 9 3.45 ccard 0 0
# 3 9 3.45 fcard 0 0
# 4 9 3.45 mobile 0 0
# 5 10 55.8 cash 13.0 0.234
# 6 10 55.8 ccard 17.2 0.308
# 7 10 55.8 fcard 4.32 0.0775
# 8 10 55.8 mobile 21.2 0.381
# 9 11 117. cash 47.6 0.408
# 10 11 117. ccard 24.5 0.210
# # ... with 18 more rows
有了它,你就可以做到
library(ggplot2)
# library(scales) # percent
# Stacked area drawn through the weekly points: stat = "identity" plots the
# precomputed pct values as given, and position = "fill" rescales each week's
# stack to full height so the y axis reads as a share of the total.
ggplot(data = dat, mapping = aes(x = week, y = pct, fill = name)) +
  geom_density(stat = "identity", position = "fill") +
  scale_y_continuous(labels = scales::percent)
(我应该补充一点:这种图的“密度”外观有些误导,它暗示每周数据点之间也存在数据。由于 x 轴实际上是离散的,而且样本量“n”很小,我建议采用 @RyanJohn 提出的条形图。)
这是条形图 - 如果您想要的话。
library(tidyverse)
library(scales)
# Example data rebuilt from dput()-style output: seven weekly rows holding the
# amount per payment type plus the weekly total. The class vector marks the
# object as a tibble, and the `spec` attribute records every column as a
# double collector.
df1 <- structure(
  list(
    week = c(9, 10, 11, 12, 13, 14, 15),
    cash = c(3.45, 13.02, 47.61, 21.32, 181.8, 390.14, 102.4),
    ccard = c(0, 17.18, 24.52, 61.96, 1.7, 191.8, 207.8),
    fcard = c(0, 4.32, 12.32, 17.32, 275.2, 10.08, 101.4),
    mobile = c(0, 21.24, 32.18, 1.4, 3.5, 100.4, 0),
    total = c(3.45, 55.76, 116.63, 102, 462.2, 692.42, 411.6)
  ),
  class = c("spec_tbl_df", "tbl_df", "tbl", "data.frame"),
  # Compact row-name form for seven automatically numbered rows.
  row.names = c(NA, -7L),
  spec = structure(
    list(
      cols = list(
        week = structure(list(), class = c("collector_double", "collector")),
        cash = structure(list(), class = c("collector_double", "collector")),
        ccard = structure(list(), class = c("collector_double", "collector")),
        fcard = structure(list(), class = c("collector_double", "collector")),
        mobile = structure(list(), class = c("collector_double", "collector")),
        total = structure(list(), class = c("collector_double", "collector"))
      ),
      default = structure(list(), class = c("collector_guess", "collector")),
      skip = 1
    ),
    class = "col_spec"
  )
)
# Reshape to long form (every column except week and total becomes a
# type/amount pair), express each amount as a share of the row's total, then
# draw one stacked column per week with a percent-formatted y axis.
df1 %>%
  pivot_longer(
    cols = -c(week, total),
    names_to = "type",
    values_to = "amount"
  ) %>%
  mutate(pct = amount / total) %>%
  ggplot(mapping = aes(x = week, y = pct, fill = type)) +
  geom_col() +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(title = "% spend by payment type")
由 reprex 包 (v0.3.0) 于 2020-08-12 创建
我有一个 SQL 查询的结果,看起来像这样(一个没有特定含义的测试查询):
week cash ccard fcard mobile total
9 3.45 0.00 0.00 0.00 3.45
10 13.02 17.18 4.32 21.24 55.76
11 47.61 24.52 12.32 32.18 116.63
12 21.32 61.96 17.32 1.40 102.00
13 181.80 1.70 275.20 3.50 462.20
14 390.14 191.80 10.08 100.40 692.42
15 102.40 207.80 101.40 0.00 411.60
这个查询的结果进入一个数据框,我想将其绘制为堆叠密度图,其中 'week' 应该在 X 轴上,Y 轴应该是分数 'cash/total','ccard/total' 等等。我该怎么做?我用谷歌搜索,但到目前为止我发现的所有示例似乎都不适用于 SQL 输出。
提前致谢...
一般来说,ggplot2 更喜欢“长”格式的数据,而目前的数据是“宽”格式。用 SQL 的术语来说,这种转换属于 PIVOT 操作,不过我发现 tidyr::pivot_*(以及 data.table::melt 和 data.table::dcast)比 SQL 的写法更容易使用。
我的意思是:
library(dplyr)
library(tidyr) # just for pivot_longer
# Reshape the wide payment columns (cash through mobile) into long form:
# pivot_longer() stores the former column names in `name` and the amounts in
# `value`; `pct` is then each amount's share of that row's `total`.
dat <- dat %>%
  pivot_longer(cash:mobile) %>%
  mutate(pct = value / total)
# Print the reshaped data to inspect it.
dat
# # A tibble: 28 x 5
# week total name value pct
# <int> <dbl> <chr> <dbl> <dbl>
# 1 9 3.45 cash 3.45 1
# 2 9 3.45 ccard 0 0
# 3 9 3.45 fcard 0 0
# 4 9 3.45 mobile 0 0
# 5 10 55.8 cash 13.0 0.234
# 6 10 55.8 ccard 17.2 0.308
# 7 10 55.8 fcard 4.32 0.0775
# 8 10 55.8 mobile 21.2 0.381
# 9 11 117. cash 47.6 0.408
# 10 11 117. ccard 24.5 0.210
# # ... with 18 more rows
有了它,你就可以做到
library(ggplot2)
# library(scales) # percent
# Stacked area drawn through the weekly points: stat = "identity" plots the
# precomputed pct values as given, and position = "fill" rescales each week's
# stack to full height so the y axis reads as a share of the total.
ggplot(data = dat, mapping = aes(x = week, y = pct, fill = name)) +
  geom_density(stat = "identity", position = "fill") +
  scale_y_continuous(labels = scales::percent)
(我应该补充一点:这种图的“密度”外观有些误导,它暗示每周数据点之间也存在数据。由于 x 轴实际上是离散的,而且样本量“n”很小,我建议采用 @RyanJohn 提出的条形图。)
这是条形图 - 如果您想要的话。
library(tidyverse)
library(scales)
# Example data rebuilt from dput()-style output: seven weekly rows holding the
# amount per payment type plus the weekly total. The class vector marks the
# object as a tibble, and the `spec` attribute records every column as a
# double collector.
df1 <- structure(
  list(
    week = c(9, 10, 11, 12, 13, 14, 15),
    cash = c(3.45, 13.02, 47.61, 21.32, 181.8, 390.14, 102.4),
    ccard = c(0, 17.18, 24.52, 61.96, 1.7, 191.8, 207.8),
    fcard = c(0, 4.32, 12.32, 17.32, 275.2, 10.08, 101.4),
    mobile = c(0, 21.24, 32.18, 1.4, 3.5, 100.4, 0),
    total = c(3.45, 55.76, 116.63, 102, 462.2, 692.42, 411.6)
  ),
  class = c("spec_tbl_df", "tbl_df", "tbl", "data.frame"),
  # Compact row-name form for seven automatically numbered rows.
  row.names = c(NA, -7L),
  spec = structure(
    list(
      cols = list(
        week = structure(list(), class = c("collector_double", "collector")),
        cash = structure(list(), class = c("collector_double", "collector")),
        ccard = structure(list(), class = c("collector_double", "collector")),
        fcard = structure(list(), class = c("collector_double", "collector")),
        mobile = structure(list(), class = c("collector_double", "collector")),
        total = structure(list(), class = c("collector_double", "collector"))
      ),
      default = structure(list(), class = c("collector_guess", "collector")),
      skip = 1
    ),
    class = "col_spec"
  )
)
# Reshape to long form (every column except week and total becomes a
# type/amount pair), express each amount as a share of the row's total, then
# draw one stacked column per week with a percent-formatted y axis.
df1 %>%
  pivot_longer(
    cols = -c(week, total),
    names_to = "type",
    values_to = "amount"
  ) %>%
  mutate(pct = amount / total) %>%
  ggplot(mapping = aes(x = week, y = pct, fill = type)) +
  geom_col() +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(title = "% spend by payment type")
由 reprex 包 (v0.3.0) 于 2020-08-12 创建