R:带有持续时间的ggplot
R: ggplot with durations
问题:您对在 ggplot2(作者:Hadley Wickham)中处理持续时间(durations)有什么建议?具体来说:请使用自定义刻度断点(breaks)和合适的标签重现下面的图。希望尽量少用自定义函数和/或重构数据。也欢迎推荐我没有提到的软件包。
数据以秒为单位存储(参见下面的 df
)。我想显示人眼可读的中断和标签,例如天而不是数千秒,中断发生在 0、1、2... 天而不是尴尬的分数。
我已做过的尝试:下面的第一个示例将持续时间作为整数处理,并视情况除以 60、24、365 等的倍数来实现目标。第二个示例使用基础 R 的 difftime 对象;为了在这种情况下得到正确结果,我必须使用 strftime 函数并减去 1。我是否遗漏了什么?第三个示例使用 lubridate 包中的 duration 类。虽然用 day() 和 seconds_to_period() 函数指定标签非常容易,但我在设置自定义刻度断点(breaks)方面做得不太好。第四个示例使用 hms 类。我设法指定了刻度断点,但没能指定标签。也欢迎就如何把下面每个示例的代码写得更短提出建议。
# Data: six observations whose durations are stored as whole seconds.
df <- data.frame(
  x = 1:6,
  num = c(374400, 343500, 174000, 193500, 197700, 270300)
)
# base/difftime: the same seconds as a base R difftime column.
df$difftime <- as.difftime(df$num, units = "secs")
# lubridate/duration
library("lubridate") # devtools::install_github("tidyverse/lubridate") # the dev version fixes a bug
df$duration <- duration(df$num, units = "seconds")
# hms/hms
library("hms")
# as_hms() is the supported replacement for the deprecated as.hms().
df$hms <- as_hms(df$num)
library("ggplot2")
library("scales")
# 1: data is base/numeric
# Pro: no package dependence
# Con: Hard work
# Breaks are placed on whole days (0 to 5 days, expressed in seconds) so each
# label is an exact day count. A spacing that is not a multiple of 24 hours
# would need rounding, which puts labels off the day boundaries and can give
# two different breaks the same label.
breaks <- seq(0, 5, 1) * 24 * 60 * 60
# Convert a break position in seconds to days.
labels <- function(x) x / (24 * 60 * 60)
ggplot(data = df, aes(x = x, y = num)) +
  geom_bar(stat = "identity", fill = "lightblue") +
  scale_y_continuous(
    name = "Duration (Days)",
    breaks = breaks,
    labels = labels
  ) +
  labs(
    title = "Data stored as numeric (seconds)",
    subtitle = "breaks = seq(0, 5, 1)*24*60*60\nlabels = function(x) x/(24*60*60)",
    x = NULL
  )
# Writes the plot built above to disk.
ggsave("base-num.png")
# 2: data is base/difftime
# Pro: simple once you get over the ``strftime(x, "%d")`` syntax.
# Unresolved: Why do I need to subtract a day?
# NOTE(review): presumably strftime() treats the value as a date-time, whose
# day-of-month starts at 1 rather than 0 -- confirm.
labels <- function(x) {
  day_of_month <- as.integer(strftime(x, "%d"))
  day_of_month - 1
}
ggplot(data = df, mapping = aes(x = x, y = difftime)) +
  geom_col(fill = "lightblue") +
  scale_y_time(
    name = "Duration (Days)",
    labels = labels
  ) +
  labs(
    title = "Data stored as difftime (seconds)",
    subtitle = "default breaks\nlabels = function(x) as.integer(strftime(x, '%d'))-1",
    x = NULL
  )
# Writes the plot built above to disk.
ggsave("base-difftime.png")
# 3: data is lubridate/duration
# Pro: intuitive combination of day() and seconds_to_period() functions
# Unresolved: a better way to make own breaks?
# Whole-day breaks (0 to 5 days) expressed in seconds, as a Duration.
breaks <- as.duration(seq(0, 5, by = 1) * 24 * 60 * 60)
# Turn seconds into a period and keep only its day component.
labels <- function(x) {
  as_period <- seconds_to_period(x)
  day(as_period)
}
ggplot(data = df, mapping = aes(x = x, y = duration)) +
  geom_col(fill = "lightblue") +
  scale_y_continuous(
    name = "Duration (Days)",
    breaks = breaks,
    labels = labels
  ) +
  labs(
    title = "Data stored as duration (seconds)",
    subtitle = "breaks = as.duration(seq(0, 5, 1)*60*60*24)\nlabels = function(x)lubridate::day(lubridate::seconds_to_period(x))",
    x = NULL
  )
# Writes the plot built above to disk.
ggsave("lubridate-duration.png")
# 4: data is hms/hms
# Pro: Immediately generates plot with acceptable labels
# Unresolved: how to make own labels.
# Breaks are defined before the label attempts so that the attempt which
# counts them uses this example's breaks, not a `breaks` value left behind
# by earlier code.
breaks <- seq(0, 5, 1) * 60 * 60 * 24
# Failed attempts (kept for reference; neither is passed to the scale below):
labels <- seq_along(breaks) - 1L
labels <- function(x) lubridate::day(x)
ggplot(data = df, aes(x = x, y = hms)) +
  geom_bar(stat = "identity", fill = "lightblue") +
  scale_y_continuous(
    name = "Duration (Seconds)",
    breaks = breaks
  ) +
  labs(
    title = "Data stored as hms (seconds)",
    subtitle = "breaks = seq(0, 5, 1)*60*60*24\ndefault labels",
    x = NULL
  )
# Writes the plot built above to disk.
ggsave("hms-hms.png")
编辑 按照 Axeman 在评论部分的建议,这是将 ggplot
与 hms
对象组合的方法。这在我看来是 4 个中最方便的,尽管不得不承认减去 1
是出乎意料的。 Axeman,你想post这个作为答案吗?
# Whole-day breaks from 0 to 4 days, built as hms values.
breaks <- hms::hms(days = 0:4)
# Label each break with lubridate::day() of the value, minus one.
labels <- function(x) {
  lubridate::day(x) - 1
}
恕我直言,上面提出的解决方案在我看来过于复杂。
如果持续时间以整数秒给出并且需要以天为单位绘制,我的做法是在调用 aes() 时直接对其进行缩放:
# Six observations with durations stored as seconds.
df <- data.frame(
  x = 1:6,
  num = c(374400, 343500, 174000, 193500, 197700, 270300)
)
library("ggplot2")
# Seconds are converted to days inside aes(); no custom breaks or labels are
# supplied to the scale.
ggplot(data = df, mapping = aes(x = x, y = num / (24 * 60 * 60))) +
  geom_col(fill = "lightblue") +
  labs(
    title = "Data stored as numeric (seconds)",
    y = "Duration (Days)",
    x = NULL
  )
因此,无需再费心调整刻度断点和标签。
注:geom_col() 是 geom_bar(stat = "identity") 的替代写法。
问题:您对在 ggplot2(作者:Hadley Wickham)中处理持续时间(durations)有什么建议?具体来说:请使用自定义刻度断点(breaks)和合适的标签重现下面的图。希望尽量少用自定义函数和/或重构数据。也欢迎推荐我没有提到的软件包。
数据以秒为单位存储(参见下面的 df
)。我想显示人眼可读的中断和标签,例如天而不是数千秒,中断发生在 0、1、2... 天而不是尴尬的分数。
我已做过的尝试:下面的第一个示例将持续时间作为整数处理,并视情况除以 60、24、365 等的倍数来实现目标。第二个示例使用基础 R 的 difftime 对象;为了在这种情况下得到正确结果,我必须使用 strftime 函数并减去 1。我是否遗漏了什么?第三个示例使用 lubridate 包中的 duration 类。虽然用 day() 和 seconds_to_period() 函数指定标签非常容易,但我在设置自定义刻度断点(breaks)方面做得不太好。第四个示例使用 hms 类。我设法指定了刻度断点,但没能指定标签。也欢迎就如何把下面每个示例的代码写得更短提出建议。
# Data: six observations whose durations are stored as whole seconds.
df <- data.frame(
  x = 1:6,
  num = c(374400, 343500, 174000, 193500, 197700, 270300)
)
# base/difftime: the same seconds as a base R difftime column.
df$difftime <- as.difftime(df$num, units = "secs")
# lubridate/duration
library("lubridate") # devtools::install_github("tidyverse/lubridate") # the dev version fixes a bug
df$duration <- duration(df$num, units = "seconds")
# hms/hms
library("hms")
# as_hms() is the supported replacement for the deprecated as.hms().
df$hms <- as_hms(df$num)
library("ggplot2")
library("scales")
# 1: data is base/numeric
# Pro: no package dependence
# Con: Hard work
# Breaks are placed on whole days (0 to 5 days, expressed in seconds) so each
# label is an exact day count. A spacing that is not a multiple of 24 hours
# would need rounding, which puts labels off the day boundaries and can give
# two different breaks the same label.
breaks <- seq(0, 5, 1) * 24 * 60 * 60
# Convert a break position in seconds to days.
labels <- function(x) x / (24 * 60 * 60)
ggplot(data = df, aes(x = x, y = num)) +
  geom_bar(stat = "identity", fill = "lightblue") +
  scale_y_continuous(
    name = "Duration (Days)",
    breaks = breaks,
    labels = labels
  ) +
  labs(
    title = "Data stored as numeric (seconds)",
    subtitle = "breaks = seq(0, 5, 1)*24*60*60\nlabels = function(x) x/(24*60*60)",
    x = NULL
  )
# Writes the plot built above to disk.
ggsave("base-num.png")
# 2: data is base/difftime
# Pro: simple once you get over the ``strftime(x, "%d")`` syntax.
# Unresolved: Why do I need to subtract a day?
# NOTE(review): presumably strftime() treats the value as a date-time, whose
# day-of-month starts at 1 rather than 0 -- confirm.
labels <- function(x) {
  day_of_month <- as.integer(strftime(x, "%d"))
  day_of_month - 1
}
ggplot(data = df, mapping = aes(x = x, y = difftime)) +
  geom_col(fill = "lightblue") +
  scale_y_time(
    name = "Duration (Days)",
    labels = labels
  ) +
  labs(
    title = "Data stored as difftime (seconds)",
    subtitle = "default breaks\nlabels = function(x) as.integer(strftime(x, '%d'))-1",
    x = NULL
  )
# Writes the plot built above to disk.
ggsave("base-difftime.png")
# 3: data is lubridate/duration
# Pro: intuitive combination of day() and seconds_to_period() functions
# Unresolved: a better way to make own breaks?
# Whole-day breaks (0 to 5 days) expressed in seconds, as a Duration.
breaks <- as.duration(seq(0, 5, by = 1) * 24 * 60 * 60)
# Turn seconds into a period and keep only its day component.
labels <- function(x) {
  as_period <- seconds_to_period(x)
  day(as_period)
}
ggplot(data = df, mapping = aes(x = x, y = duration)) +
  geom_col(fill = "lightblue") +
  scale_y_continuous(
    name = "Duration (Days)",
    breaks = breaks,
    labels = labels
  ) +
  labs(
    title = "Data stored as duration (seconds)",
    subtitle = "breaks = as.duration(seq(0, 5, 1)*60*60*24)\nlabels = function(x)lubridate::day(lubridate::seconds_to_period(x))",
    x = NULL
  )
# Writes the plot built above to disk.
ggsave("lubridate-duration.png")
# 4: data is hms/hms
# Pro: Immediately generates plot with acceptable labels
# Unresolved: how to make own labels.
# Breaks are defined before the label attempts so that the attempt which
# counts them uses this example's breaks, not a `breaks` value left behind
# by earlier code.
breaks <- seq(0, 5, 1) * 60 * 60 * 24
# Failed attempts (kept for reference; neither is passed to the scale below):
labels <- seq_along(breaks) - 1L
labels <- function(x) lubridate::day(x)
ggplot(data = df, aes(x = x, y = hms)) +
  geom_bar(stat = "identity", fill = "lightblue") +
  scale_y_continuous(
    name = "Duration (Seconds)",
    breaks = breaks
  ) +
  labs(
    title = "Data stored as hms (seconds)",
    subtitle = "breaks = seq(0, 5, 1)*60*60*24\ndefault labels",
    x = NULL
  )
# Writes the plot built above to disk.
ggsave("hms-hms.png")
编辑 按照 Axeman 在评论部分的建议,这是将 ggplot
与 hms
对象组合的方法。这在我看来是 4 个中最方便的,尽管不得不承认减去 1
是出乎意料的。 Axeman,你想post这个作为答案吗?
# Whole-day breaks from 0 to 4 days, built as hms values.
breaks <- hms::hms(days = 0:4)
# Label each break with lubridate::day() of the value, minus one.
labels <- function(x) {
  lubridate::day(x) - 1
}
恕我直言,上面提出的解决方案在我看来过于复杂。
如果持续时间以整数秒给出并且需要以天为单位绘制,我的做法是在调用 aes() 时直接对其进行缩放:
# Six observations with durations stored as seconds.
df <- data.frame(
  x = 1:6,
  num = c(374400, 343500, 174000, 193500, 197700, 270300)
)
library("ggplot2")
# Seconds are converted to days inside aes(); no custom breaks or labels are
# supplied to the scale.
ggplot(data = df, mapping = aes(x = x, y = num / (24 * 60 * 60))) +
  geom_col(fill = "lightblue") +
  labs(
    title = "Data stored as numeric (seconds)",
    y = "Duration (Days)",
    x = NULL
  )
因此,无需再费心调整刻度断点和标签。
注:geom_col() 是 geom_bar(stat = "identity") 的替代写法。