使用 lubridate 或 scales 绘制多个数据集

Plot multiple datasets with lubridate or scales

我创建了三个按年份过滤的数据集(banks2016、banks2017、banks2018)。我用这三个数据集制作了一张图,因此图中有三条颜色不同的线。

我遇到的问题是,如果存在每周一次的交易,那么该月份的位置上就会显示四个点。例如,如果我有 1-1-16、8-1-16、15-1-16、22-1-16 这几笔付款,它们全都显示在一月份的刻度线上。理想情况下,我希望线和点能分布在一月和二月之间。

我尝试了一些不同的方法,包括使用 scales 的 date_breaks。我也试着改变使用 lubridate 的方式,但都无济于事。有什么建议吗?

下面是我的代码。

# Stack the three yearly data frames and draw one line per calendar year.
# The x aesthetic is the month label extracted from Date (a factor), so every
# observation within the same month is drawn at the same x position.
ggplot(
  rbind(banks2016, banks2017, banks2018),
  aes(
    x = month(Date, label = TRUE, abbr = TRUE),
    y = Balance,
    # Group and colour by calendar year so each year gets its own line.
    group = factor(year(Date)),
    colour = factor(year(Date))
  )
) +
  geom_line() +
  geom_point() +
  labs(x = "Month", colour = "Year") +
  theme_classic()

以下是 banks2016 的输出。我想根据日期绘制总余额,也就是让数据一周接一周地连成一条连续的线,但 x 轴标签按月份显示。现在更仔细地查看数据后,我发现日期并不像我最初以为的那样总是每周一次。我可能需要重新处理数据。

structure(list(Date = structure(c(17038, 17038, 17038, 17031, 17029, 17024, 17022, 17017, 17017, 17014, 17009, 17008, 16996, 16989, 16989, 16987, 16987, 16987, 16983), class = "Date"), Debits = c(NA, NA, 1686451.25, NA, NA, 3111755.91, NA, NA, 25100, 3.66, NA, NA, 313.26, NA, 1566.27, NA, NA, NA, 0.8), Credits = c(14693.48, 10250, NA, 409.25, 5655863.07, NA, 2304.45, 2443, NA, NA, 300, 122, NA, 8716.45, NA, 30000, 25000, 5993.6, NA), Balance = c(15824841.24, 15810147.76, 15799897.76, 17486349.01, 17485939.76, 11830076.69, 14941832.6, 14939528.15, 14937085.15, 14962185.15, 14962188.81, 14961888.81, 14961766.81, 14962080.07, 14953363.62, 14954929.89, 14924929.89, 14899929.89, 14893936.29)), row.names = c(NA, -19L), class = "data.frame")

听起来您希望 x 轴显示 1 月至 12 月,而每条线显示某一个日历年内余额随时间的变化;对吗?如果是这样,一种技术(在别处已有描述)是创建一个新的日期列,把所有日期都放到同一年中,然后用这个新列绘图,但仍按实际日期中的年份分组。以下是针对您的数据集的做法:

library(ggplot2)
library(lubridate)
library(dplyr)

# Posted dataset: 19 bank transactions with the running balance after each.
# Date is given as day counts since 1970-01-01.
banks <- data.frame(
  Date = as.Date(
    c(
      17038, 17038, 17038, 17031, 17029, 17024, 17022, 17017, 17017, 17014,
      17009, 17008, 16996, 16989, 16989, 16987, 16987, 16987, 16983
    ),
    origin = "1970-01-01"
  ),
  Debits = c(
    NA, NA, 1686451.25, NA, NA, 3111755.91, NA, NA, 25100, 3.66,
    NA, NA, 313.26, NA, 1566.27, NA, NA, NA, 0.8
  ),
  Credits = c(
    14693.48, 10250, NA, 409.25, 5655863.07, NA, 2304.45, 2443, NA, NA,
    300, 122, NA, 8716.45, NA, 30000, 25000, 5993.6, NA
  ),
  Balance = c(
    15824841.24, 15810147.76, 15799897.76, 17486349.01, 17485939.76,
    11830076.69, 14941832.6, 14939528.15, 14937085.15, 14962185.15,
    14962188.81, 14961888.81, 14961766.81, 14962080.07, 14953363.62,
    14954929.89, 14924929.89, 14899929.89, 14893936.29
  )
)
# The posted dataset covers only one year (2016).  Append two shifted copies
# (2017 and 2018) with slightly scaled balances so that the per-year grouping
# is visible in the plot.
#
# `%m+%` is used for the year shift instead of `+`: plain period arithmetic
# returns NA when the shifted date does not exist (e.g. 29 Feb 2016 + 1 year),
# whereas `%m+%` rolls back to the last valid day of that month.
banks <- bind_rows(
  banks,
  banks %>%
    mutate(
      Date = Date %m+% years(1),
      Balance = Balance * 1.1
    ),
  banks %>%
    mutate(
      Date = Date %m+% years(2),
      Balance = Balance * 1.2
    )
)

# Build a helper column used only for the x-axis: shift each date back by the
# number of years separating it from 2000, so that every observation lands in
# the same reference year while Date itself keeps the real year.
banks <- mutate(
  banks,
  DateToPlot = Date - years(year(Date) - 2000)
)

# Draw Balance against the common-year date.  The real year of each
# observation (taken from Date, not DateToPlot) drives both the grouping and
# the line colour, so each calendar year is drawn as its own line.
ggplot(
  data = banks,
  mapping = aes(
    x = DateToPlot,
    y = Balance,
    group = factor(year(Date)),
    colour = factor(year(Date))
  )
) +
  geom_line() +
  geom_point() +
  # One axis break per month, labelled with the full month name.
  scale_x_date(
    date_breaks = "1 month",
    date_labels = "%B"
  ) +
  labs(x = "Month", colour = "Year") +
  theme_classic()