如何按天计算字符型变量的持续时间
How to count persistence in days for characters
假设我们有以下数据:
# Example data: 14 type labels observed on 14 consecutive days.
# The rep() call expands to 1,1,1,2,3,1,2,2,3,3,3,3,1,1.
type <- paste(
  "type",
  rep(c(1, 2, 3, 1, 2, 3, 1), times = c(3, 1, 1, 1, 2, 4, 2))
)
# One date per label, starting on 2000-01-01 and advancing one day at a time.
dates <- seq(as.Date("2000-01-01"), by = "day", length.out = length(type))
mydataframe <- data.frame(type = type, dates = dates)
我在其他帖子中看到 rle 或许可以完成这项工作,但我想得到一个数据框,其中包含每种类型的平均持续天数。类似于:
> print(persistance)
type1 type2 type3
1 2 1.5 2.5
有人知道怎么做吗?
谢谢!
data.table
library(data.table)
# Run-length encode the type column: `lengths` holds the size of each
# consecutive run and `values` holds the type label of that run.
runs <- rle(as.character(mydataframe[["type"]]))
# Convert the two-component rle result into a data.table by reference.
setDT(runs)
# Mean run length per type label.
runs[, mean(lengths), by = values]
# values V1
# 1: type 1 2.0
# 2: type 2 1.5
# 3: type 3 2.5
tidyverse 和 magrittr
library(tidyverse)
library(magrittr)
# Run-length encode the type column; the magrittr exposition pipe `%$%`
# makes the `lengths` and `values` components of the rle result available
# by name when building the tibble.
rle(as.character(mydataframe$type)) %$%
  tibble(lengths, values) %>%
  group_by(values) %>%
  # across() replaces the superseded summarise_all(); `lengths` is the only
  # non-grouping column, so this yields the mean run length per type.
  summarise(across(everything(), mean))
# # A tibble: 3 x 2
# values lengths
# <chr> <dbl>
# 1 type 1 2.00
# 2 type 2 1.50
# 3 type 3 2.50
dplyr
library(dplyr)
# Run-length encode the type column, drop the "rle" class so the result is a
# plain list of `lengths` and `values`, and turn that list into a data frame.
rle(as.character(mydataframe$type)) %>%
  unclass() %>%
  as.data.frame() %>%
  group_by(values) %>%
  # across() replaces the superseded summarise_all(); `lengths` is the only
  # non-grouping column, so this yields the mean run length per type.
  summarise(across(everything(), mean))
另一种(分组)解决方案:
# Example data: 14 type labels observed on 14 consecutive days.
# The rep() call expands to 1,1,1,2,3,1,2,2,3,3,3,3,1,1.
type <- paste(
  "type",
  rep(c(1, 2, 3, 1, 2, 3, 1), times = c(3, 1, 1, 1, 2, 4, 2))
)
# One date per label, starting on 2000-01-01 and advancing one day at a time.
dates <- seq(as.Date("2000-01-01"), by = "day", length.out = length(type))
mydataframe <- data.frame(type = type, dates = dates)
library(dplyr)
mydataframe %>%
  # Run id: the counter increases by one each time `type` differs from the
  # previous row; the first row is compared with itself, so it starts at 0.
  mutate(group = cumsum(type != lag(type, default = first(type)))) %>%
  # Number of rows in each (type, run) pair, i.e. the length of each run.
  count(type, group) %>%
  group_by(type) %>%
  # Average run length per type.
  summarise(Avg = mean(n))
# # A tibble: 3 x 2
# type Avg
# <fct> <dbl>
# 1 type 1 2
# 2 type 2 1.5
# 3 type 3 2.5
您可以使用 base R 函数 rle 和 aggregate 来完成此操作。
# Recreate the example data from the question.
# The rep() call expands to 1,1,1,2,3,1,2,2,3,3,3,3,1,1.
type <- paste(
  "type",
  rep(c(1, 2, 3, 1, 2, 3, 1), times = c(3, 1, 1, 1, 2, 4, 2))
)
dates <- seq(as.Date("2000-01-01"), by = "day", length.out = length(type))
mydataframe <- data.frame(type = type, dates = dates)
# Run-length encode the type column: `lengths` holds the size of each
# consecutive run and `values` holds the type label of that run.
runs <- rle(as.character(mydataframe[["type"]]))
# Average the run lengths within each type label.
aggregate(runs$lengths, by = list(values = runs$values), FUN = mean)
请注意,这里假设日期列中的日期确实是连续的。如果日期之间存在间隔,并且您希望把间隔两侧视为不同的连续段(run),则需要稍微修改代码,真正用到 dates 列中的日期。
假设我们有以下数据:
# Example data: 14 type labels observed on 14 consecutive days.
# The rep() call expands to 1,1,1,2,3,1,2,2,3,3,3,3,1,1.
type <- paste(
  "type",
  rep(c(1, 2, 3, 1, 2, 3, 1), times = c(3, 1, 1, 1, 2, 4, 2))
)
# One date per label, starting on 2000-01-01 and advancing one day at a time.
dates <- seq(as.Date("2000-01-01"), by = "day", length.out = length(type))
mydataframe <- data.frame(type = type, dates = dates)
我在其他帖子中看到 rle 或许可以完成这项工作,但我想得到一个数据框,其中包含每种类型的平均持续天数。类似于:
> print(persistance)
type1 type2 type3
1 2 1.5 2.5
有人知道怎么做吗? 谢谢!
data.table
library(data.table)
# Run-length encode the type column: `lengths` holds the size of each
# consecutive run and `values` holds the type label of that run.
runs <- rle(as.character(mydataframe[["type"]]))
# Convert the two-component rle result into a data.table by reference.
setDT(runs)
# Mean run length per type label.
runs[, mean(lengths), by = values]
# values V1
# 1: type 1 2.0
# 2: type 2 1.5
# 3: type 3 2.5
tidyverse 和 magrittr
library(tidyverse)
library(magrittr)
# Run-length encode the type column; the magrittr exposition pipe `%$%`
# makes the `lengths` and `values` components of the rle result available
# by name when building the tibble.
rle(as.character(mydataframe$type)) %$%
  tibble(lengths, values) %>%
  group_by(values) %>%
  # across() replaces the superseded summarise_all(); `lengths` is the only
  # non-grouping column, so this yields the mean run length per type.
  summarise(across(everything(), mean))
# # A tibble: 3 x 2
# values lengths
# <chr> <dbl>
# 1 type 1 2.00
# 2 type 2 1.50
# 3 type 3 2.50
dplyr
library(dplyr)
# Run-length encode the type column, drop the "rle" class so the result is a
# plain list of `lengths` and `values`, and turn that list into a data frame.
rle(as.character(mydataframe$type)) %>%
  unclass() %>%
  as.data.frame() %>%
  group_by(values) %>%
  # across() replaces the superseded summarise_all(); `lengths` is the only
  # non-grouping column, so this yields the mean run length per type.
  summarise(across(everything(), mean))
另一种(分组)解决方案:
# Example data: 14 type labels observed on 14 consecutive days.
# The rep() call expands to 1,1,1,2,3,1,2,2,3,3,3,3,1,1.
type <- paste(
  "type",
  rep(c(1, 2, 3, 1, 2, 3, 1), times = c(3, 1, 1, 1, 2, 4, 2))
)
# One date per label, starting on 2000-01-01 and advancing one day at a time.
dates <- seq(as.Date("2000-01-01"), by = "day", length.out = length(type))
mydataframe <- data.frame(type = type, dates = dates)
library(dplyr)
mydataframe %>%
  # Run id: the counter increases by one each time `type` differs from the
  # previous row; the first row is compared with itself, so it starts at 0.
  mutate(group = cumsum(type != lag(type, default = first(type)))) %>%
  # Number of rows in each (type, run) pair, i.e. the length of each run.
  count(type, group) %>%
  group_by(type) %>%
  # Average run length per type.
  summarise(Avg = mean(n))
# # A tibble: 3 x 2
# type Avg
# <fct> <dbl>
# 1 type 1 2
# 2 type 2 1.5
# 3 type 3 2.5
您可以使用 base R 函数 rle 和 aggregate 来完成此操作。
# Recreate the example data from the question.
# The rep() call expands to 1,1,1,2,3,1,2,2,3,3,3,3,1,1.
type <- paste(
  "type",
  rep(c(1, 2, 3, 1, 2, 3, 1), times = c(3, 1, 1, 1, 2, 4, 2))
)
dates <- seq(as.Date("2000-01-01"), by = "day", length.out = length(type))
mydataframe <- data.frame(type = type, dates = dates)
# Run-length encode the type column: `lengths` holds the size of each
# consecutive run and `values` holds the type label of that run.
runs <- rle(as.character(mydataframe[["type"]]))
# Average the run lengths within each type label.
aggregate(runs$lengths, by = list(values = runs$values), FUN = mean)
请注意,这里假设日期列中的日期确实是连续的。如果日期之间存在间隔,并且您希望把间隔两侧视为不同的连续段(run),则需要稍微修改代码,真正用到 dates 列中的日期。