基于时间的比例计算
Proportion calculation based on time
我有一个数据集,其中包含在不同时间点进行的测量。我想计算:某一时间段的测量值在下一个时间段保持不变的次数所占的百分比。也就是说,对每一行,我想知道从一个时间段到下一个时间段,测量值有多少次保持相同。我该怎么做?
示例数据:
# Example data: a 3-row tibble (class "spec_tbl_df") with seven numeric
# columns t1..t7, one per time point. The `spec` attribute records every
# column as "collector_double" (presumably a readr column spec -- confirm).
# Note: the value is not assigned to a name in this snippet.
structure(list(t1 = c(1, 2, 1), t2 = c(1, 1, 1), t3 = c(1, 3,
4), t4 = c(2, 2, 2), t5 = c(3, 3, 3), t6 = c(3, 3, 3), t7 = c(1,
1, 1)), row.names = c(NA, -3L), spec = structure(list(cols = list(
t1 = structure(list(), class = c("collector_double", "collector"
)), t2 = structure(list(), class = c("collector_double",
"collector")), t3 = structure(list(), class = c("collector_double",
"collector")), t4 = structure(list(), class = c("collector_double",
"collector")), t5 = structure(list(), class = c("collector_double",
"collector")), t6 = structure(list(), class = c("collector_double",
"collector")), t7 = structure(list(), class = c("collector_double",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), delim = ","), class = "col_spec"), class = c("spec_tbl_df",
"tbl_df", "tbl", "data.frame"))
要将每个时间段与前一个时间段进行比较,最简单的方法可能是将数据转换为长格式,然后用 lag() 与前一个时间段的值进行比较:
library(dplyr)
library(tidyr)
# For every time point, count how many rows carry the same value as in the
# preceding time point, and express that count as a percentage of the rows.
timedata |>
  # Tag each original row so its measurements stay together after reshaping.
  mutate(id = row_number()) |>
  # One row per (id, time) pair; the measurement lands in `value`.
  pivot_longer(cols = -id, names_to = "time", values_to = "value") |>
  group_by(id) |>
  # Within a row's series, flag values equal to the previous time point.
  # The first time point has no predecessor, so its flag is NA.
  mutate(same_as_previous = value == dplyr::lag(value)) |>
  ungroup() |>
  group_by(time) |>
  summarise(
    # NA flags (first time point) are dropped and therefore count as 0.
    num_repeated = sum(same_as_previous, na.rm = TRUE),
    percent_repeated = num_repeated / n() * 100
  )
# A tibble: 7 x 3
# time num_repeated percent_repeated
# <chr> <int> <dbl>
# 1 t1 0 0
# 2 t2 2 66.7
# 3 t3 1 33.3
# 4 t4 0 0
# 5 t5 0 0
# 6 t6 3 100
# 7 t7 0 0
如果把你的数据框命名为 df,那么:
# Percentage of measurements that are followed by the same value in the next
# period, taken over all rows and all consecutive column pairs of `df`.
#
# as.matrix() lets the same code work for plain data frames and tibbles
# (assumes all columns share one type, as in the numeric example data --
# mixed-type columns would be coerced to a common type before comparing).
values <- as.matrix(df)
n_periods <- ncol(values)

# TRUE where a value equals the value in the next column of the same row.
# The last column has no successor, so each row yields n_periods - 1
# comparisons. drop = FALSE keeps the matrix shape for 1-row/2-column input.
equal <- values[, -n_periods, drop = FALSE] == values[, -1, drop = FALSE]

# Divide by the number of comparisons actually made rather than by the number
# of cells, so data that never changes yields 100. Missing values propagate:
# any NA comparison makes the result NA.
sum(equal) * 100 / length(equal)
请注意,这里是把每一列与其后一列进行比较(t1 与 t2、t2 与 t3,依此类推);最后一列无法进行比较,因为它之后没有后续的测量值。
我有一个数据集,其中包含在不同时间点进行的测量。我想计算:某一时间段的测量值在下一个时间段保持不变的次数所占的百分比。也就是说,对每一行,我想知道从一个时间段到下一个时间段,测量值有多少次保持相同。我该怎么做?
示例数据:
# Example data: a 3-row tibble (class "spec_tbl_df") with seven numeric
# columns t1..t7, one per time point. The `spec` attribute records every
# column as "collector_double" (presumably a readr column spec -- confirm).
# Note: the value is not assigned to a name in this snippet.
structure(list(t1 = c(1, 2, 1), t2 = c(1, 1, 1), t3 = c(1, 3,
4), t4 = c(2, 2, 2), t5 = c(3, 3, 3), t6 = c(3, 3, 3), t7 = c(1,
1, 1)), row.names = c(NA, -3L), spec = structure(list(cols = list(
t1 = structure(list(), class = c("collector_double", "collector"
)), t2 = structure(list(), class = c("collector_double",
"collector")), t3 = structure(list(), class = c("collector_double",
"collector")), t4 = structure(list(), class = c("collector_double",
"collector")), t5 = structure(list(), class = c("collector_double",
"collector")), t6 = structure(list(), class = c("collector_double",
"collector")), t7 = structure(list(), class = c("collector_double",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), delim = ","), class = "col_spec"), class = c("spec_tbl_df",
"tbl_df", "tbl", "data.frame"))
要将每个时间段与前一个时间段进行比较,最简单的方法可能是将数据转换为长格式,然后用 lag() 与前一个时间段的值进行比较:
library(dplyr)
library(tidyr)
# For every time point, count how many rows carry the same value as in the
# preceding time point, and express that count as a percentage of the rows.
timedata |>
  # Tag each original row so its measurements stay together after reshaping.
  mutate(id = row_number()) |>
  # One row per (id, time) pair; the measurement lands in `value`.
  pivot_longer(cols = -id, names_to = "time", values_to = "value") |>
  group_by(id) |>
  # Within a row's series, flag values equal to the previous time point.
  # The first time point has no predecessor, so its flag is NA.
  mutate(same_as_previous = value == dplyr::lag(value)) |>
  ungroup() |>
  group_by(time) |>
  summarise(
    # NA flags (first time point) are dropped and therefore count as 0.
    num_repeated = sum(same_as_previous, na.rm = TRUE),
    percent_repeated = num_repeated / n() * 100
  )
# A tibble: 7 x 3
# time num_repeated percent_repeated
# <chr> <int> <dbl>
# 1 t1 0 0
# 2 t2 2 66.7
# 3 t3 1 33.3
# 4 t4 0 0
# 5 t5 0 0
# 6 t6 3 100
# 7 t7 0 0
如果把你的数据框命名为 df,那么:
# Percentage of measurements that are followed by the same value in the next
# period, taken over all rows and all consecutive column pairs of `df`.
#
# as.matrix() lets the same code work for plain data frames and tibbles
# (assumes all columns share one type, as in the numeric example data --
# mixed-type columns would be coerced to a common type before comparing).
values <- as.matrix(df)
n_periods <- ncol(values)

# TRUE where a value equals the value in the next column of the same row.
# The last column has no successor, so each row yields n_periods - 1
# comparisons. drop = FALSE keeps the matrix shape for 1-row/2-column input.
equal <- values[, -n_periods, drop = FALSE] == values[, -1, drop = FALSE]

# Divide by the number of comparisons actually made rather than by the number
# of cells, so data that never changes yields 100. Missing values propagate:
# any NA comparison makes the result NA.
sum(equal) * 100 / length(equal)
请注意,这里是把每一列与其后一列进行比较(t1 与 t2、t2 与 t3,依此类推);最后一列无法进行比较,因为它之后没有后续的测量值。