按 ID 将数据框分成 10 天的间隔
Separating data frame into 10-day intervals by ID
我有一个数据框,我想将其分成按 ID 组织的 10 天间隔。当我使用下面的代码时,它按 ID 将其分隔,但列表中的每个元素不是以 10 天为间隔,并且不是按 ID 组织的。
如何将我的数据分成 10 天的时间间隔并按 ID 分组?
library(lubridate)
# Build a 500-row example: the daily sequence is longer than 500 days, so
# rep_len() keeps the first 500 consecutive days starting 2010-12-26.
date <- rep_len(seq(dmy("26-12-2010"), dmy("20-12-2013"), by = "days"), 500)
# IDs cycle 1..5 down the rows, so each ID appears 100 times.
ID <- rep(seq(1, 5), 100)
# x and y are uniform random values; no seed is set, so they differ per run.
df <- data.frame(date = date,
x = runif(length(date), min = 60000, max = 80000),
y = runif(length(date), min = 800000, max = 900000),
ID)
# Take every 10th distinct date as the start of an interval.
# NOTE(review): the name `t` shadows base::t() in this scope.
t <- unique(df$date)[seq(from = 1,
to = length(unique(df$date)),
by = 10)]
# Attempt to build one list element per interval.
# NOTE(review): `%>%` and filter() are not provided by library(lubridate);
# presumably dplyr is attached elsewhere -- confirm.
# NOTE(review): the index `k` is never used in the function body, and
# `date == t` compares the column against the whole vector `t` (recycled
# element-wise) instead of selecting the rows that fall inside interval k.
interval_10 <- lapply(
1:(length(t)-1),
function(k) df %>%
filter(date == t)
)
我们可以使用
library(dplyr)
library(lubridate)
# Label every row with the (exclusive) end date of its 10-day window, counting
# windows from the earliest date in the data, then group by ID and window.
# ceiling_date(date, "10 day") is deliberately not used: lubridate rounds
# multi-day units within each calendar month, so its bins restart on the 1st
# of every month and are not consecutive 10-day spans.
df %>%
  mutate(
    # Whole days since the first date, integer-divided into 10-day windows.
    new = min(date) + 10L * (as.integer(date - min(date)) %/% 10L + 1L)
  ) %>%
  # Grouping after the mutate keeps the window anchor global across all IDs.
  group_by(ID, new)
您在寻找这种输出吗?
library(tidyverse)
library(lubridate)
#>
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#>
#> date, intersect, setdiff, union
# Example data: the first 500 consecutive days starting 2010-12-26, with IDs
# cycling 1..5 and two columns of uniform random values (no seed is set, so
# the numbers differ on every run).
date <- rep_len(seq(dmy("26-12-2010"), dmy("20-12-2013"), by = "days"), 500)
ID <- rep(seq(1, 5), 100)
df <- data.frame(
  date = date,
  x = runif(length(date), min = 60000, max = 80000),
  y = runif(length(date), min = 800000, max = 900000),
  ID
)

# Split the data into consecutive 10-day windows and sort each window by ID.
# The window number is derived from the date itself (days since the earliest
# date, integer-divided by 10) rather than from the row position, so it does
# not depend on the data having exactly 500 rows, one row per day, or being
# sorted by date. For the example data this yields the same labels 1..50.
df %>%
  mutate(interval = as.integer(date - min(date)) %/% 10L + 1L) %>%
  group_split(interval) %>%
  map(~ arrange(.x, ID)) %>%
  head(5)
#> [[1]]
#> # A tibble: 10 x 5
#> date x y ID interval
#> <date> <dbl> <dbl> <int> <int>
#> 1 2010-12-26 75235. 810405. 1 1
#> 2 2010-12-31 78964. 825454. 1 1
#> 3 2010-12-27 63564. 886938. 2 1
#> 4 2011-01-01 70658. 863580. 2 1
#> 5 2010-12-28 73647. 845554. 3 1
#> 6 2011-01-02 60750. 841294. 3 1
#> 7 2010-12-29 69549. 826752. 4 1
#> 8 2011-01-03 63391. 806722. 4 1
#> 9 2010-12-30 62584. 818130. 5 1
#> 10 2011-01-04 79600. 823551. 5 1
#>
#> [[2]]
#> # A tibble: 10 x 5
#> date x y ID interval
#> <date> <dbl> <dbl> <int> <int>
#> 1 2011-01-05 68145. 806577. 1 2
#> 2 2011-01-10 73122. 845198. 1 2
#> 3 2011-01-06 65635. 833174. 2 2
#> 4 2011-01-11 61972. 846711. 2 2
#> 5 2011-01-07 73767. 888569. 3 2
#> 6 2011-01-12 64636. 866264. 3 2
#> 7 2011-01-08 69169. 810342. 4 2
#> 8 2011-01-13 79168. 885329. 4 2
#> 9 2011-01-09 60065. 858075. 5 2
#> 10 2011-01-14 79825. 809081. 5 2
#>
#> [[3]]
#> # A tibble: 10 x 5
#> date x y ID interval
#> <date> <dbl> <dbl> <int> <int>
#> 1 2011-01-15 60489. 878544. 1 3
#> 2 2011-01-20 79112. 803445. 1 3
#> 3 2011-01-16 64206. 824595. 2 3
#> 4 2011-01-21 73777. 893237. 2 3
#> 5 2011-01-17 60151. 885401. 3 3
#> 6 2011-01-22 60348. 829403. 3 3
#> 7 2011-01-18 76682. 869436. 4 3
#> 8 2011-01-23 64845. 885666. 4 3
#> 9 2011-01-19 64418. 847046. 5 3
#> 10 2011-01-24 69272. 857423. 5 3
#>
#> [[4]]
#> # A tibble: 10 x 5
#> date x y ID interval
#> <date> <dbl> <dbl> <int> <int>
#> 1 2011-01-25 62626. 845889. 1 4
#> 2 2011-01-30 67183. 851174. 1 4
#> 3 2011-01-26 67983. 806809. 2 4
#> 4 2011-01-31 75358. 805705. 2 4
#> 5 2011-01-27 60954. 854788. 3 4
#> 6 2011-02-01 67305. 816768. 3 4
#> 7 2011-01-28 79795. 887996. 4 4
#> 8 2011-02-02 63193. 818398. 4 4
#> 9 2011-01-29 66437. 850248. 5 4
#> 10 2011-02-03 68542. 848168. 5 4
#>
#> [[5]]
#> # A tibble: 10 x 5
#> date x y ID interval
#> <date> <dbl> <dbl> <int> <int>
#> 1 2011-02-04 72750. 839064. 1 5
#> 2 2011-02-09 68292. 816603. 1 5
#> 3 2011-02-05 62712. 859081. 2 5
#> 4 2011-02-10 62533. 863694. 2 5
#> 5 2011-02-06 64920. 808720. 3 5
#> 6 2011-02-11 78642. 871171. 3 5
#> 7 2011-02-07 78125. 849347. 4 5
#> 8 2011-02-12 64352. 842637. 4 5
#> 9 2011-02-08 74452. 813705. 5 5
#> 10 2011-02-13 78086. 874101. 5 5
编辑:
# Split into consecutive 10-day windows, then split every window by ID, giving
# a list (one element per window) of lists (one tibble per ID).
# The window number comes from the date (days since the earliest date,
# integer-divided by 10) instead of a hard-coded 50 x 10 row layout.
df %>%
  mutate(interval = as.integer(date - min(date)) %/% 10L + 1L) %>%
  group_split(interval) %>%
  # group_split() already returns the pieces ordered by ID and keeps the row
  # order inside each piece, so no separate arrange(ID) step is needed.
  map(~ group_split(.x, ID)) %>%
  head(2)
#> [[1]]
#> <list_of<
#> tbl_df<
#> date : date
#> x : double
#> y : double
#> ID : integer
#> interval: integer
#> >
#> >[5]>
#> [[1]]
#> # A tibble: 2 x 5
#> date x y ID interval
#> <date> <dbl> <dbl> <int> <int>
#> 1 2010-12-26 64016. 858085. 1 1
#> 2 2010-12-31 76973. 810635. 1 1
#>
#> [[2]]
#> # A tibble: 2 x 5
#> date x y ID interval
#> <date> <dbl> <dbl> <int> <int>
#> 1 2010-12-27 73318. 818689. 2 1
#> 2 2011-01-01 63646. 804369. 2 1
#>
#> [[3]]
#> # A tibble: 2 x 5
#> date x y ID interval
#> <date> <dbl> <dbl> <int> <int>
#> 1 2010-12-28 77786. 870629. 3 1
#> 2 2011-01-02 63465. 820951. 3 1
#>
#> [[4]]
#> # A tibble: 2 x 5
#> date x y ID interval
#> <date> <dbl> <dbl> <int> <int>
#> 1 2010-12-29 62191. 851103. 4 1
#> 2 2011-01-03 69102. 874624. 4 1
#>
#> [[5]]
#> # A tibble: 2 x 5
#> date x y ID interval
#> <date> <dbl> <dbl> <int> <int>
#> 1 2010-12-30 71665. 867749. 5 1
#> 2 2011-01-04 67052. 861228. 5 1
#>
#>
#> [[2]]
#> <list_of<
#> tbl_df<
#> date : date
#> x : double
#> y : double
#> ID : integer
#> interval: integer
#> >
#> >[5]>
#> [[1]]
#> # A tibble: 2 x 5
#> date x y ID interval
#> <date> <dbl> <dbl> <int> <int>
#> 1 2011-01-05 66203. 830146. 1 2
#> 2 2011-01-10 63117. 867473. 1 2
#>
#> [[2]]
#> # A tibble: 2 x 5
#> date x y ID interval
#> <date> <dbl> <dbl> <int> <int>
#> 1 2011-01-06 64386. 810594. 2 2
#> 2 2011-01-11 63491. 839683. 2 2
#>
#> [[3]]
#> # A tibble: 2 x 5
#> date x y ID interval
#> <date> <dbl> <dbl> <int> <int>
#> 1 2011-01-07 70155. 865205. 3 2
#> 2 2011-01-12 74384. 833320. 3 2
#>
#> [[4]]
#> # A tibble: 2 x 5
#> date x y ID interval
#> <date> <dbl> <dbl> <int> <int>
#> 1 2011-01-08 63251. 853296. 4 2
#> 2 2011-01-13 70607. 803073. 4 2
#>
#> [[5]]
#> # A tibble: 2 x 5
#> date x y ID interval
#> <date> <dbl> <dbl> <int> <int>
#> 1 2011-01-09 64284. 889136. 5 2
#> 2 2011-01-14 63190. 866626. 5 2
由 reprex package (v2.0.0)
于 2021-06-27 创建
我有一个数据框,我想将其分成按 ID 组织的 10 天间隔。当我使用下面的代码时,它按 ID 将其分隔,但列表中的每个元素不是以 10 天为间隔,并且不是按 ID 组织的。
如何将我的数据分成 10 天的时间间隔并按 ID 分组?
library(lubridate)
# Build a 500-row example: the daily sequence is longer than 500 days, so
# rep_len() keeps the first 500 consecutive days starting 2010-12-26.
date <- rep_len(seq(dmy("26-12-2010"), dmy("20-12-2013"), by = "days"), 500)
# IDs cycle 1..5 down the rows, so each ID appears 100 times.
ID <- rep(seq(1, 5), 100)
# x and y are uniform random values; no seed is set, so they differ per run.
df <- data.frame(date = date,
x = runif(length(date), min = 60000, max = 80000),
y = runif(length(date), min = 800000, max = 900000),
ID)
# Take every 10th distinct date as the start of an interval.
# NOTE(review): the name `t` shadows base::t() in this scope.
t <- unique(df$date)[seq(from = 1,
to = length(unique(df$date)),
by = 10)]
# Attempt to build one list element per interval.
# NOTE(review): `%>%` and filter() are not provided by library(lubridate);
# presumably dplyr is attached elsewhere -- confirm.
# NOTE(review): the index `k` is never used in the function body, and
# `date == t` compares the column against the whole vector `t` (recycled
# element-wise) instead of selecting the rows that fall inside interval k.
interval_10 <- lapply(
1:(length(t)-1),
function(k) df %>%
filter(date == t)
)
我们可以使用
library(dplyr)
library(lubridate)
# Label every row with the (exclusive) end date of its 10-day window, counting
# windows from the earliest date in the data, then group by ID and window.
# ceiling_date(date, "10 day") is deliberately not used: lubridate rounds
# multi-day units within each calendar month, so its bins restart on the 1st
# of every month and are not consecutive 10-day spans.
df %>%
  mutate(
    # Whole days since the first date, integer-divided into 10-day windows.
    new = min(date) + 10L * (as.integer(date - min(date)) %/% 10L + 1L)
  ) %>%
  # Grouping after the mutate keeps the window anchor global across all IDs.
  group_by(ID, new)
您在寻找这种输出吗?
library(tidyverse)
library(lubridate)
#>
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#>
#> date, intersect, setdiff, union
# Example data: the first 500 consecutive days starting 2010-12-26, with IDs
# cycling 1..5 and two columns of uniform random values (no seed is set, so
# the numbers differ on every run).
date <- rep_len(seq(dmy("26-12-2010"), dmy("20-12-2013"), by = "days"), 500)
ID <- rep(seq(1, 5), 100)
df <- data.frame(
  date = date,
  x = runif(length(date), min = 60000, max = 80000),
  y = runif(length(date), min = 800000, max = 900000),
  ID
)

# Split the data into consecutive 10-day windows and sort each window by ID.
# The window number is derived from the date itself (days since the earliest
# date, integer-divided by 10) rather than from the row position, so it does
# not depend on the data having exactly 500 rows, one row per day, or being
# sorted by date. For the example data this yields the same labels 1..50.
df %>%
  mutate(interval = as.integer(date - min(date)) %/% 10L + 1L) %>%
  group_split(interval) %>%
  map(~ arrange(.x, ID)) %>%
  head(5)
#> [[1]]
#> # A tibble: 10 x 5
#> date x y ID interval
#> <date> <dbl> <dbl> <int> <int>
#> 1 2010-12-26 75235. 810405. 1 1
#> 2 2010-12-31 78964. 825454. 1 1
#> 3 2010-12-27 63564. 886938. 2 1
#> 4 2011-01-01 70658. 863580. 2 1
#> 5 2010-12-28 73647. 845554. 3 1
#> 6 2011-01-02 60750. 841294. 3 1
#> 7 2010-12-29 69549. 826752. 4 1
#> 8 2011-01-03 63391. 806722. 4 1
#> 9 2010-12-30 62584. 818130. 5 1
#> 10 2011-01-04 79600. 823551. 5 1
#>
#> [[2]]
#> # A tibble: 10 x 5
#> date x y ID interval
#> <date> <dbl> <dbl> <int> <int>
#> 1 2011-01-05 68145. 806577. 1 2
#> 2 2011-01-10 73122. 845198. 1 2
#> 3 2011-01-06 65635. 833174. 2 2
#> 4 2011-01-11 61972. 846711. 2 2
#> 5 2011-01-07 73767. 888569. 3 2
#> 6 2011-01-12 64636. 866264. 3 2
#> 7 2011-01-08 69169. 810342. 4 2
#> 8 2011-01-13 79168. 885329. 4 2
#> 9 2011-01-09 60065. 858075. 5 2
#> 10 2011-01-14 79825. 809081. 5 2
#>
#> [[3]]
#> # A tibble: 10 x 5
#> date x y ID interval
#> <date> <dbl> <dbl> <int> <int>
#> 1 2011-01-15 60489. 878544. 1 3
#> 2 2011-01-20 79112. 803445. 1 3
#> 3 2011-01-16 64206. 824595. 2 3
#> 4 2011-01-21 73777. 893237. 2 3
#> 5 2011-01-17 60151. 885401. 3 3
#> 6 2011-01-22 60348. 829403. 3 3
#> 7 2011-01-18 76682. 869436. 4 3
#> 8 2011-01-23 64845. 885666. 4 3
#> 9 2011-01-19 64418. 847046. 5 3
#> 10 2011-01-24 69272. 857423. 5 3
#>
#> [[4]]
#> # A tibble: 10 x 5
#> date x y ID interval
#> <date> <dbl> <dbl> <int> <int>
#> 1 2011-01-25 62626. 845889. 1 4
#> 2 2011-01-30 67183. 851174. 1 4
#> 3 2011-01-26 67983. 806809. 2 4
#> 4 2011-01-31 75358. 805705. 2 4
#> 5 2011-01-27 60954. 854788. 3 4
#> 6 2011-02-01 67305. 816768. 3 4
#> 7 2011-01-28 79795. 887996. 4 4
#> 8 2011-02-02 63193. 818398. 4 4
#> 9 2011-01-29 66437. 850248. 5 4
#> 10 2011-02-03 68542. 848168. 5 4
#>
#> [[5]]
#> # A tibble: 10 x 5
#> date x y ID interval
#> <date> <dbl> <dbl> <int> <int>
#> 1 2011-02-04 72750. 839064. 1 5
#> 2 2011-02-09 68292. 816603. 1 5
#> 3 2011-02-05 62712. 859081. 2 5
#> 4 2011-02-10 62533. 863694. 2 5
#> 5 2011-02-06 64920. 808720. 3 5
#> 6 2011-02-11 78642. 871171. 3 5
#> 7 2011-02-07 78125. 849347. 4 5
#> 8 2011-02-12 64352. 842637. 4 5
#> 9 2011-02-08 74452. 813705. 5 5
#> 10 2011-02-13 78086. 874101. 5 5
编辑:
# Split into consecutive 10-day windows, then split every window by ID, giving
# a list (one element per window) of lists (one tibble per ID).
# The window number comes from the date (days since the earliest date,
# integer-divided by 10) instead of a hard-coded 50 x 10 row layout.
df %>%
  mutate(interval = as.integer(date - min(date)) %/% 10L + 1L) %>%
  group_split(interval) %>%
  # group_split() already returns the pieces ordered by ID and keeps the row
  # order inside each piece, so no separate arrange(ID) step is needed.
  map(~ group_split(.x, ID)) %>%
  head(2)
#> [[1]]
#> <list_of<
#> tbl_df<
#> date : date
#> x : double
#> y : double
#> ID : integer
#> interval: integer
#> >
#> >[5]>
#> [[1]]
#> # A tibble: 2 x 5
#> date x y ID interval
#> <date> <dbl> <dbl> <int> <int>
#> 1 2010-12-26 64016. 858085. 1 1
#> 2 2010-12-31 76973. 810635. 1 1
#>
#> [[2]]
#> # A tibble: 2 x 5
#> date x y ID interval
#> <date> <dbl> <dbl> <int> <int>
#> 1 2010-12-27 73318. 818689. 2 1
#> 2 2011-01-01 63646. 804369. 2 1
#>
#> [[3]]
#> # A tibble: 2 x 5
#> date x y ID interval
#> <date> <dbl> <dbl> <int> <int>
#> 1 2010-12-28 77786. 870629. 3 1
#> 2 2011-01-02 63465. 820951. 3 1
#>
#> [[4]]
#> # A tibble: 2 x 5
#> date x y ID interval
#> <date> <dbl> <dbl> <int> <int>
#> 1 2010-12-29 62191. 851103. 4 1
#> 2 2011-01-03 69102. 874624. 4 1
#>
#> [[5]]
#> # A tibble: 2 x 5
#> date x y ID interval
#> <date> <dbl> <dbl> <int> <int>
#> 1 2010-12-30 71665. 867749. 5 1
#> 2 2011-01-04 67052. 861228. 5 1
#>
#>
#> [[2]]
#> <list_of<
#> tbl_df<
#> date : date
#> x : double
#> y : double
#> ID : integer
#> interval: integer
#> >
#> >[5]>
#> [[1]]
#> # A tibble: 2 x 5
#> date x y ID interval
#> <date> <dbl> <dbl> <int> <int>
#> 1 2011-01-05 66203. 830146. 1 2
#> 2 2011-01-10 63117. 867473. 1 2
#>
#> [[2]]
#> # A tibble: 2 x 5
#> date x y ID interval
#> <date> <dbl> <dbl> <int> <int>
#> 1 2011-01-06 64386. 810594. 2 2
#> 2 2011-01-11 63491. 839683. 2 2
#>
#> [[3]]
#> # A tibble: 2 x 5
#> date x y ID interval
#> <date> <dbl> <dbl> <int> <int>
#> 1 2011-01-07 70155. 865205. 3 2
#> 2 2011-01-12 74384. 833320. 3 2
#>
#> [[4]]
#> # A tibble: 2 x 5
#> date x y ID interval
#> <date> <dbl> <dbl> <int> <int>
#> 1 2011-01-08 63251. 853296. 4 2
#> 2 2011-01-13 70607. 803073. 4 2
#>
#> [[5]]
#> # A tibble: 2 x 5
#> date x y ID interval
#> <date> <dbl> <dbl> <int> <int>
#> 1 2011-01-09 64284. 889136. 5 2
#> 2 2011-01-14 63190. 866626. 5 2
由 reprex package (v2.0.0)
于 2021-06-27 创建