将一天分成多个部分
Cut day into multiple parts
要为我的数据找到合适的聚合级别,我必须将一天分成不同大小的帧。
示例数据:
# Sample data: five timestamps from a single day, stored as text.
da <- data.frame(
  timestamp = c(
    "2016-01-24 01:17:37 GMT",
    "2016-01-24 02:09:41 GMT",
    "2016-01-24 13:34:35 GMT",
    "2016-01-24 15:17:56 GMT",
    "2016-01-24 18:14:55 GMT"
  )
)
da
timestamp
1 2016-01-24 01:17:37 GMT
2 2016-01-24 02:09:41 GMT
3 2016-01-24 13:34:35 GMT
4 2016-01-24 15:17:56 GMT
5 2016-01-24 18:14:55 GMT
例如,我可以开始将一天分成 24 部分。然后 0:00 到 1:00 是第 1 部分,1:00 到 2:00 是第 2 部分等
# Desired result when the day is split into 24 one-hour parts.
da2 <- data.frame(
  timestamp = c(
    "2016-01-24 01:17:37 GMT",
    "2016-01-24 02:09:41 GMT",
    "2016-01-24 13:34:35 GMT",
    "2016-01-24 15:17:56 GMT",
    "2016-01-24 18:14:55 GMT"
  ),
  daypart = c(2, 3, 14, 16, 19)
)
da2
timestamp daypart
1 2016-01-24 01:17:37 GMT 2
2 2016-01-24 02:09:41 GMT 3
3 2016-01-24 13:34:35 GMT 14
4 2016-01-24 15:17:56 GMT 16
5 2016-01-24 18:14:55 GMT 19
或分成48份。然后 0:00 到 0:30 是第 1 部分,0:30 到 1:00 第 2 部分等等:
# Desired result when the day is split into 48 half-hour parts.
# (The stray console continuation prompt "+" before `daypart` was removed;
# it made the call a syntax error when run as a script.)
da48 <- data.frame(
  timestamp = c(
    "2016-01-24 01:17:37 GMT",
    "2016-01-24 02:09:41 GMT",
    "2016-01-24 13:34:35 GMT",
    "2016-01-24 15:17:56 GMT",
    "2016-01-24 18:14:55 GMT"
  ),
  daypart = c(3, 5, 28, 31, 37)
)
da48
timestamp daypart
1 2016-01-24 01:17:37 GMT 3
2 2016-01-24 02:09:41 GMT 5
3 2016-01-24 13:34:35 GMT 28
4 2016-01-24 15:17:56 GMT 31
5 2016-01-24 18:14:55 GMT 37
我找到了这篇关于如何将时间转换为分类变量的帖子(how to convert time to categorical variable),它已经有所帮助。但是,我该如何编写代码,使得只需更改想要切分的份数就可以了?
将 `timestamp` 转为 `POSIXlt` 对象,并通过 `$` 运算符提取小时数。
# POSIXlt exposes the hour of day (0-23); adding 1 gives the part number 1-24.
da[["daypart"]] <- 1 + as.POSIXlt(da[["timestamp"]])$hour
# timestamp daypart
# 1 2016-01-24 01:17:37 GMT 2
# 2 2016-01-24 02:09:41 GMT 3
# 3 2016-01-24 13:34:35 GMT 14
# 4 2016-01-24 15:17:56 GMT 16
# 5 2016-01-24 18:14:55 GMT 19
编辑:将时间分成 48 部分
# Split the day into 48 half-hour parts: every full hour contributes two
# parts, and the second half-hour starts at minute 30 inclusive, so that
# 0:30 belongs to part 2 (0:00-0:30 is part 1, 0:30-1:00 is part 2).
time <- as.POSIXlt(da$timestamp)
da$daypart48 <- time$hour * 2 + (time$min >= 30) + 1
da
# timestamp daypart daypart48
# 1 2016-01-24 01:17:37 GMT 2 3
# 2 2016-01-24 02:09:41 GMT 3 5
# 3 2016-01-24 13:34:35 GMT 14 28
# 4 2016-01-24 15:17:56 GMT 16 31
# 5 2016-01-24 18:14:55 GMT 19 37
你可以用 `strsplit` 按 `:` 拆分时间,
# Format every timestamp as "HH:MM:SS", split it on ":" and stack the pieces
# row-wise into a character matrix (columns: hour, minute, second).
# The outer parentheses make the assigned value print.
(tmp <- do.call(
  rbind,
  strsplit(strftime(as.POSIXct(da$timestamp), "%H:%M:%S"), ":")
))
# [,1] [,2] [,3]
# [1,] "01" "17" "37"
# [2,] "02" "09" "41"
# [3,] "13" "34" "35"
# [4,] "15" "17" "56"
# [5,] "18" "14" "55"
以秒计算,
# Convert the character matrix to numbers and weight the hour, minute and
# second columns by 3600, 60 and 1 to get the second of the day per row.
(sec <- matrix(as.numeric(tmp), nrow = nrow(tmp)) %*% c(3600, 60, 1))
# [,1]
# [1,] 4657
# [2,] 7781
# [3,] 48875
# [4,] 55076
# [5,] 65695
再将其与用 `cut` 分成 24 块的全天秒数进行 `match`。
# Assign each timestamp to one of 24 equal parts of the day by binning its
# second-of-day directly with cut(); no per-second lookup table or match()
# is needed. Intervals are closed on the left ([0, 3600) is part 1, ...), so
# midnight (0 s) lands in part 1 and an exact hour boundary such as 01:00:00
# starts the next part. include.lowest = TRUE keeps the upper end (86400 s)
# in part 24 instead of producing NA.
da$daypart <- cut(as.vector(sec),
                  breaks = seq(0, 24*60*60, length.out = 24 + 1),
                  labels = 1:24, right = FALSE, include.lowest = TRUE)
结果
# Show the data frame including the newly added daypart column.
da
# timestamp daypart
# 1 2016-01-24 01:17:37 GMT 2
# 2 2016-01-24 02:09:41 GMT 3
# 3 2016-01-24 13:34:35 GMT 14
# 4 2016-01-24 15:17:56 GMT 16
# 5 2016-01-24 18:14:55 GMT 19
要为我的数据找到合适的聚合级别,我必须将一天分成不同大小的帧。
示例数据:
# Sample data: five timestamps from a single day, stored as text.
da <- data.frame(
  timestamp = c(
    "2016-01-24 01:17:37 GMT",
    "2016-01-24 02:09:41 GMT",
    "2016-01-24 13:34:35 GMT",
    "2016-01-24 15:17:56 GMT",
    "2016-01-24 18:14:55 GMT"
  )
)
da
timestamp
1 2016-01-24 01:17:37 GMT
2 2016-01-24 02:09:41 GMT
3 2016-01-24 13:34:35 GMT
4 2016-01-24 15:17:56 GMT
5 2016-01-24 18:14:55 GMT
例如,我可以开始将一天分成 24 部分。然后 0:00 到 1:00 是第 1 部分,1:00 到 2:00 是第 2 部分等
# Desired result when the day is split into 24 one-hour parts.
da2 <- data.frame(
  timestamp = c(
    "2016-01-24 01:17:37 GMT",
    "2016-01-24 02:09:41 GMT",
    "2016-01-24 13:34:35 GMT",
    "2016-01-24 15:17:56 GMT",
    "2016-01-24 18:14:55 GMT"
  ),
  daypart = c(2, 3, 14, 16, 19)
)
da2
timestamp daypart
1 2016-01-24 01:17:37 GMT 2
2 2016-01-24 02:09:41 GMT 3
3 2016-01-24 13:34:35 GMT 14
4 2016-01-24 15:17:56 GMT 16
5 2016-01-24 18:14:55 GMT 19
或分成48份。然后 0:00 到 0:30 是第 1 部分,0:30 到 1:00 第 2 部分等等:
# Desired result when the day is split into 48 half-hour parts.
# (The stray console continuation prompt "+" before `daypart` was removed;
# it made the call a syntax error when run as a script.)
da48 <- data.frame(
  timestamp = c(
    "2016-01-24 01:17:37 GMT",
    "2016-01-24 02:09:41 GMT",
    "2016-01-24 13:34:35 GMT",
    "2016-01-24 15:17:56 GMT",
    "2016-01-24 18:14:55 GMT"
  ),
  daypart = c(3, 5, 28, 31, 37)
)
da48
timestamp daypart
1 2016-01-24 01:17:37 GMT 3
2 2016-01-24 02:09:41 GMT 5
3 2016-01-24 13:34:35 GMT 28
4 2016-01-24 15:17:56 GMT 31
5 2016-01-24 18:14:55 GMT 37
我找到了这篇关于如何将时间转换为分类变量的帖子(how to convert time to categorical variable),它已经有所帮助。但是,我该如何编写代码,使得只需更改想要切分的份数就可以了?
将 `timestamp` 转为 `POSIXlt` 对象,并通过 `$` 运算符提取小时数。
# POSIXlt exposes the hour of day (0-23); adding 1 gives the part number 1-24.
da[["daypart"]] <- 1 + as.POSIXlt(da[["timestamp"]])$hour
# timestamp daypart
# 1 2016-01-24 01:17:37 GMT 2
# 2 2016-01-24 02:09:41 GMT 3
# 3 2016-01-24 13:34:35 GMT 14
# 4 2016-01-24 15:17:56 GMT 16
# 5 2016-01-24 18:14:55 GMT 19
编辑:将时间分成 48 部分
# Split the day into 48 half-hour parts: every full hour contributes two
# parts, and the second half-hour starts at minute 30 inclusive, so that
# 0:30 belongs to part 2 (0:00-0:30 is part 1, 0:30-1:00 is part 2).
time <- as.POSIXlt(da$timestamp)
da$daypart48 <- time$hour * 2 + (time$min >= 30) + 1
da
# timestamp daypart daypart48
# 1 2016-01-24 01:17:37 GMT 2 3
# 2 2016-01-24 02:09:41 GMT 3 5
# 3 2016-01-24 13:34:35 GMT 14 28
# 4 2016-01-24 15:17:56 GMT 16 31
# 5 2016-01-24 18:14:55 GMT 19 37
你可以用 `strsplit` 按 `:` 拆分时间,
# Format every timestamp as "HH:MM:SS", split it on ":" and stack the pieces
# row-wise into a character matrix (columns: hour, minute, second).
# The outer parentheses make the assigned value print.
(tmp <- do.call(
  rbind,
  strsplit(strftime(as.POSIXct(da$timestamp), "%H:%M:%S"), ":")
))
# [,1] [,2] [,3]
# [1,] "01" "17" "37"
# [2,] "02" "09" "41"
# [3,] "13" "34" "35"
# [4,] "15" "17" "56"
# [5,] "18" "14" "55"
以秒计算,
# Convert the character matrix to numbers and weight the hour, minute and
# second columns by 3600, 60 and 1 to get the second of the day per row.
(sec <- matrix(as.numeric(tmp), nrow = nrow(tmp)) %*% c(3600, 60, 1))
# [,1]
# [1,] 4657
# [2,] 7781
# [3,] 48875
# [4,] 55076
# [5,] 65695
再将其与用 `cut` 分成 24 块的全天秒数进行 `match`。
# Assign each timestamp to one of 24 equal parts of the day by binning its
# second-of-day directly with cut(); no per-second lookup table or match()
# is needed. Intervals are closed on the left ([0, 3600) is part 1, ...), so
# midnight (0 s) lands in part 1 and an exact hour boundary such as 01:00:00
# starts the next part. include.lowest = TRUE keeps the upper end (86400 s)
# in part 24 instead of producing NA.
da$daypart <- cut(as.vector(sec),
                  breaks = seq(0, 24*60*60, length.out = 24 + 1),
                  labels = 1:24, right = FALSE, include.lowest = TRUE)
结果
# Show the data frame including the newly added daypart column.
da
# timestamp daypart
# 1 2016-01-24 01:17:37 GMT 2
# 2 2016-01-24 02:09:41 GMT 3
# 3 2016-01-24 13:34:35 GMT 14
# 4 2016-01-24 15:17:56 GMT 16
# 5 2016-01-24 18:14:55 GMT 19