对时区向量使用 with_tz
with_tz with a vector of timezones
我有一个像这样的数据框:
library(dplyr)
# Example data: UTC timestamps stored as character strings, each paired with
# the name of the time zone it should be displayed in.
# tibble() replaces the deprecated data_frame() constructor.
data <- tibble(
  timestamp_utc = c(
    "2015-11-18 03:55:04", "2015-11-18 03:55:08", "2015-11-18 03:55:10"
  ),
  local_tz = c(
    "America/New_York", "America/Los_Angeles", "America/Indiana/Indianapolis"
  )
)
我需要创建一个新变量,将 UTC 时间戳转换为 local_tz
列中定义的本地时间。但是,format
和 with_tz
(来自 lubridate
)都只需要一个时区,而不是时区向量。我正在寻找这样的东西:
# Desired call -- per the surrounding text this does not work as written,
# because with_tz() takes a single time zone rather than a vector of them.
mutate(data, timestamp_local = with_tz(timestamp_utc, tzone = local_tz))
有什么想法吗?
首先确保数据已加载为日期时间类型——我需要先进行转换:
# Parse the character timestamps into POSIXct, interpreting them as UTC.
data$timestamp_utc <- as.POSIXct(data$timestamp_utc, tz = "UTC")
然后您可以使用 dplyr
中的函数 rowwise
,结合 do
:
library(lubridate)
library(dplyr)
# Convert one row at a time so that every timestamp gets its own time zone.
# do() with a named argument produces a list-column, which lets each element
# keep its own time zone.
z <- data %>%
  rowwise() %>%
  do(timestamp_local = with_tz(.$timestamp_utc, tzone = .$local_tz))
# Attach the list-column to the original data, then print it.
data[["timestamp_local"]] <- z[["timestamp_local"]]
data[["timestamp_local"]]
[[1]]
[1] "2015-11-17 22:55:04 EST"
[[2]]
[1] "2015-11-17 19:55:08 PST"
[[3]]
[1] "2015-11-17 22:55:10 EST"
我们需要将 timestamp_local 列设为列表,否则所有时区都将转换回一个时区,一个向量中只能有一个时区。
这是另一种方法。采用这种方法时,结果必须是字符串,否则 unlist()
或 c()
会把列表中的每个元素都转换回系统时区。
不过它仍然很慢,因为没有矢量化。
#' Format UTC timestamps as local-time strings, one time zone per element
#'
#' @param timestamp_utc A date-time vector (e.g. POSIXct in UTC).
#' @param local_tz A character vector of time zone names, parallel to
#'   `timestamp_utc`.
#' @return A character vector the same length as `timestamp_utc`, each element
#'   formatted as "%FT%T%z" in its own time zone.
get_local_time <- function(timestamp_utc, local_tz) {
  # Strings are returned because a single date-time vector can carry only one
  # time zone.
  # seq_along() (rather than seq(length(x))) keeps empty input from iterating
  # over c(1, 0); vapply() pins the result type to character.
  vapply(
    seq_along(timestamp_utc),
    function(i) format(with_tz(timestamp_utc[i], local_tz[i]), "%FT%T%z"),
    character(1)
  )
}
# get_local_time() names its second parameter `local_tz`; it has no `tzone`
# argument, so the zones are passed positionally.
mutate(data, timestamp_local = get_local_time(timestamp_utc, local_tz))
Source: local data frame [3 x 3]
timestamp_utc local_tz timestamp_local
(time) (chr) (chr)
1 2015-11-18 03:55:04 America/New_York 2015-11-17T22:55:04-0500
2 2015-11-18 03:55:08 America/Los_Angeles 2015-11-17T19:55:08-0800
3 2015-11-18 03:55:10 America/Indiana/Indianapolis 2015-11-17T22:55:10-0500
更新2015-11-24
使用 dplyr::combine()
而不是 unlist()
允许变量保留具有正确时区属性的日期时间,而不是转换为字符串。
#' Convert UTC timestamps to local time, one time zone per element
#'
#' @param timestamp_utc A date-time vector (e.g. POSIXct in UTC).
#' @param local_tz A character vector of time zone names, parallel to
#'   `timestamp_utc`.
#' @return A date-time vector built by combining the per-element conversions;
#'   empty input is returned unchanged.
get_local_time <- function(timestamp_utc, local_tz) {
  # Nothing to convert: hand back the empty vector instead of indexing
  # element 1 of a zero-length input, as seq(length(x)) would.
  if (length(timestamp_utc) == 0L) {
    return(timestamp_utc)
  }
  converted <- lapply(
    seq_along(timestamp_utc),
    function(i) with_tz(timestamp_utc[i], local_tz[i])
  )
  # NOTE(review): dplyr::combine() is deprecated in dplyr >= 1.0.0; kept here
  # to preserve the original behaviour -- consider vctrs::vec_c().
  combine(converted)
}
# get_local_time() names its second parameter `local_tz`; it has no `tzone`
# argument, so the zones are passed positionally.
mutate(data, timestamp_local = get_local_time(timestamp_utc, local_tz))
Source: local data frame [3 x 3]
timestamp_utc local_tz timestamp_local
(time) (chr) (time)
1 2015-11-18 03:55:04 America/New_York 2015-11-17T22:55:04
2 2015-11-18 03:55:08 America/Los_Angeles 2015-11-17T19:55:08
3 2015-11-18 03:55:10 America/Indiana/Indianapolis 2015-11-17T22:55:10
可以按如下方式矢量化时区转换
library(dplyr)
library(lubridate)
# Convert `ts` to the clock time of zone `tz`, then relabel that clock time as
# UTC so results for different zones can share one date-time vector.
with_tz_utc <- function(ts, tz) force_tz(with_tz(ts, tz), "UTC")

# Element-wise version of with_tz_utc(): apply it per (timestamp, zone) pair
# and turn the simplified result back into a date-time with as_datetime().
# Written as an explicit closure instead of compose(), which none of the
# packages loaded here (dplyr, lubridate) provides.
as_datetime_with_tz_utc <- local({
  per_element <- Vectorize(with_tz_utc)
  function(...) as_datetime(per_element(...))
})
现在照常使用mutate
# Parse the timestamps, then derive each row's local clock time with the
# vectorized helper.
mutate(
  data,
  timestamp_utc = as_datetime(timestamp_utc),
  timestamp_local = as_datetime_with_tz_utc(timestamp_utc, local_tz)
)
另一种做法(速度要慢得多)是将 rowwise()
与 mutate()
搭配使用,最后用 ungroup()
撤销 rowwise 分组,如下所示:
# Slower alternative: apply the scalar helper with_tz_utc() one row at a time.
data %>%
  rowwise() %>%
  mutate(
    timestamp_utc = as_datetime(timestamp_utc),
    timestamp_local = with_tz_utc(timestamp_utc, local_tz)
  ) %>%
  ungroup() # drop the rowwise grouping again
诀窍是在 mutate()
之前先用 group_by()
按 local_tz 分组:
# Parse the character timestamps into POSIXct, interpreting them as UTC.
data$timestamp_utc <- as.POSIXct(data$timestamp_utc, tz = "UTC")
# Within each group local_tz holds one repeated value, so hand with_tz() a
# single zone (the first element) instead of the whole column slice.
# NOTE(review): once the groups are recombined the column is a single
# date-time vector, which can carry only one time zone -- confirm the printed
# result is what you expect.
data %>%
  group_by(local_tz) %>%
  mutate(timestamp_local = with_tz(timestamp_utc, local_tz[1L])) %>%
  ungroup()
一个 data.table 选项对我很有效:
# data.table form: add timestamp_local by reference, evaluated once per
# distinct local_tz.
# NOTE(review): assumes `data` is a data.table and timestamp_utc is already a
# date-time -- confirm before running.
data[, timestamp_local := with_tz(timestamp_utc, local_tz), by=local_tz]
我有一个像这样的数据框:
library(dplyr)
# Example data: UTC timestamps stored as character strings, each paired with
# the name of the time zone it should be displayed in.
# tibble() replaces the deprecated data_frame() constructor.
data <- tibble(
  timestamp_utc = c(
    "2015-11-18 03:55:04", "2015-11-18 03:55:08", "2015-11-18 03:55:10"
  ),
  local_tz = c(
    "America/New_York", "America/Los_Angeles", "America/Indiana/Indianapolis"
  )
)
我需要创建一个新变量,将 UTC 时间戳转换为 local_tz
列中定义的本地时间。但是,format
和 with_tz
(来自 lubridate
)都只需要一个时区,而不是时区向量。我正在寻找这样的东西:
# Desired call -- per the surrounding text this does not work as written,
# because with_tz() takes a single time zone rather than a vector of them.
mutate(data, timestamp_local = with_tz(timestamp_utc, tzone = local_tz))
有什么想法吗?
首先确保数据已加载为日期时间类型——我需要先进行转换:
# Parse the character timestamps into POSIXct, interpreting them as UTC.
data$timestamp_utc <- as.POSIXct(data$timestamp_utc, tz = "UTC")
然后您可以使用 dplyr
中的函数 rowwise
,结合 do
:
library(lubridate)
library(dplyr)
# Convert one row at a time so that every timestamp gets its own time zone.
# do() with a named argument produces a list-column, which lets each element
# keep its own time zone.
z <- data %>%
  rowwise() %>%
  do(timestamp_local = with_tz(.$timestamp_utc, tzone = .$local_tz))
# Attach the list-column to the original data, then print it.
data[["timestamp_local"]] <- z[["timestamp_local"]]
data[["timestamp_local"]]
[[1]]
[1] "2015-11-17 22:55:04 EST"
[[2]]
[1] "2015-11-17 19:55:08 PST"
[[3]]
[1] "2015-11-17 22:55:10 EST"
我们需要将 timestamp_local 列设为列表,否则所有时区都将转换回一个时区,一个向量中只能有一个时区。
这是另一种方法。采用这种方法时,结果必须是字符串,否则 unlist()
或 c()
会把列表中的每个元素都转换回系统时区。
不过它仍然很慢,因为没有矢量化。
#' Format UTC timestamps as local-time strings, one time zone per element
#'
#' @param timestamp_utc A date-time vector (e.g. POSIXct in UTC).
#' @param local_tz A character vector of time zone names, parallel to
#'   `timestamp_utc`.
#' @return A character vector the same length as `timestamp_utc`, each element
#'   formatted as "%FT%T%z" in its own time zone.
get_local_time <- function(timestamp_utc, local_tz) {
  # Strings are returned because a single date-time vector can carry only one
  # time zone.
  # seq_along() (rather than seq(length(x))) keeps empty input from iterating
  # over c(1, 0); vapply() pins the result type to character.
  vapply(
    seq_along(timestamp_utc),
    function(i) format(with_tz(timestamp_utc[i], local_tz[i]), "%FT%T%z"),
    character(1)
  )
}
# get_local_time() names its second parameter `local_tz`; it has no `tzone`
# argument, so the zones are passed positionally.
mutate(data, timestamp_local = get_local_time(timestamp_utc, local_tz))
Source: local data frame [3 x 3]
timestamp_utc local_tz timestamp_local
(time) (chr) (chr)
1 2015-11-18 03:55:04 America/New_York 2015-11-17T22:55:04-0500
2 2015-11-18 03:55:08 America/Los_Angeles 2015-11-17T19:55:08-0800
3 2015-11-18 03:55:10 America/Indiana/Indianapolis 2015-11-17T22:55:10-0500
更新2015-11-24
使用 dplyr::combine()
而不是 unlist()
允许变量保留具有正确时区属性的日期时间,而不是转换为字符串。
#' Convert UTC timestamps to local time, one time zone per element
#'
#' @param timestamp_utc A date-time vector (e.g. POSIXct in UTC).
#' @param local_tz A character vector of time zone names, parallel to
#'   `timestamp_utc`.
#' @return A date-time vector built by combining the per-element conversions;
#'   empty input is returned unchanged.
get_local_time <- function(timestamp_utc, local_tz) {
  # Nothing to convert: hand back the empty vector instead of indexing
  # element 1 of a zero-length input, as seq(length(x)) would.
  if (length(timestamp_utc) == 0L) {
    return(timestamp_utc)
  }
  converted <- lapply(
    seq_along(timestamp_utc),
    function(i) with_tz(timestamp_utc[i], local_tz[i])
  )
  # NOTE(review): dplyr::combine() is deprecated in dplyr >= 1.0.0; kept here
  # to preserve the original behaviour -- consider vctrs::vec_c().
  combine(converted)
}
# get_local_time() names its second parameter `local_tz`; it has no `tzone`
# argument, so the zones are passed positionally.
mutate(data, timestamp_local = get_local_time(timestamp_utc, local_tz))
Source: local data frame [3 x 3]
timestamp_utc local_tz timestamp_local
(time) (chr) (time)
1 2015-11-18 03:55:04 America/New_York 2015-11-17T22:55:04
2 2015-11-18 03:55:08 America/Los_Angeles 2015-11-17T19:55:08
3 2015-11-18 03:55:10 America/Indiana/Indianapolis 2015-11-17T22:55:10
可以按如下方式矢量化时区转换
library(dplyr)
library(lubridate)
# Convert `ts` to the clock time of zone `tz`, then relabel that clock time as
# UTC so results for different zones can share one date-time vector.
with_tz_utc <- function(ts, tz) force_tz(with_tz(ts, tz), "UTC")

# Element-wise version of with_tz_utc(): apply it per (timestamp, zone) pair
# and turn the simplified result back into a date-time with as_datetime().
# Written as an explicit closure instead of compose(), which none of the
# packages loaded here (dplyr, lubridate) provides.
as_datetime_with_tz_utc <- local({
  per_element <- Vectorize(with_tz_utc)
  function(...) as_datetime(per_element(...))
})
现在照常使用mutate
# Parse the timestamps, then derive each row's local clock time with the
# vectorized helper.
mutate(
  data,
  timestamp_utc = as_datetime(timestamp_utc),
  timestamp_local = as_datetime_with_tz_utc(timestamp_utc, local_tz)
)
另一种做法(速度要慢得多)是将 rowwise()
与 mutate()
搭配使用,最后用 ungroup()
撤销 rowwise 分组,如下所示:
# Slower alternative: apply the scalar helper with_tz_utc() one row at a time.
data %>%
  rowwise() %>%
  mutate(
    timestamp_utc = as_datetime(timestamp_utc),
    timestamp_local = with_tz_utc(timestamp_utc, local_tz)
  ) %>%
  ungroup() # drop the rowwise grouping again
诀窍是在 mutate()
之前先用 group_by()
按 local_tz 分组:
# Parse the character timestamps into POSIXct, interpreting them as UTC.
data$timestamp_utc <- as.POSIXct(data$timestamp_utc, tz = "UTC")
# Within each group local_tz holds one repeated value, so hand with_tz() a
# single zone (the first element) instead of the whole column slice.
# NOTE(review): once the groups are recombined the column is a single
# date-time vector, which can carry only one time zone -- confirm the printed
# result is what you expect.
data %>%
  group_by(local_tz) %>%
  mutate(timestamp_local = with_tz(timestamp_utc, local_tz[1L])) %>%
  ungroup()
一个 data.table 选项对我很有效:
# data.table form: add timestamp_local by reference, evaluated once per
# distinct local_tz.
# NOTE(review): assumes `data` is a data.table and timestamp_utc is already a
# date-time -- confirm before running.
data[, timestamp_local := with_tz(timestamp_utc, local_tz), by=local_tz]