在 dplyr 中解析日期时间
parsing date times in dplyr
我需要解析多个日期和时间。这是一个例子:
library(dplyr)
library(tidyr)
library(lubridate)
# Example data: one call date per row, plus one or two call times packed into
# a single comma-separated string (digits only, e.g. "912" or "1513").
date.called <- c("28-Feb", "10/1/16", "8/13/15")
call.times <- c("912", "1513", "922, 1810")
df <- data.frame(call.times, date.called, stringsAsFactors = FALSE)

# Split the packed times into one column per call and strip the blank left
# after the comma. Rows holding a single time get NA in time.2;
# fill = "right" states that explicitly instead of raising a
# "missing pieces" warning.
df <- df %>%
  separate(
    call.times, c("time.1", "time.2"),
    sep = ",", remove = FALSE, fill = "right"
  ) %>%
  mutate(time.1 = trimws(time.1), time.2 = trimws(time.2))
# Parse each split time together with its call date.
# NOTE: ifelse() drops the POSIXct class of its result, so time.1 / time.2
# come out as raw seconds since the epoch rather than date-times. That is the
# behaviour this question is about, so it is kept as-is here.
df %>% mutate(
  time.1 = ifelse(
    !is.na(time.1),
    parse_date_time(
      # gsub() inserts a colon before the last two digits ("912" -> "9:12").
      # The backreferences must be written "\\1" and "\\2" in an R string.
      sprintf(
        "%s %04s", date.called,
        gsub("^([0-9]+)([0-9]{2})$", "\\1:\\2", time.1)
      ),
      c("%m/%d/%y %H:%M", "%d-%b %H%M")
    ),
    time.1
  ),
  time.2 = ifelse(
    !is.na(time.2),
    parse_date_time(
      sprintf(
        "%s %04s", date.called,
        gsub("^([0-9]+)([0-9]{2})$", "\\1:\\2", time.2)
      ),
      c("%m/%d/%y %H:%M", "%d-%b %H%M")
    ),
    time.2
  )
)
我得到 df 的以下输出:
call.times time.1 time.2 date.called
1 912 -62162174880 <NA> 28-Feb
2 1513 1475334780 <NA> 10/1/16
3 922, 1810 1439457720 1439489400 8/13/15
相反,我正在尝试生成此输出:
call.times time.1 time.2 date.called
1 912 0000-02-28 09:12:00 <NA> 28-Feb
2 1513 2016-10-01 15:13:00 <NA> 10/1/16
3 922, 1810 2015-08-13 09:22:00 2015-08-13 18:10:00 8/13/15
感谢您的帮助。
我会这样做:
# Apply the same parser to every column whose name starts with "time"
# (time.1 and time.2). No ifelse() is involved, so the date-time class
# returned by parse_date_time() is kept.
df %>% mutate_at(vars(starts_with("time")), function(x) {
  parse_date_time(
    # gsub() inserts a colon before the last two digits ("912" -> "9:12").
    # The backreferences must be written "\\1" and "\\2" in an R string.
    sprintf(
      "%s %04s", date.called,
      gsub("^([0-9]+)([0-9]{2})$", "\\1:\\2", x)
    ),
    c("%m/%d/%y %H:%M", "%d-%b %H%M")
  )
})
# call.times time.1 time.2 date.called
# 1 912 0000-02-28 09:12:00 <NA> 28-Feb
# 2 1513 2016-10-01 15:13:00 <NA> 10/1/16
# 3 922, 1810 2015-08-13 09:22:00 2015-08-13 18:10:00 8/13/15
正如 LukeA 在评论中所说,ifelse 会剥离属性,从而丢失日期时间的类(格式)。您可以把 parse_date_time 函数移到 ifelse 之外来规避此行为。对您的代码做如下修改即可得到想要的结果:
# Build the "date time" strings inside ifelse() and parse them outside it, so
# the date-time class returned by parse_date_time() is not stripped.
df %>%
  mutate(
    time.1 = parse_date_time(
      # Each column is guarded by its own NA check; testing time.2 here would
      # blank out time.1 on every row that has only one call time.
      ifelse(
        !is.na(time.1),
        sprintf(
          "%s %04s", date.called,
          # Insert a colon before the last two digits ("912" -> "9:12").
          gsub("^([0-9]+)([0-9]{2})$", "\\1:\\2", time.1)
        ),
        NA_character_
      ),
      c("%m/%d/%y %H:%M", "%d-%b %H%M")
    ),
    time.2 = parse_date_time(
      ifelse(
        !is.na(time.2),
        sprintf(
          "%s %04s", date.called,
          gsub("^([0-9]+)([0-9]{2})$", "\\1:\\2", time.2)
        ),
        NA_character_
      ),
      c("%m/%d/%y %H:%M", "%d-%b %H%M")
    )
  )
结果:
call.times time.1 time.2 date.called
1 912 0000-02-28 09:12:00 <NA> 28-Feb
2 1513 2016-10-01 15:13:00 <NA> 10/1/16
3 922, 1810 2015-08-13 09:22:00 2015-08-13 18:10:00 8/13/15
我需要解析多个日期和时间。这是一个例子:
library(dplyr)
library(tidyr)
library(lubridate)
# Example data: one call date per row, plus one or two call times packed into
# a single comma-separated string (digits only, e.g. "912" or "1513").
date.called <- c("28-Feb", "10/1/16", "8/13/15")
call.times <- c("912", "1513", "922, 1810")
df <- data.frame(call.times, date.called, stringsAsFactors = FALSE)

# Split the packed times into one column per call and strip the blank left
# after the comma. Rows holding a single time get NA in time.2;
# fill = "right" states that explicitly instead of raising a
# "missing pieces" warning.
df <- df %>%
  separate(
    call.times, c("time.1", "time.2"),
    sep = ",", remove = FALSE, fill = "right"
  ) %>%
  mutate(time.1 = trimws(time.1), time.2 = trimws(time.2))
# Parse each split time together with its call date.
# NOTE: ifelse() drops the POSIXct class of its result, so time.1 / time.2
# come out as raw seconds since the epoch rather than date-times. That is the
# behaviour this question is about, so it is kept as-is here.
df %>% mutate(
  time.1 = ifelse(
    !is.na(time.1),
    parse_date_time(
      # gsub() inserts a colon before the last two digits ("912" -> "9:12").
      # The backreferences must be written "\\1" and "\\2" in an R string.
      sprintf(
        "%s %04s", date.called,
        gsub("^([0-9]+)([0-9]{2})$", "\\1:\\2", time.1)
      ),
      c("%m/%d/%y %H:%M", "%d-%b %H%M")
    ),
    time.1
  ),
  time.2 = ifelse(
    !is.na(time.2),
    parse_date_time(
      sprintf(
        "%s %04s", date.called,
        gsub("^([0-9]+)([0-9]{2})$", "\\1:\\2", time.2)
      ),
      c("%m/%d/%y %H:%M", "%d-%b %H%M")
    ),
    time.2
  )
)
我得到 df 的以下输出:
call.times time.1 time.2 date.called
1 912 -62162174880 <NA> 28-Feb
2 1513 1475334780 <NA> 10/1/16
3 922, 1810 1439457720 1439489400 8/13/15
相反,我正在尝试生成此输出:
call.times time.1 time.2 date.called
1 912 0000-02-28 09:12:00 <NA> 28-Feb
2 1513 2016-10-01 15:13:00 <NA> 10/1/16
3 922, 1810 2015-08-13 09:22:00 2015-08-13 18:10:00 8/13/15
感谢您的帮助。
我会这样做:
# Apply the same parser to every column whose name starts with "time"
# (time.1 and time.2). No ifelse() is involved, so the date-time class
# returned by parse_date_time() is kept.
df %>% mutate_at(vars(starts_with("time")), function(x) {
  parse_date_time(
    # gsub() inserts a colon before the last two digits ("912" -> "9:12").
    # The backreferences must be written "\\1" and "\\2" in an R string.
    sprintf(
      "%s %04s", date.called,
      gsub("^([0-9]+)([0-9]{2})$", "\\1:\\2", x)
    ),
    c("%m/%d/%y %H:%M", "%d-%b %H%M")
  )
})
# call.times time.1 time.2 date.called
# 1 912 0000-02-28 09:12:00 <NA> 28-Feb
# 2 1513 2016-10-01 15:13:00 <NA> 10/1/16
# 3 922, 1810 2015-08-13 09:22:00 2015-08-13 18:10:00 8/13/15
正如 LukeA 在评论中所说,ifelse 会剥离属性,从而丢失日期时间的类(格式)。您可以把 parse_date_time 函数移到 ifelse 之外来规避此行为。对您的代码做如下修改即可得到想要的结果:
# Build the "date time" strings inside ifelse() and parse them outside it, so
# the date-time class returned by parse_date_time() is not stripped.
df %>%
  mutate(
    time.1 = parse_date_time(
      # Each column is guarded by its own NA check; testing time.2 here would
      # blank out time.1 on every row that has only one call time.
      ifelse(
        !is.na(time.1),
        sprintf(
          "%s %04s", date.called,
          # Insert a colon before the last two digits ("912" -> "9:12").
          gsub("^([0-9]+)([0-9]{2})$", "\\1:\\2", time.1)
        ),
        NA_character_
      ),
      c("%m/%d/%y %H:%M", "%d-%b %H%M")
    ),
    time.2 = parse_date_time(
      ifelse(
        !is.na(time.2),
        sprintf(
          "%s %04s", date.called,
          gsub("^([0-9]+)([0-9]{2})$", "\\1:\\2", time.2)
        ),
        NA_character_
      ),
      c("%m/%d/%y %H:%M", "%d-%b %H%M")
    )
  )
结果:
call.times time.1 time.2 date.called
1 912 0000-02-28 09:12:00 <NA> 28-Feb
2 1513 2016-10-01 15:13:00 <NA> 10/1/16
3 922, 1810 2015-08-13 09:22:00 2015-08-13 18:10:00 8/13/15