获取Dataframe中日期范围对应的名称
Obtain Name Corresponding to a Date Range in Dataframe
我有两个数据框。一个包含任命员工的经理的任期,另一个包含员工任期的详细信息。我想在工作人员中创建一个新列table 以获取任命她的经理姓名。
staff_df <- dput(structure(list(employee_name = c("alex", "timor", "tai", "lil",
"mae", "jae"), joined = c("26-Jan-50", "23-Sep-60", "5-Mar-61",
"8-Sep-72", "8-Sep-82", "9-Mar-93"), retired = c("18-Sep-51",
"24-Jan-63", "8-Jun-66", "5-Nov-74", "6-Jun-89", "26-Jul-98")), row.names = c(NA,
-6L), class = c("tbl_df", "tbl", "data.frame")))
manager_df <- dput(structure(list(manager_name = c("john", "doe", "mary", "jane"
), date_became_manager = c("26-Jan-50", "27-Aug-50", "8-Nov-68",
"5-Jan-84"), manager_till = c("26-Aug-50", "7-Nov-68", "4-Jan-84",
"23-Dec-01")), row.names = c(NA, -4L), class = c("tbl_df", "tbl",
"data.frame")))
I tried the following:
staff_df$appointed_by = NA
staff_df$appointed_by <- manager_df$manager_name[which(staff_df$joined > manager_df$date_became_manager & staff_df$joined <= manager_df$manager_till)]'''
Effectively, I need the new column in the staff_df called appointed_by and obtain the name of the manager who appointed the personnel. ie., Alex was appointed by John, Timor was appointed by doe, Tai was appointed by Doe (...) Jae was appointed by Jane.
EDIT: dput()
library(lubridate)
library(dplyr)
staff_df <- staff_df %>%
mutate(
joined = dmy(joined),
retired = dmy(retired)
)
manager_df <- manager_df %>%
mutate(
date_became_manager = dmy(date_became_manager),
manager_till = dmy(manager_till)
)
library(sqldf)
res <- sqldf("SELECT *
FROM staff_df
INNER JOIN manager_df ON
staff_df.joined>=manager_df.date_became_manager AND
staff_df.joined<manager_till")
> res
employee_name joined retired manager_name date_became_manager manager_till
1 alex 2050-01-26 2051-09-18 john 2050-01-26 2050-08-26
2 timor 2060-09-23 2063-01-24 doe 2050-08-27 2068-11-07
3 tai 2061-03-05 2066-06-08 doe 2050-08-27 2068-11-07
4 jae 1993-03-09 1998-07-26 jane 1984-01-05 2001-12-23
我提出这个 tidyverse
策略。但是,在给定的dput中,日期是字符格式,所以我先把它们改成日期类型。
staff_df <- dput(structure(list(employee_name = c("alex", "timor", "tai", "lil",
"mae", "jae"), joined = c("26-Jan-50", "23-Sep-60", "5-Mar-61",
"8-Sep-72", "8-Sep-82", "9-Mar-93"), retired = c("18-Sep-51",
"24-Jan-63", "8-Jun-66", "5-Nov-74", "6-Jun-89", "26-Jul-98")), row.names = c(NA,
-6L), class = c("tbl_df", "tbl", "data.frame")))
#> structure(list(employee_name = c("alex", "timor", "tai", "lil",
#> "mae", "jae"), joined = c("26-Jan-50", "23-Sep-60", "5-Mar-61",
#> "8-Sep-72", "8-Sep-82", "9-Mar-93"), retired = c("18-Sep-51",
#> "24-Jan-63", "8-Jun-66", "5-Nov-74", "6-Jun-89", "26-Jul-98")), row.names = c(NA,
#> -6L), class = c("tbl_df", "tbl", "data.frame"))
manager_df <- dput(structure(list(manager_name = c("john", "doe", "mary", "jane"
), date_became_manager = c("26-Jan-50", "27-Aug-50", "8-Nov-68",
"5-Jan-84"), manager_till = c("26-Aug-50", "7-Nov-68", "4-Jan-84",
"23-Dec-01")), row.names = c(NA, -4L), class = c("tbl_df", "tbl",
"data.frame")))
#> structure(list(manager_name = c("john", "doe", "mary", "jane"
#> ), date_became_manager = c("26-Jan-50", "27-Aug-50", "8-Nov-68",
#> "5-Jan-84"), manager_till = c("26-Aug-50", "7-Nov-68", "4-Jan-84",
#> "23-Dec-01")), row.names = c(NA, -4L), class = c("tbl_df", "tbl",
#> "data.frame"))
#change data_type
staff_df$joined <- as.Date(staff_df$joined, '%d-%b-%y')
staff_df$retired <- as.Date(staff_df$retired, '%d-%b-%y')
manager_df$date_became_manager <- as.Date(manager_df$date_became_manager, '%d-%b-%y')
manager_df$manager_till <- as.Date(manager_df$manager_till, '%d-%b-%y')
library(tidyverse)
staff_df %>%
mutate(manager_app = map_chr(joined, ~{xx <- manager_df$manager_name[manager_df$date_became_manager <= .x &
manager_df$manager_till >= .x];
if (length(xx) == 0) NA else xx})) %>%
filter(!is.na(manager_app)) %>%
left_join(manager_df, by = c('manager_app' = 'manager_name'))
#> # A tibble: 4 x 6
#> employee_name joined retired manager_app date_became_mana~ manager_till
#> <chr> <date> <date> <chr> <date> <date>
#> 1 alex 2050-01-26 2051-09-18 john 2050-01-26 2050-08-26
#> 2 timor 2060-09-23 2063-01-24 doe 2050-08-27 2068-11-07
#> 3 tai 2061-03-05 2066-06-08 doe 2050-08-27 2068-11-07
#> 4 jae 1993-03-09 1998-07-26 jane 1984-01-05 2001-12-23
甚至 filter
这一步也是可选的
staff_df %>%
mutate(manager_app = map_chr(joined, ~{xx <- manager_df$manager_name[manager_df$date_became_manager <= .x &
manager_df$manager_till >= .x];
if (length(xx) == 0) NA else xx})) %>%
left_join(manager_df, by = c('manager_app' = 'manager_name'))
# A tibble: 6 x 6
employee_name joined retired manager_app date_became_manager manager_till
<chr> <date> <date> <chr> <date> <date>
1 alex 2050-01-26 2051-09-18 john 2050-01-26 2050-08-26
2 timor 2060-09-23 2063-01-24 doe 2050-08-27 2068-11-07
3 tai 2061-03-05 2066-06-08 doe 2050-08-27 2068-11-07
4 lil 1972-09-08 1974-11-05 NA NA NA
5 mae 1982-09-08 1989-06-06 NA NA NA
6 jae 1993-03-09 1998-07-26 jane 1984-01-05 2001-12-23
我有两个数据框。一个包含任命员工的经理的任期,另一个包含员工任期的详细信息。我想在工作人员中创建一个新列table 以获取任命她的经理姓名。
staff_df <- dput(structure(list(employee_name = c("alex", "timor", "tai", "lil",
"mae", "jae"), joined = c("26-Jan-50", "23-Sep-60", "5-Mar-61",
"8-Sep-72", "8-Sep-82", "9-Mar-93"), retired = c("18-Sep-51",
"24-Jan-63", "8-Jun-66", "5-Nov-74", "6-Jun-89", "26-Jul-98")), row.names = c(NA,
-6L), class = c("tbl_df", "tbl", "data.frame")))
manager_df <- dput(structure(list(manager_name = c("john", "doe", "mary", "jane"
), date_became_manager = c("26-Jan-50", "27-Aug-50", "8-Nov-68",
"5-Jan-84"), manager_till = c("26-Aug-50", "7-Nov-68", "4-Jan-84",
"23-Dec-01")), row.names = c(NA, -4L), class = c("tbl_df", "tbl",
"data.frame")))
I tried the following:
staff_df$appointed_by = NA
staff_df$appointed_by <- manager_df$manager_name[which(staff_df$joined > manager_df$date_became_manager & staff_df$joined <= manager_df$manager_till)]'''
Effectively, I need the new column in the staff_df called appointed_by and obtain the name of the manager who appointed the personnel. ie., Alex was appointed by John, Timor was appointed by doe, Tai was appointed by Doe (...) Jae was appointed by Jane.
EDIT: dput()
library(lubridate)
library(dplyr)
staff_df <- staff_df %>%
mutate(
joined = dmy(joined),
retired = dmy(retired)
)
manager_df <- manager_df %>%
mutate(
date_became_manager = dmy(date_became_manager),
manager_till = dmy(manager_till)
)
library(sqldf)
res <- sqldf("SELECT *
FROM staff_df
INNER JOIN manager_df ON
staff_df.joined>=manager_df.date_became_manager AND
staff_df.joined<manager_till")
> res
employee_name joined retired manager_name date_became_manager manager_till
1 alex 2050-01-26 2051-09-18 john 2050-01-26 2050-08-26
2 timor 2060-09-23 2063-01-24 doe 2050-08-27 2068-11-07
3 tai 2061-03-05 2066-06-08 doe 2050-08-27 2068-11-07
4 jae 1993-03-09 1998-07-26 jane 1984-01-05 2001-12-23
我提出这个 tidyverse
策略。但是,在给定的dput中,日期是字符格式,所以我先把它们改成日期类型。
staff_df <- dput(structure(list(employee_name = c("alex", "timor", "tai", "lil",
"mae", "jae"), joined = c("26-Jan-50", "23-Sep-60", "5-Mar-61",
"8-Sep-72", "8-Sep-82", "9-Mar-93"), retired = c("18-Sep-51",
"24-Jan-63", "8-Jun-66", "5-Nov-74", "6-Jun-89", "26-Jul-98")), row.names = c(NA,
-6L), class = c("tbl_df", "tbl", "data.frame")))
#> structure(list(employee_name = c("alex", "timor", "tai", "lil",
#> "mae", "jae"), joined = c("26-Jan-50", "23-Sep-60", "5-Mar-61",
#> "8-Sep-72", "8-Sep-82", "9-Mar-93"), retired = c("18-Sep-51",
#> "24-Jan-63", "8-Jun-66", "5-Nov-74", "6-Jun-89", "26-Jul-98")), row.names = c(NA,
#> -6L), class = c("tbl_df", "tbl", "data.frame"))
manager_df <- dput(structure(list(manager_name = c("john", "doe", "mary", "jane"
), date_became_manager = c("26-Jan-50", "27-Aug-50", "8-Nov-68",
"5-Jan-84"), manager_till = c("26-Aug-50", "7-Nov-68", "4-Jan-84",
"23-Dec-01")), row.names = c(NA, -4L), class = c("tbl_df", "tbl",
"data.frame")))
#> structure(list(manager_name = c("john", "doe", "mary", "jane"
#> ), date_became_manager = c("26-Jan-50", "27-Aug-50", "8-Nov-68",
#> "5-Jan-84"), manager_till = c("26-Aug-50", "7-Nov-68", "4-Jan-84",
#> "23-Dec-01")), row.names = c(NA, -4L), class = c("tbl_df", "tbl",
#> "data.frame"))
#change data_type
staff_df$joined <- as.Date(staff_df$joined, '%d-%b-%y')
staff_df$retired <- as.Date(staff_df$retired, '%d-%b-%y')
manager_df$date_became_manager <- as.Date(manager_df$date_became_manager, '%d-%b-%y')
manager_df$manager_till <- as.Date(manager_df$manager_till, '%d-%b-%y')
library(tidyverse)
staff_df %>%
mutate(manager_app = map_chr(joined, ~{xx <- manager_df$manager_name[manager_df$date_became_manager <= .x &
manager_df$manager_till >= .x];
if (length(xx) == 0) NA else xx})) %>%
filter(!is.na(manager_app)) %>%
left_join(manager_df, by = c('manager_app' = 'manager_name'))
#> # A tibble: 4 x 6
#> employee_name joined retired manager_app date_became_mana~ manager_till
#> <chr> <date> <date> <chr> <date> <date>
#> 1 alex 2050-01-26 2051-09-18 john 2050-01-26 2050-08-26
#> 2 timor 2060-09-23 2063-01-24 doe 2050-08-27 2068-11-07
#> 3 tai 2061-03-05 2066-06-08 doe 2050-08-27 2068-11-07
#> 4 jae 1993-03-09 1998-07-26 jane 1984-01-05 2001-12-23
甚至 filter
这一步也是可选的
staff_df %>%
mutate(manager_app = map_chr(joined, ~{xx <- manager_df$manager_name[manager_df$date_became_manager <= .x &
manager_df$manager_till >= .x];
if (length(xx) == 0) NA else xx})) %>%
left_join(manager_df, by = c('manager_app' = 'manager_name'))
# A tibble: 6 x 6
employee_name joined retired manager_app date_became_manager manager_till
<chr> <date> <date> <chr> <date> <date>
1 alex 2050-01-26 2051-09-18 john 2050-01-26 2050-08-26
2 timor 2060-09-23 2063-01-24 doe 2050-08-27 2068-11-07
3 tai 2061-03-05 2066-06-08 doe 2050-08-27 2068-11-07
4 lil 1972-09-08 1974-11-05 NA NA NA
5 mae 1982-09-08 1989-06-06 NA NA NA
6 jae 1993-03-09 1998-07-26 jane 1984-01-05 2001-12-23