在 R 中,如何传递一个小标题中给定的参数以对另一个小标题执行操作?
How, in R, to you pass parameters given in one tibble to perform operations on another?
我希望将一些参数存储在一个小标题中,并使用该信息聚合与来自另一个数据集的响应行关联的值。因此,在下面的示例设置中,p1 的 return 将是 data_tibble 中所有记录的总和值,其中数字 = 123,代码为 "code1" 或 "code2"*,日期值等于 2020-01-01。 p2 依此类推。
我不知道该怎么做;我尝试使用 apply 和 separately 创建一个函数来简单地为 运行 本身创建一个函数并产生一个结果向量(如下面的 doesnotwork 函数所示)。
感谢任何帮助!
已编辑以更正拼写错误
*这里有点傻,但我有数百个不同的代码要处理,在某些情况下,除了少数之外,排除所有代码会更容易,而在其他情况下,最好只是包括一些,等等。所以一个 NULL,或者传递列表的能力,将导致所有记录传递的东西是理想的。
library(tidyverse)
library(lubridate)
#Set up Parameters
parameters_tibble <- tibble(name = character(),
number = numeric(),
acceptable_codes = list(),
unacceptable_codes = list(),
cutoff_date = date(),
.rows = NULL)
parameters_tibble$date <- as_date(parameters_tibble$date)
parameters_tibble <- add_row(parameters_tibble,
name = "param1",
number = 123,
acceptable_codes = list(c("code1", "code2")),
unacceptable_codes = list(NULL),
cutoff_date = as_date("2020-01-01"))
parameters_tibble <- add_row(parameters_tibble,
name = "param2",
number = 456,
acceptable_codes = list(NULL),
unacceptable_codes = list("code72"),
cutoff_date = as_date("2020-01-01"))
#Create sample dataset
data_tibble <- tibble(number = numeric(),
code = character(),
date = date(),
value = numeric(),
.rows=NULL)
data_tibble$date <- as_date(data_tibble$date)
data_tibble <- add_row(data_tibble,
number = rep(c(123,456),6),
code = rep(c("code1", "code2", "code3", "code4"),3),
date = as_date(rep(c("2020-01-01","2019-11-01"),6)),
value = rep(1:12))
doesnotwork <- function(dt = data_tibble, pt = parameters_tibble) {
aggregatedValues <- dt %>%
sum(
filter(number == pt$number &
code %in% pt$acceptable_codes &
!(code %in% pt$acceptable_codes) &
date >= pt$cutoff_date)
$value)
return(aggregatedValues)
}
呸。下次请做一个可重现的例子,例如使用 df 中存在的列(date
在 parameters_tibble 中没有 var,Std.Amt
在 data_tibble 中没有列)。根据您的代码和描述,我计算聚合值的方法使用 unnest
来删除列表列(注意:定义可接受的代码就足够了),然后使用连接 + 一些额外的过滤语句来过滤您的数据根据参数 tibble 进行 tibble。在这一步之后是计算聚合值的简单任务。试试这个:
library(tidyverse)
library(lubridate)
#Set up Parameters
parameters_tibble <- tibble(name = character(),
number = numeric(),
acceptable_codes = list(),
unacceptable_codes = list(),
cutoff_date = date(),
.rows = NULL)
parameters_tibble$cutoff_date <- as_date(parameters_tibble$cutoff_date)
parameters_tibble <- add_row(parameters_tibble,
name = "param1",
number = 123,
acceptable_codes = list(c("code1", "code2")),
unacceptable_codes = list(NULL),
cutoff_date = "2020-01-01")
parameters_tibble <- add_row(parameters_tibble,
name = "param2",
number = 456,
acceptable_codes = list(c("code1", "code2", "code3")),
unacceptable_codes = list("code4"),
cutoff_date = "2020-01-01")
# Unnest: One row for each acceptable code
parameters_tibble_unnest <- parameters_tibble %>%
unnest(c(acceptable_codes, unacceptable_codes))
#Create sample dataset
data_tibble <- tibble(number = numeric(),
code = character(),
date = date(),
value = numeric(),
.rows=NULL)
data_tibble$date <- as_date(data_tibble$date)
data_tibble <- add_row(data_tibble,
number = rep(c(123,456),6),
code = rep(c("code1", "code2", "code3", "code4"),3),
date = as_date(rep(c("2020-01-01","2019-11-01"),6)),
value = rep(1:12))
# Filter using joins
df_join <- left_join(data_tibble, parameters_tibble_unnest, by = c("number", "code" = "acceptable_codes")) %>%
# Drop non-matching data-rows with no match in acceptable codes
filter(!is.na(name)) %>%
# filter for cutoff-date
filter(date >= cutoff_date) %>%
# filter for unacceptable_codes
filter(!code %in% unacceptable_codes)
df_join
#> # A tibble: 3 x 7
#> number code date value name unacceptable_codes cutoff_date
#> <dbl> <chr> <date> <dbl> <chr> <chr> <date>
#> 1 123 code1 2020-01-01 1 param1 <NA> 2020-01-01
#> 2 123 code1 2020-01-01 5 param1 <NA> 2020-01-01
#> 3 123 code1 2020-01-01 9 param1 <NA> 2020-01-01
aggregated_values <- df_join %>%
count(number, wt = value)
aggregated_values
#> # A tibble: 1 x 2
#> number n
#> <dbl> <dbl>
#> 1 123 15
更新:将其放入函数中
get_aggregated_values <- function(dt, pt) {
# Unnest: One row for each acceptable and unacceptable code
pt_unnest <- pt %>%
unnest(c(acceptable_codes, unacceptable_codes))
# Join by number and code == acceptable_codes
dt_join <- left_join(dt, pt_unnest, by = c("number", "code" = "acceptable_codes")) %>%
# Drop non-matching data-rows with no match in acceptable codes
filter(!is.na(name)) %>%
# filter for cutoff-date
filter(date >= cutoff_date) %>%
# filter for unacceptable_codes
filter(!code %in% unacceptable_codes)
aggregated_values <- dt_join %>%
count(number, wt = value)
aggregated_values
}
get_aggregated_values(data_tibble, parameters_tibble)
#> # A tibble: 1 x 2
#> number n
#> <dbl> <dbl>
#> 1 123 15
由 reprex package (v0.3.0)
于 2020-03-25 创建
我希望将一些参数存储在一个小标题中,并使用该信息聚合与来自另一个数据集的响应行关联的值。因此,在下面的示例设置中,p1 的 return 将是 data_tibble 中所有记录的总和值,其中数字 = 123,代码为 "code1" 或 "code2"*,日期值等于 2020-01-01。 p2 依此类推。
我不知道该怎么做;我尝试使用 apply 和 separately 创建一个函数来简单地为 运行 本身创建一个函数并产生一个结果向量(如下面的 doesnotwork 函数所示)。
感谢任何帮助!
已编辑以更正拼写错误
*这里有点傻,但我有数百个不同的代码要处理,在某些情况下,除了少数之外,排除所有代码会更容易,而在其他情况下,最好只是包括一些,等等。所以一个 NULL,或者传递列表的能力,将导致所有记录传递的东西是理想的。
library(tidyverse)
library(lubridate)
#Set up Parameters
parameters_tibble <- tibble(name = character(),
number = numeric(),
acceptable_codes = list(),
unacceptable_codes = list(),
cutoff_date = date(),
.rows = NULL)
parameters_tibble$date <- as_date(parameters_tibble$date)
parameters_tibble <- add_row(parameters_tibble,
name = "param1",
number = 123,
acceptable_codes = list(c("code1", "code2")),
unacceptable_codes = list(NULL),
cutoff_date = as_date("2020-01-01"))
parameters_tibble <- add_row(parameters_tibble,
name = "param2",
number = 456,
acceptable_codes = list(NULL),
unacceptable_codes = list("code72"),
cutoff_date = as_date("2020-01-01"))
#Create sample dataset
data_tibble <- tibble(number = numeric(),
code = character(),
date = date(),
value = numeric(),
.rows=NULL)
data_tibble$date <- as_date(data_tibble$date)
data_tibble <- add_row(data_tibble,
number = rep(c(123,456),6),
code = rep(c("code1", "code2", "code3", "code4"),3),
date = as_date(rep(c("2020-01-01","2019-11-01"),6)),
value = rep(1:12))
doesnotwork <- function(dt = data_tibble, pt = parameters_tibble) {
aggregatedValues <- dt %>%
sum(
filter(number == pt$number &
code %in% pt$acceptable_codes &
!(code %in% pt$acceptable_codes) &
date >= pt$cutoff_date)
$value)
return(aggregatedValues)
}
呸。下次请做一个可重现的例子,例如使用 df 中存在的列(date
在 parameters_tibble 中没有 var,Std.Amt
在 data_tibble 中没有列)。根据您的代码和描述,我计算聚合值的方法使用 unnest
来删除列表列(注意:定义可接受的代码就足够了),然后使用连接 + 一些额外的过滤语句来过滤您的数据根据参数 tibble 进行 tibble。在这一步之后是计算聚合值的简单任务。试试这个:
library(tidyverse)
library(lubridate)
#Set up Parameters
parameters_tibble <- tibble(name = character(),
number = numeric(),
acceptable_codes = list(),
unacceptable_codes = list(),
cutoff_date = date(),
.rows = NULL)
parameters_tibble$cutoff_date <- as_date(parameters_tibble$cutoff_date)
parameters_tibble <- add_row(parameters_tibble,
name = "param1",
number = 123,
acceptable_codes = list(c("code1", "code2")),
unacceptable_codes = list(NULL),
cutoff_date = "2020-01-01")
parameters_tibble <- add_row(parameters_tibble,
name = "param2",
number = 456,
acceptable_codes = list(c("code1", "code2", "code3")),
unacceptable_codes = list("code4"),
cutoff_date = "2020-01-01")
# Unnest: One row for each acceptable code
parameters_tibble_unnest <- parameters_tibble %>%
unnest(c(acceptable_codes, unacceptable_codes))
#Create sample dataset
data_tibble <- tibble(number = numeric(),
code = character(),
date = date(),
value = numeric(),
.rows=NULL)
data_tibble$date <- as_date(data_tibble$date)
data_tibble <- add_row(data_tibble,
number = rep(c(123,456),6),
code = rep(c("code1", "code2", "code3", "code4"),3),
date = as_date(rep(c("2020-01-01","2019-11-01"),6)),
value = rep(1:12))
# Filter using joins
df_join <- left_join(data_tibble, parameters_tibble_unnest, by = c("number", "code" = "acceptable_codes")) %>%
# Drop non-matching data-rows with no match in acceptable codes
filter(!is.na(name)) %>%
# filter for cutoff-date
filter(date >= cutoff_date) %>%
# filter for unacceptable_codes
filter(!code %in% unacceptable_codes)
df_join
#> # A tibble: 3 x 7
#> number code date value name unacceptable_codes cutoff_date
#> <dbl> <chr> <date> <dbl> <chr> <chr> <date>
#> 1 123 code1 2020-01-01 1 param1 <NA> 2020-01-01
#> 2 123 code1 2020-01-01 5 param1 <NA> 2020-01-01
#> 3 123 code1 2020-01-01 9 param1 <NA> 2020-01-01
aggregated_values <- df_join %>%
count(number, wt = value)
aggregated_values
#> # A tibble: 1 x 2
#> number n
#> <dbl> <dbl>
#> 1 123 15
更新:将其放入函数中
get_aggregated_values <- function(dt, pt) {
# Unnest: One row for each acceptable and unacceptable code
pt_unnest <- pt %>%
unnest(c(acceptable_codes, unacceptable_codes))
# Join by number and code == acceptable_codes
dt_join <- left_join(dt, pt_unnest, by = c("number", "code" = "acceptable_codes")) %>%
# Drop non-matching data-rows with no match in acceptable codes
filter(!is.na(name)) %>%
# filter for cutoff-date
filter(date >= cutoff_date) %>%
# filter for unacceptable_codes
filter(!code %in% unacceptable_codes)
aggregated_values <- dt_join %>%
count(number, wt = value)
aggregated_values
}
get_aggregated_values(data_tibble, parameters_tibble)
#> # A tibble: 1 x 2
#> number n
#> <dbl> <dbl>
#> 1 123 15
由 reprex package (v0.3.0)
于 2020-03-25 创建