在 R 中,如何传递一个小标题中给定的参数以对另一个小标题执行操作?

How, in R, to you pass parameters given in one tibble to perform operations on another?

我希望将一些参数存储在一个小标题中,并使用该信息聚合与来自另一个数据集的响应行关联的值。因此,在下面的示例设置中,p1 的 return 将是 data_tibble 中所有记录的总和值,其中数字 = 123,代码为 "code1" 或 "code2"*,日期值等于 2020-01-01。 p2 依此类推。

我不知道该怎么做;我尝试使用 apply 和 separately 创建一个函数来简单地为 运行 本身创建一个函数并产生一个结果向量(如下面的 doesnotwork 函数所示)。

感谢任何帮助!

已编辑以更正拼写错误

*这里有点傻,但我有数百个不同的代码要处理,在某些情况下,除了少数之外,排除所有代码会更容易,而在其他情况下,最好只是包括一些,等等。所以一个 NULL,或者传递列表的能力,将导致所有记录传递的东西是理想的。

library(tidyverse)
library(lubridate)
#Set up Parameters
parameters_tibble <- tibble(name = character(),
                               number = numeric(),
                               acceptable_codes = list(),
                               unacceptable_codes = list(),
                               cutoff_date = date(),
                               .rows = NULL)
parameters_tibble$date <- as_date(parameters_tibble$date)

parameters_tibble <- add_row(parameters_tibble,
                             name = "param1",
                             number = 123,
                             acceptable_codes = list(c("code1", "code2")),
                             unacceptable_codes = list(NULL),
                             cutoff_date = as_date("2020-01-01"))

parameters_tibble <- add_row(parameters_tibble,
                             name = "param2",
                             number = 456,
                             acceptable_codes = list(NULL),
                             unacceptable_codes = list("code72"),
                             cutoff_date = as_date("2020-01-01"))

#Create sample dataset
data_tibble <- tibble(number = numeric(),
                      code = character(),
                      date = date(),
                      value = numeric(),
                      .rows=NULL)
data_tibble$date <- as_date(data_tibble$date)
data_tibble <- add_row(data_tibble,
                       number = rep(c(123,456),6),
                       code = rep(c("code1", "code2", "code3", "code4"),3),
                       date = as_date(rep(c("2020-01-01","2019-11-01"),6)),
                       value = rep(1:12))


doesnotwork <- function(dt = data_tibble, pt = parameters_tibble) {
  aggregatedValues <- dt %>% 
    sum(
      filter(number == pt$number &
               code %in% pt$acceptable_codes &
               !(code %in% pt$acceptable_codes) &
               date >= pt$cutoff_date) 
      $value)
  return(aggregatedValues)
}

呸。下次请做一个可重现的例子,例如使用 df 中存在的列(date 在 parameters_tibble 中没有 var,Std.Amt 在 data_tibble 中没有列)。根据您的代码和描述,我计算聚合值的方法使用 unnest 来删除列表列(注意:定义可接受的代码就足够了),然后使用连接 + 一些额外的过滤语句来过滤您的数据根据​​参数 tibble 进行 tibble。在这一步之后是计算聚合值的简单任务。试试这个:

library(tidyverse)
library(lubridate)

#Set up Parameters
parameters_tibble <- tibble(name = character(),
                            number = numeric(),
                            acceptable_codes = list(),
                            unacceptable_codes = list(),
                            cutoff_date = date(),
                            .rows = NULL)

parameters_tibble$cutoff_date <- as_date(parameters_tibble$cutoff_date)

parameters_tibble <- add_row(parameters_tibble,
                             name = "param1",
                             number = 123,
                             acceptable_codes = list(c("code1", "code2")),
                             unacceptable_codes = list(NULL),
                             cutoff_date = "2020-01-01")

parameters_tibble <- add_row(parameters_tibble,
                             name = "param2",
                             number = 456,
                             acceptable_codes = list(c("code1", "code2", "code3")),
                             unacceptable_codes = list("code4"),
                             cutoff_date = "2020-01-01")

# Unnest: One row for each acceptable code
parameters_tibble_unnest <- parameters_tibble %>% 
  unnest(c(acceptable_codes, unacceptable_codes))

#Create sample dataset
data_tibble <- tibble(number = numeric(),
                      code = character(),
                      date = date(),
                      value = numeric(),
                      .rows=NULL)

data_tibble$date <- as_date(data_tibble$date)

data_tibble <- add_row(data_tibble,
                       number = rep(c(123,456),6),
                       code = rep(c("code1", "code2", "code3", "code4"),3),
                       date = as_date(rep(c("2020-01-01","2019-11-01"),6)),
                       value = rep(1:12))

# Filter using joins
df_join <- left_join(data_tibble, parameters_tibble_unnest, by = c("number", "code" = "acceptable_codes")) %>% 
  # Drop non-matching data-rows with no match in acceptable codes
  filter(!is.na(name)) %>% 
  # filter for cutoff-date
  filter(date >= cutoff_date) %>% 
  # filter for unacceptable_codes
  filter(!code %in% unacceptable_codes)

df_join
#> # A tibble: 3 x 7
#>   number code  date       value name   unacceptable_codes cutoff_date
#>    <dbl> <chr> <date>     <dbl> <chr>  <chr>              <date>     
#> 1    123 code1 2020-01-01     1 param1 <NA>               2020-01-01 
#> 2    123 code1 2020-01-01     5 param1 <NA>               2020-01-01 
#> 3    123 code1 2020-01-01     9 param1 <NA>               2020-01-01

aggregated_values <- df_join %>% 
  count(number, wt = value)
aggregated_values
#> # A tibble: 1 x 2
#>   number     n
#>    <dbl> <dbl>
#> 1    123    15

更新:将其放入函数中

get_aggregated_values <- function(dt, pt) {
  # Unnest: One row for each acceptable and unacceptable code
  pt_unnest <- pt %>% 
    unnest(c(acceptable_codes, unacceptable_codes))

  # Join by number and code == acceptable_codes
  dt_join <- left_join(dt, pt_unnest, by = c("number", "code" = "acceptable_codes")) %>% 
    # Drop non-matching data-rows with no match in acceptable codes
    filter(!is.na(name)) %>% 
    # filter for cutoff-date
    filter(date >= cutoff_date) %>% 
    # filter for unacceptable_codes
    filter(!code %in% unacceptable_codes)

  aggregated_values <- dt_join %>% 
    count(number, wt = value)
  aggregated_values
}

get_aggregated_values(data_tibble, parameters_tibble)
#> # A tibble: 1 x 2
#>   number     n
#>    <dbl> <dbl>
#> 1    123    15

reprex package (v0.3.0)

于 2020-03-25 创建