是否可以将 lapply() (或其他命令)与 r 中的多个列表对象输入一起使用?

Is it possible to use lapply() (or some other command) with multiple list object inputs in r?

我想将 lapply() 用于多个列表输入。

具体来说,我想 运行 lm() 测试不同的 IV、DV 和数据集。

我从 ggplot2::diamonds 数据集创建了 2 个数据集,分别称为 diamonds_top300diamonds_bottom300。我想对这些数据集进行 运行 lm() 测试,其中 IV 为 xy,DV 为 pricecarat

我可以做很长的路要走,使用下面的代码:

## long way
# ---- NOTE: works

### DV is price, IV is x, and dataset is diamonds_top300
lm__dataset_is_diamonds_top300__DV_is_price__IV_is_x <-
  lm(price ~ x, data = diamonds_top300)

### DV is price, IV is y, and dataset is diamonds_top300
lm__dataset_is_diamonds_top300__DV_is_price__IV_is_y <-
  lm(price ~ y, data = diamonds_top300)

### DV is carat, IV is x, and dataset is diamonds_top300
lm__dataset_is_diamonds_top300__DV_is_carat__IV_is_x <-
  lm(carat ~ x, data = diamonds_top300)

### DV is carat, IV is y, and dataset is diamonds_top300
lm__dataset_is_diamonds_top300__DV_is_carat__IV_is_y <-
  lm(carat ~ y, data = diamonds_top300)

### DV is price, IV is x, and dataset is diamonds_bottom300
lm__dataset_is_diamonds_bottom300__DV_is_price__IV_is_x <-
  lm(price ~ x, data = diamonds_bottom300)

### DV is price, IV is y, and dataset is diamonds_bottom300
lm__dataset_is_diamonds_bottom300__DV_is_price__IV_is_y <-
  lm(price ~ y, data = diamonds_bottom300)

### DV is carat, IV is x, and dataset is diamonds_bottom300
lm__dataset_is_diamonds_bottom300__DV_is_carat__IV_is_x <-
  lm(carat ~ x, data = diamonds_bottom300)

### DV is carat, IV is y, and dataset is diamonds_bottom300
lm__dataset_is_diamonds_bottom300__DV_is_carat__IV_is_y <-
  lm(carat ~ y, data = diamonds_bottom300)

当我尝试使用下面的 lapply() 代码以较短的方式进行时,但这没有用。我想使用 3 个列表输入(即,一个用于使用的数据集,一个用于使用的 DV,一个用于使用的 IV)来做到这一点。

## short way, using lapply()
# ---- NOTE: does not work

### creates list object
lm__dataset_is_XXXX__DV_is_XXXX__IV_is_XXXX <- 
  lapply(
    # ---- NOTE: tells dataset used in analysis
    DV_info$dataset_analyses,
    # ---- NOTE: tells DV used in analysis
    DV_info$DV_original,
    # ---- NOTE: tells IV used in analysis
    IV_info$IV_original,
    function(
      # ---- NOTE: name of function object input for dataset used in lapply() object
      dataset_list,
      # ---- NOTE: name of function object input for DV used in lapply() object
      DV_list,
      # ---- NOTE: name of function object input for IV used in lapply() object
      IV_list
             ) {
      # ---- NOTE: creates _funct_object versions of function() inputs
      IV_funct_object <- 
        IV_list
      DV_funct_object <- 
        DV_list
      dataset_funct_object <- 
        dataset_list
      # ---- NOTE: creates 
      lm_funct_object <- 
        lm(DV_funct_object ~ IV_funct_object, data = dataset_funct_object)
      # ---- NOTE: returns object
      return(lm_funct_object)
    }
  )

### changes list object name
lm__dataset_is_XXXX__DV_is_XXXX__IV_is_XXXX <- 
  setNames(
    lm__dataset_is_XXXX__DV_is_XXXX__IV_is_XXXX, 
    paste(
      "list_derived_object_",
      "lm",
      "_dataset_is",
      DV_info$dataset_analyses,
      "_DV_is",
      DV_info$DV_original,
      "_IV_is",
      IV_info$IV_original,
      sep = "_"
           )
  )

如果可能的话,我想使用 lapply() 来执行此操作,因为我可以修改与 lapply() 关联的代码,但我愿意接受其他选项(例如,使用mapply() 带有预制的用户生成函数)。

非常感谢任何建议。

仅供参考,我使用的是 2013 Macbook Pro,配备 2.4 GHz 双核英特尔芯片、8 GB 内存、macOS big sur 11.2.2、RStudio 版本 1.4.1106 和 R 基础包 4.04。

谢谢。



我使用的R脚本如下:

#### lapply() with multiple objects ####




# Loads packages
# ---- NOTE: making plots and diamonds dataset
if(!require(ggplot2)){install.packages("ggplot2")}
# ---- NOTE: for data wrangling
if(!require(dplyr)){install.packages("dplyr")}




# dataset creation

## for dataset with top 300 rows
# ---- NOTE: selects only the top 300 rows of the dataset
diamonds_top300 <- data.frame(dplyr::top_n(diamonds, 300, table))
# ---- NOTE: gives dataset info
head(diamonds_top300)
str(diamonds_top300)
colnames(diamonds_top300)
nrow(diamonds_top300)
# ---- NOTE: gives unique values of Fixed and Random effects, and dvs
unique(diamonds_top300$price)
unique(diamonds_top300$y)
unique(diamonds_top300$cut)
unique(diamonds_top300$color)
unique(diamonds_top300$carat)
unique(diamonds_top300$clarity)
unique(diamonds_top300$depth)
unique(diamonds_top300$table)

## for dataset with bottom 300 rows
### dataset
# ---- NOTE: selects only the bottom 300 rows of the dataset
diamonds_bottom300 <- data.frame(dplyr::top_n(diamonds, -300, table))
# ---- NOTE: gives dataset info
head(diamonds_bottom300)
str(diamonds_bottom300)
colnames(diamonds_bottom300)
nrow(diamonds_bottom300)
# ---- NOTE: gives unique values of Fixed and Random effects, and dvs
unique(diamonds_bottom300$price)
unique(diamonds_bottom300$y)
unique(diamonds_bottom300$cut)
unique(diamonds_bottom300$color)
unique(diamonds_bottom300$carat)
unique(diamonds_bottom300$clarity)
unique(diamonds_bottom300$depth)
unique(diamonds_bottom300$table)

## creates DV_info object
# ---- NOTE: length of data needs to match up with the maximum number of tests being done (i.e., 8 tests total means this list based data frame needs to be 8 rows in length)
DV_info <- 
  data.frame(
    DV_original = c("price", "carat", "price", "carat", "price", "carat", "price", "carat"),
    dataset_analyses = c("diamonds_top300", "diamonds_top300", "diamonds_top300", "diamonds_top300", "diamonds_bottom300", "diamonds_bottom300", "diamonds_bottom300")
  )

## creates IV_info object
# ---- NOTE: length of data needs to match up with the maximum number of tests being done (i.e., 8 tests total means this list based data frame needs to be 8 rows in length)
IV_info <- 
  data.frame(
    IV_original = c("x", "y", "x", "y", "x", "y", "x", "y")
  )




# creating lm() objects

## long way
# ---- NOTE: works

### DV is price, IV is x, and dataset is diamonds_top300
lm__dataset_is_diamonds_top300__DV_is_price__IV_is_x <-
  lm(price ~ x, data = diamonds_top300)

### DV is price, IV is y, and dataset is diamonds_top300
lm__dataset_is_diamonds_top300__DV_is_price__IV_is_y <-
  lm(price ~ y, data = diamonds_top300)

### DV is carat, IV is x, and dataset is diamonds_top300
lm__dataset_is_diamonds_top300__DV_is_carat__IV_is_x <-
  lm(carat ~ x, data = diamonds_top300)

### DV is carat, IV is y, and dataset is diamonds_top300
lm__dataset_is_diamonds_top300__DV_is_carat__IV_is_y <-
  lm(carat ~ y, data = diamonds_top300)

### DV is price, IV is x, and dataset is diamonds_bottom300
lm__dataset_is_diamonds_bottom300__DV_is_price__IV_is_x <-
  lm(price ~ x, data = diamonds_bottom300)

### DV is price, IV is y, and dataset is diamonds_bottom300
lm__dataset_is_diamonds_bottom300__DV_is_price__IV_is_y <-
  lm(price ~ y, data = diamonds_bottom300)

### DV is carat, IV is x, and dataset is diamonds_bottom300
lm__dataset_is_diamonds_bottom300__DV_is_carat__IV_is_x <-
  lm(carat ~ x, data = diamonds_bottom300)

### DV is carat, IV is y, and dataset is diamonds_bottom300
lm__dataset_is_diamonds_bottom300__DV_is_carat__IV_is_y <-
  lm(carat ~ y, data = diamonds_bottom300)

### lists created file(s)
# ---- NOTE: list of object, out right
lm__dataset_is_diamonds_top300__DV_is_price__IV_is_x
lm__dataset_is_diamonds_top300__DV_is_price__IV_is_y
lm__dataset_is_diamonds_top300__DV_is_carat__IV_is_x
lm__dataset_is_diamonds_top300__DV_is_carat__IV_is_y
lm__dataset_is_diamonds_bottom300__DV_is_price__IV_is_x
lm__dataset_is_diamonds_bottom300__DV_is_price__IV_is_y
lm__dataset_is_diamonds_bottom300__DV_is_carat__IV_is_x
lm__dataset_is_diamonds_bottom300__DV_is_carat__IV_is_y
# ---- NOTE: apropos() command list
apropos("lm__dataset_is_")

## short way, using lapply()
# ---- NOTE: does not work

### creates list object
lm__dataset_is_XXXX__DV_is_XXXX__IV_is_XXXX <- 
  lapply(
    # ---- NOTE: tells dataset used in analysis
    DV_info$dataset_analyses,
    # ---- NOTE: tells DV used in analysis
    DV_info$DV_original,
    # ---- NOTE: tells IV used in analysis
    IV_info$IV_original,
    function(
      # ---- NOTE: name of function object input for dataset used in lapply() object
      dataset_list,
      # ---- NOTE: name of function object input for DV used in lapply() object
      DV_list,
      # ---- NOTE: name of function object input for IV used in lapply() object
      IV_list
             ) {
      # ---- NOTE: creates _funct_object versions of function() inputs
      IV_funct_object <- 
        IV_list
      DV_funct_object <- 
        DV_list
      dataset_funct_object <- 
        dataset_list
      # ---- NOTE: creates 
      lm_funct_object <- 
        lm(DV_funct_object ~ IV_funct_object, data = dataset_funct_object)
      # ---- NOTE: returns object
      return(lm_funct_object)
    }
  )

### changes list object name
lm__dataset_is_XXXX__DV_is_XXXX__IV_is_XXXX <- 
  setNames(
    lm__dataset_is_XXXX__DV_is_XXXX__IV_is_XXXX, 
    paste(
      "list_derived_object_",
      "lm",
      "_dataset_is",
      DV_info$dataset_analyses,
      "_DV_is",
      DV_info$DV_original,
      "_IV_is",
      IV_info$IV_original,
      sep = "_"
           )
  )

你可以利用Map-

Map(function(x, y, z) lm(reformulate(x, y), data = z),
  IV_info$IV_original, DV_info$DV_original, mget(DV_info$dataset_analyses))

考虑使用 expand.grid 构建输入的所有组合,然后使用 reformulate 调用 Map 以从字符串动态构建公式,并调用 get 以按字符名称检索对象.

run_model <- function(dv, iv, data)
    lm(reformulate(iv, dv), data=get(data))

inputs <- expand.grid(
    dv = c("price", "carat"),
    iv = c("x", "y"),
    data = c("diamonds_bottom300", "diamonds_top300")
)

lm_results <- Map(run_model, inputs$dv, inputs$iv, inputs$data)
# EQUIVALENTLY:
lm_results <- mapply(run_model, inputs$dv, inputs$iv, inputs$data, SIMPLIFY = FALSE)