如何遍历变量列表并将值添加到 R 中的输出数据帧?
How to loop through a variable list and add values to an output dataframe in R?
我正在尝试编写一个函数,它接受一个数据框、一个主变量和一个变量列表,并对主变量与列表中的每个变量调用 cor.test 函数。我希望它返回一个包含变量名称、相关系数和 p 值的数据框。
我目前的代码是:
# Asker's original attempt: run cor.test() between dat[[mainVar]] and each
# column named in varlist, and return the variable names, estimates and
# p-values.
# NOTE(review): kept exactly as posted, because its output is what the
# question is about; see the inline notes for why every row ends up identical.
myCorTest = function(dat, mainVar, varlist)
{
# Initial value only; it is replaced (not appended to) inside the loop.
result = data.frame()
mainV = dat[[mainVar]]
for (i in 1:length(varlist)){
var_select = dat[[varlist[i]]]
x = cor.test(mainV, var_select)
R = x$estimate
p = x$p.value
# cbind() of character and numeric vectors gives a character matrix.
# The whole `varlist` is bound here (not varlist[i]) and `result` is
# overwritten on every pass, so only the last iteration's R and p remain,
# recycled across all rows.
result = cbind(mainVar, varlist, R, p)
}
return(result)
}
我希望输出如下所示:
> myCorTest(chol, "bmi", c("sbp", "dbp", "vldl", "hdl", "ldl"))
var1 var2 R p
sbp bmi sbp 0.14927952 3.877523e-02
dbp bmi dbp 0.42636371 6.997094e-10
vldl bmi vldl 0.41033688 4.107925e-09
hdl bmi hdl -0.11984422 9.956239e-02
ldl bmi ldl 0.03449137 6.366170e-01
但我的输出是:
> myCorTest(chol, "bmi", c("sbp","dbp", "vldl", "hdl", "ldl"))
mainVar varlist R p
[1,] "bmi" "sbp" "0.0344913724648321" "0.636617020943996"
[2,] "bmi" "dbp" "0.0344913724648321" "0.636617020943996"
[3,] "bmi" "vldl" "0.0344913724648321" "0.636617020943996"
[4,] "bmi" "hdl" "0.0344913724648321" "0.636617020943996"
[5,] "bmi" "ldl" "0.0344913724648321" "0.636617020943996"
您的代码的问题在于 cbind 创建的是一个矩阵,而矩阵要求其中所有值都具有相同的数据类型,因此数值被转换成了字符串。此外,每次循环都会覆盖 result,所以最终只保留了最后一次迭代的 R 和 p。您需要的是在每次迭代中创建一个 data.frame 并用 rbind 追加。试试这个:
#' Correlate one variable against a list of other variables.
#'
#' Runs `cor.test()` between `dat[[mainVar]]` and each column selected by
#' `varlist`, producing one result row per tested variable.
#'
#' @param dat A data frame (or list) holding the variables.
#' @param mainVar Name of the column correlated against every other variable.
#' @param varlist Vector of column names to test against `mainVar`.
#' @return A data frame with columns `var1` (the main variable), `var2` (the
#'   tested variable), `R` (the `cor.test()` estimate) and `p` (its p-value).
#'   Row names are the entries of `varlist`. An empty `varlist` yields a
#'   zero-row data frame with the same four columns.
myCorTest <- function(dat, mainVar, varlist) {
  # Nothing to test: hand back a typed, zero-row frame. A `1:length(varlist)`
  # loop would iterate over c(1, 0) here and fail inside cor.test().
  if (length(varlist) == 0L) {
    return(data.frame(
      var1 = character(),
      var2 = character(),
      R = numeric(),
      p = numeric()
    ))
  }

  mainV <- dat[[mainVar]]

  # Build every row first and bind once, rather than growing the result with
  # rbind() on each iteration.
  rows <- lapply(seq_along(varlist), function(i) {
    x <- cor.test(mainV, dat[[varlist[i]]])
    row <- data.frame(
      var1 = mainVar,
      var2 = varlist[i],
      R = unname(x$estimate),
      p = x$p.value
    )
    # Label the row with the tested variable so the output is self-describing.
    row.names(row) <- varlist[i]
    row
  })

  do.call(rbind, rows)
}
myCorTest(chol, "bmi", c("sbp", "dbp", "vldl", "hdl", "ldl"))
在循环中不断增长对象/数据框是低效的。我会改用 lapply:
#' Correlate one variable against a list of other variables.
#'
#' Applies `cor.test()` to `dat[[mainVar]]` and each column named in
#' `varlist`, then stacks the per-variable results into one data frame.
#'
#' @param dat A data frame (or list) holding the variables.
#' @param mainVar Name of the column correlated against every other variable.
#' @param varlist Vector of column names to test against `mainVar`.
#' @return A data frame with columns `mainVar`, `varlist`, `R` (the
#'   `cor.test()` estimate) and `p` (its p-value), with default row names.
#'   An empty `varlist` yields a zero-row data frame with the same columns.
myCorTest <- function(dat, mainVar, varlist) {
  # do.call(rbind, list()) is NULL, so return a typed empty frame explicitly
  # to keep the return type stable for callers.
  if (length(varlist) == 0L) {
    return(data.frame(
      mainVar = character(),
      varlist = character(),
      R = numeric(),
      p = numeric()
    ))
  }

  mainV <- dat[[mainVar]]

  rows <- lapply(varlist, function(x) {
    temp <- cor.test(mainV, dat[[x]])
    data.frame(
      mainVar = mainVar,
      varlist = x,
      R = temp$estimate,
      p = temp$p.value
    )
  })

  result <- do.call(rbind, rows)
  # Each one-row frame inherits the row name "cor" from the named estimate;
  # reset to the default 1..n numbering.
  rownames(result) <- NULL
  result
}
myCorTest(mtcars, 'mpg', c('cyl', 'am'))
# mainVar varlist R p
#1 mpg cyl -0.852 6.11e-10
#2 mpg am 0.600 2.85e-04
我正在尝试编写一个函数,它接受一个数据框、一个主变量和一个变量列表,并对主变量与列表中的每个变量调用 cor.test 函数。我希望它返回一个包含变量名称、相关系数和 p 值的数据框。
我目前的代码是:
# Asker's original attempt: run cor.test() between dat[[mainVar]] and each
# column named in varlist, and return the variable names, estimates and
# p-values.
# NOTE(review): kept exactly as posted, because its output is what the
# question is about; see the inline notes for why every row ends up identical.
myCorTest = function(dat, mainVar, varlist)
{
# Initial value only; it is replaced (not appended to) inside the loop.
result = data.frame()
mainV = dat[[mainVar]]
for (i in 1:length(varlist)){
var_select = dat[[varlist[i]]]
x = cor.test(mainV, var_select)
R = x$estimate
p = x$p.value
# cbind() of character and numeric vectors gives a character matrix.
# The whole `varlist` is bound here (not varlist[i]) and `result` is
# overwritten on every pass, so only the last iteration's R and p remain,
# recycled across all rows.
result = cbind(mainVar, varlist, R, p)
}
return(result)
}
我希望输出如下所示:
> myCorTest(chol, "bmi", c("sbp", "dbp", "vldl", "hdl", "ldl"))
var1 var2 R p
sbp bmi sbp 0.14927952 3.877523e-02
dbp bmi dbp 0.42636371 6.997094e-10
vldl bmi vldl 0.41033688 4.107925e-09
hdl bmi hdl -0.11984422 9.956239e-02
ldl bmi ldl 0.03449137 6.366170e-01
但我的输出是:
> myCorTest(chol, "bmi", c("sbp","dbp", "vldl", "hdl", "ldl"))
mainVar varlist R p
[1,] "bmi" "sbp" "0.0344913724648321" "0.636617020943996"
[2,] "bmi" "dbp" "0.0344913724648321" "0.636617020943996"
[3,] "bmi" "vldl" "0.0344913724648321" "0.636617020943996"
[4,] "bmi" "hdl" "0.0344913724648321" "0.636617020943996"
[5,] "bmi" "ldl" "0.0344913724648321" "0.636617020943996"
您的代码的问题在于 cbind 创建的是一个矩阵,而矩阵要求其中所有值都具有相同的数据类型,因此数值被转换成了字符串。此外,每次循环都会覆盖 result,所以最终只保留了最后一次迭代的 R 和 p。您需要的是在每次迭代中创建一个 data.frame 并用 rbind 追加。试试这个:
#' Correlate one variable against a list of other variables.
#'
#' Runs `cor.test()` between `dat[[mainVar]]` and each column selected by
#' `varlist`, producing one result row per tested variable.
#'
#' @param dat A data frame (or list) holding the variables.
#' @param mainVar Name of the column correlated against every other variable.
#' @param varlist Vector of column names to test against `mainVar`.
#' @return A data frame with columns `var1` (the main variable), `var2` (the
#'   tested variable), `R` (the `cor.test()` estimate) and `p` (its p-value).
#'   Row names are the entries of `varlist`. An empty `varlist` yields a
#'   zero-row data frame with the same four columns.
myCorTest <- function(dat, mainVar, varlist) {
  # Nothing to test: hand back a typed, zero-row frame. A `1:length(varlist)`
  # loop would iterate over c(1, 0) here and fail inside cor.test().
  if (length(varlist) == 0L) {
    return(data.frame(
      var1 = character(),
      var2 = character(),
      R = numeric(),
      p = numeric()
    ))
  }

  mainV <- dat[[mainVar]]

  # Build every row first and bind once, rather than growing the result with
  # rbind() on each iteration.
  rows <- lapply(seq_along(varlist), function(i) {
    x <- cor.test(mainV, dat[[varlist[i]]])
    row <- data.frame(
      var1 = mainVar,
      var2 = varlist[i],
      R = unname(x$estimate),
      p = x$p.value
    )
    # Label the row with the tested variable so the output is self-describing.
    row.names(row) <- varlist[i]
    row
  })

  do.call(rbind, rows)
}
myCorTest(chol, "bmi", c("sbp", "dbp", "vldl", "hdl", "ldl"))
在循环中不断增长对象/数据框是低效的。我会改用 lapply:
#' Correlate one variable against a list of other variables.
#'
#' Applies `cor.test()` to `dat[[mainVar]]` and each column named in
#' `varlist`, then stacks the per-variable results into one data frame.
#'
#' @param dat A data frame (or list) holding the variables.
#' @param mainVar Name of the column correlated against every other variable.
#' @param varlist Vector of column names to test against `mainVar`.
#' @return A data frame with columns `mainVar`, `varlist`, `R` (the
#'   `cor.test()` estimate) and `p` (its p-value), with default row names.
#'   An empty `varlist` yields a zero-row data frame with the same columns.
myCorTest <- function(dat, mainVar, varlist) {
  # do.call(rbind, list()) is NULL, so return a typed empty frame explicitly
  # to keep the return type stable for callers.
  if (length(varlist) == 0L) {
    return(data.frame(
      mainVar = character(),
      varlist = character(),
      R = numeric(),
      p = numeric()
    ))
  }

  mainV <- dat[[mainVar]]

  rows <- lapply(varlist, function(x) {
    temp <- cor.test(mainV, dat[[x]])
    data.frame(
      mainVar = mainVar,
      varlist = x,
      R = temp$estimate,
      p = temp$p.value
    )
  })

  result <- do.call(rbind, rows)
  # Each one-row frame inherits the row name "cor" from the named estimate;
  # reset to the default 1..n numbering.
  rownames(result) <- NULL
  result
}
myCorTest(mtcars, 'mpg', c('cyl', 'am'))
# mainVar varlist R p
#1 mpg cyl -0.852 6.11e-10
#2 mpg am 0.600 2.85e-04