如何将相关分析结果放入 R 中的 csv table
How to put correlation analysis results in a csv table in R
在 R 中,我需要将两个变量之间相关性分析的结果放在 table.csv 文件中。原始数据框由三列组成:第一列(组)包含观察所属的组,而其他两列(var1 和 var2)包含两个变量的值。
# Example data: two groups ("G1", "G2") of ten observations each, with two
# columns of random standard-normal values.
df <- data.frame(
  group = rep(c("G1", "G2"), each = 10),
  var1 = rnorm(20),
  var2 = rnorm(20)
)
我进行了分组相关性分析。
# Run a Spearman rank correlation test of var1 against var2 separately for each
# level of df$group; by() returns one cor.test() result per group.
spear <- by(df, df$group, FUN = function(X) cor.test(X$var1, X$var2, method = "spearman"))
我得到了这个输出:
spear
#df$group: G1
# Spearman's rank correlation rho
#data: X$var1 and X$var2
#S = 144, p-value = 0.7329
#alternative hypothesis: true rho is not equal to 0
#sample estimates:
# rho
#0.1272727
#---------------------------------------------------------------------------------------
#df$group: G2
# Spearman's rank correlation rho
#data: X$var1 and X$var2
#S = 122, p-value = 0.4697
#alternative hypothesis: true rho is not equal to 0
#sample estimates:
# rho
#0.2606061
现在我需要得到一个 csv 格式的 table,第一行为 header:第一列是组的 id,第二列是该组的 rho 值,第三列是该组的 p-value。table 应如下所示:
group,rho,pvalue
G1,0.1272727,0.7329
G2,0.2606061,0.4697
我们可以使用lapply()
从输出对象中提取值,将它们转换为数据帧,并将合并后的数据帧写入csv文件。
# Fix the RNG seed so the example data (and therefore the results) are reproducible.
set.seed(95014)
df <- data.frame(
  group = rep(c("G1", "G2"), each = 10),
  var1 = rnorm(20),
  var2 = rnorm(20)
)
# One Spearman correlation test per group; the result is indexed by group name.
spear <- by(df, df$group, FUN = function(X) cor.test(X$var1, X$var2, method = "spearman"))
此时对象 spear 是一个列表,每个组对应一个元素。我们将逐个处理这些元素,提取所需的值,并以 data.frame() 对象列表的形式返回。
由于列表中的元素是按组命名的,我们使用unique(df$group)
来驱动lapply()
,使我们能够将名称分配为输出数据框中的列。
# Build one single-row data frame per group from the stored test results:
# the group label, the correlation estimate and the p-value.
dfList <- lapply(unique(df$group), function(grp) {
  test_result <- spear[[grp]]
  data.frame(
    group = grp,
    rho_value = test_result$estimate,
    p_value = test_result$p.value
  )
})
最后,我们使用 do.call() 配合 rbind() 将数据帧列表合并为单个数据帧,并将其作为第一个参数传给 write.csv(),然后以逗号分隔值格式写入磁盘。
# Stack the per-group rows into one data frame and write it out without row names.
write.csv(
  do.call(rbind, dfList),
  file = "./data/rhoCalcs.csv",
  row.names = FALSE
)
...和输出:
"group","rho_value","p_value"
"G1",0.260606060606061,0.469675250206491
"G2",0.333333333333333,0.348846243872153
您可以在 by
函数中创建一个数据框来提取您想要的值。
# Run the test inside by() and return a one-row data frame per group, then
# stack those rows into a single table.
result <- do.call(
  rbind,
  by(df, df$group, FUN = function(x) {
    ct <- cor.test(x$var1, x$var2, method = "spearman")
    data.frame(group = x$group[1], rho = ct$estimate, p.value = ct$p.value)
  })
)
result
# group rho p.value
#G1 G1 -0.261 0.470
#G2 G2 -0.442 0.204
要将数据写入 csv,我们可以使用 write.csv
:
# Write the combined table to disk, omitting the row names.
write.csv(result, file = "result.csv", row.names = FALSE)
spear
object是两个列表的列表(每组一个htest
列表):
str(spear)
List of 2
$ G1:List of 8
..$ statistic : Named num 140
.. ..- attr(*, "names")= chr "S"
..$ parameter : NULL
..$ p.value : num 0.682
..$ estimate : Named num 0.152
.. ..- attr(*, "names")= chr "rho"
..$ null.value : Named num 0
.. ..- attr(*, "names")= chr "rho"
..$ alternative: chr "two.sided"
..$ method : chr "Spearman's rank correlation rho"
..$ data.name : chr "X$var1 and X$var2"
..- attr(*, "class")= chr "htest"
$ G2:List of 8
..$ statistic : Named num 180
.. ..- attr(*, "names")= chr "S"
..$ parameter : NULL
..$ p.value : num 0.811
..$ estimate : Named num -0.0909
.. ..- attr(*, "names")= chr "rho"
..$ null.value : Named num 0
.. ..- attr(*, "names")= chr "rho"
..$ alternative: chr "two.sided"
..$ method : chr "Spearman's rank correlation rho"
..$ data.name : chr "X$var1 and X$var2"
..- attr(*, "class")= chr "htest"
- attr(*, "dim")= int 2
- attr(*, "dimnames")=List of 1
..$ df$group: chr [1:2] "G1" "G2"
- attr(*, "call")= language by.data.frame(data = df, INDICES = df$group, FUN = function(X) cor.test(X$var1, X$var2, method = "spearman"))
- attr(*, "class")= chr "by"
broom 包中的 tidy 函数可以将 htest 列表转换为 data.frame(实际上是 tibble)。
library(broom)
# Convert each group's htest result into a one-row tibble.
lapply(X = spear, FUN = tidy)
$G1
# A tibble: 1 x 5
estimate statistic p.value method alternative
<dbl> <dbl> <dbl> <chr> <chr>
1 0.152 140 0.682 Spearman's rank correlation rho two.sided
$G2
# A tibble: 1 x 5
estimate statistic p.value method alternative
<dbl> <dbl> <dbl> <chr> <chr>
1 -0.0909 180 0.811 Spearman's rank correlation rho two.sided
您仍然有一个列表,但它更实用。使用一些 dplyr
函数,我们可以使这个列表看起来像你想要的那样:
library(dplyr)
# Stack the per-group tidy() rows, label each row with its group name, and keep
# only the group, the correlation estimate (renamed to rho) and the p-value.
bind_rows(lapply(spear, tidy)) %>%
  mutate(group = names(spear)) %>%
  select(group, rho = estimate, p.value)
# A tibble: 2 x 3
group rho p.value
<chr> <dbl> <dbl>
1 G1 0.152 0.682
2 G2 -0.0909 0.811
使用 sapply
而不是 by
的解决方案。
gu <- unique(df$group)  # one entry per group, in order of first appearance
# For each group, keep only the estimate (rho) and the p-value of the test.
r <- lapply(gu, function(x) {
  cor.test(~ var1 + var2, data = df[df$group == x, ],
           method = "spearman")[c("estimate", "p.value")]
})
# unlist() yields rho, p, rho, p, ...; filling a two-column matrix by row gives
# one row per group for any number of groups (the row count is no longer
# hard-coded to 2). as.character() keeps the group labels if `gu` is a factor.
r <- cbind(group = as.character(gu),
           matrix(unlist(r), ncol = 2, byrow = TRUE,
                  dimnames = list(NULL, c("rho", "p_value"))))
write.csv(r, file = "xyz.csv", row.names = FALSE, quote = FALSE)
或者,如果您需要更频繁地这样做,您可以将其包装在一个小函数中。
#' Spearman correlation of var1 and var2 for a single group
#'
#' @param x Group identifier; the rows of `data` whose `group` column equals
#'   `x` are used for the test.
#' @param data Data frame with columns `group`, `var1` and `var2`. Defaults to
#'   the `df` object visible from where this function is defined, so existing
#'   one-argument calls keep working.
#' @return Named vector with elements "group", "rho" and "p_value"; c() coerces
#'   the numeric results to character alongside the group label.
my.cor.test <- function(x, data = df) {
  ct <- cor.test(~ var1 + var2, data = data[data$group == x, ],
                 method = "spearman")[c("estimate", "p.value")]
  # as.character() keeps the group label rather than a factor's integer code
  # when c() coerces everything to a common type.
  setNames(c(as.character(x), unlist(ct)), c("group", "rho", "p_value"))
}
gu <- unique(df$group)  # one entry per group, in order of first appearance
# my.cor.test() returns three values per group (group, rho, p_value); vapply()
# checks that shape for every group instead of letting sapply() guess the
# return type. Transposing gives one row per group.
r <- t(vapply(as.character(gu), my.cor.test, character(3)))
write.csv(r, file = "xyz.csv", row.names = FALSE, quote = FALSE)
输出结果为:
group,rho,p_value
G1,-0.696969696969697,0.0311410954595681
G2,0.0545454545454545,0.891638843953118
在 R 中,我需要将两个变量之间相关性分析的结果放在 table.csv 文件中。原始数据框由三列组成:第一列(组)包含观察所属的组,而其他两列(var1 和 var2)包含两个变量的值。
# Example data: two groups ("G1", "G2") of ten observations each, with two
# columns of random standard-normal values.
df <- data.frame(
  group = rep(c("G1", "G2"), each = 10),
  var1 = rnorm(20),
  var2 = rnorm(20)
)
我进行了分组相关性分析。
# Run a Spearman rank correlation test of var1 against var2 separately for each
# level of df$group; by() returns one cor.test() result per group.
spear <- by(df, df$group, FUN = function(X) cor.test(X$var1, X$var2, method = "spearman"))
我得到了这个输出:
spear
#df$group: G1
# Spearman's rank correlation rho
#data: X$var1 and X$var2
#S = 144, p-value = 0.7329
#alternative hypothesis: true rho is not equal to 0
#sample estimates:
# rho
#0.1272727
#---------------------------------------------------------------------------------------
#df$group: G2
# Spearman's rank correlation rho
#data: X$var1 and X$var2
#S = 122, p-value = 0.4697
#alternative hypothesis: true rho is not equal to 0
#sample estimates:
# rho
#0.2606061
现在我需要得到一个 csv 格式的 table,第一行为 header:第一列是组的 id,第二列是该组的 rho 值,第三列是该组的 p-value。table 应如下所示:
group,rho,pvalue
G1,0.1272727,0.7329
G2,0.2606061,0.4697
我们可以使用lapply()
从输出对象中提取值,将它们转换为数据帧,并将合并后的数据帧写入csv文件。
# Fix the RNG seed so the example data (and therefore the results) are reproducible.
set.seed(95014)
df <- data.frame(
  group = rep(c("G1", "G2"), each = 10),
  var1 = rnorm(20),
  var2 = rnorm(20)
)
# One Spearman correlation test per group; the result is indexed by group name.
spear <- by(df, df$group, FUN = function(X) cor.test(X$var1, X$var2, method = "spearman"))
此时对象 spear 是一个列表,每个组对应一个元素。我们将逐个处理这些元素,提取所需的值,并以 data.frame() 对象列表的形式返回。
由于列表中的元素是按组命名的,我们使用unique(df$group)
来驱动lapply()
,使我们能够将名称分配为输出数据框中的列。
# Build one single-row data frame per group from the stored test results:
# the group label, the correlation estimate and the p-value.
dfList <- lapply(unique(df$group), function(grp) {
  test_result <- spear[[grp]]
  data.frame(
    group = grp,
    rho_value = test_result$estimate,
    p_value = test_result$p.value
  )
})
最后,我们使用 do.call() 配合 rbind() 将数据帧列表合并为单个数据帧,并将其作为第一个参数传给 write.csv(),然后以逗号分隔值格式写入磁盘。
# Stack the per-group rows into one data frame and write it out without row names.
write.csv(
  do.call(rbind, dfList),
  file = "./data/rhoCalcs.csv",
  row.names = FALSE
)
...和输出:
"group","rho_value","p_value"
"G1",0.260606060606061,0.469675250206491
"G2",0.333333333333333,0.348846243872153
您可以在 by
函数中创建一个数据框来提取您想要的值。
# Run the test inside by() and return a one-row data frame per group, then
# stack those rows into a single table.
result <- do.call(
  rbind,
  by(df, df$group, FUN = function(x) {
    ct <- cor.test(x$var1, x$var2, method = "spearman")
    data.frame(group = x$group[1], rho = ct$estimate, p.value = ct$p.value)
  })
)
result
# group rho p.value
#G1 G1 -0.261 0.470
#G2 G2 -0.442 0.204
要将数据写入 csv,我们可以使用 write.csv
:
# Write the combined table to disk, omitting the row names.
write.csv(result, file = "result.csv", row.names = FALSE)
spear
object是两个列表的列表(每组一个htest
列表):
str(spear)
List of 2
$ G1:List of 8
..$ statistic : Named num 140
.. ..- attr(*, "names")= chr "S"
..$ parameter : NULL
..$ p.value : num 0.682
..$ estimate : Named num 0.152
.. ..- attr(*, "names")= chr "rho"
..$ null.value : Named num 0
.. ..- attr(*, "names")= chr "rho"
..$ alternative: chr "two.sided"
..$ method : chr "Spearman's rank correlation rho"
..$ data.name : chr "X$var1 and X$var2"
..- attr(*, "class")= chr "htest"
$ G2:List of 8
..$ statistic : Named num 180
.. ..- attr(*, "names")= chr "S"
..$ parameter : NULL
..$ p.value : num 0.811
..$ estimate : Named num -0.0909
.. ..- attr(*, "names")= chr "rho"
..$ null.value : Named num 0
.. ..- attr(*, "names")= chr "rho"
..$ alternative: chr "two.sided"
..$ method : chr "Spearman's rank correlation rho"
..$ data.name : chr "X$var1 and X$var2"
..- attr(*, "class")= chr "htest"
- attr(*, "dim")= int 2
- attr(*, "dimnames")=List of 1
..$ df$group: chr [1:2] "G1" "G2"
- attr(*, "call")= language by.data.frame(data = df, INDICES = df$group, FUN = function(X) cor.test(X$var1, X$var2, method = "spearman"))
- attr(*, "class")= chr "by"
broom 包中的 tidy 函数可以将 htest 列表转换为 data.frame(实际上是 tibble)。
library(broom)
# Convert each group's htest result into a one-row tibble.
lapply(X = spear, FUN = tidy)
$G1
# A tibble: 1 x 5
estimate statistic p.value method alternative
<dbl> <dbl> <dbl> <chr> <chr>
1 0.152 140 0.682 Spearman's rank correlation rho two.sided
$G2
# A tibble: 1 x 5
estimate statistic p.value method alternative
<dbl> <dbl> <dbl> <chr> <chr>
1 -0.0909 180 0.811 Spearman's rank correlation rho two.sided
您仍然有一个列表,但它更实用。使用一些 dplyr
函数,我们可以使这个列表看起来像你想要的那样:
library(dplyr)
# Stack the per-group tidy() rows, label each row with its group name, and keep
# only the group, the correlation estimate (renamed to rho) and the p-value.
bind_rows(lapply(spear, tidy)) %>%
  mutate(group = names(spear)) %>%
  select(group, rho = estimate, p.value)
# A tibble: 2 x 3
group rho p.value
<chr> <dbl> <dbl>
1 G1 0.152 0.682
2 G2 -0.0909 0.811
使用 sapply
而不是 by
的解决方案。
gu <- unique(df$group)  # one entry per group, in order of first appearance
# For each group, keep only the estimate (rho) and the p-value of the test.
r <- lapply(gu, function(x) {
  cor.test(~ var1 + var2, data = df[df$group == x, ],
           method = "spearman")[c("estimate", "p.value")]
})
# unlist() yields rho, p, rho, p, ...; filling a two-column matrix by row gives
# one row per group for any number of groups (the row count is no longer
# hard-coded to 2). as.character() keeps the group labels if `gu` is a factor.
r <- cbind(group = as.character(gu),
           matrix(unlist(r), ncol = 2, byrow = TRUE,
                  dimnames = list(NULL, c("rho", "p_value"))))
write.csv(r, file = "xyz.csv", row.names = FALSE, quote = FALSE)
或者,如果您需要更频繁地这样做,您可以将其包装在一个小函数中。
#' Spearman correlation of var1 and var2 for a single group
#'
#' @param x Group identifier; the rows of `data` whose `group` column equals
#'   `x` are used for the test.
#' @param data Data frame with columns `group`, `var1` and `var2`. Defaults to
#'   the `df` object visible from where this function is defined, so existing
#'   one-argument calls keep working.
#' @return Named vector with elements "group", "rho" and "p_value"; c() coerces
#'   the numeric results to character alongside the group label.
my.cor.test <- function(x, data = df) {
  ct <- cor.test(~ var1 + var2, data = data[data$group == x, ],
                 method = "spearman")[c("estimate", "p.value")]
  # as.character() keeps the group label rather than a factor's integer code
  # when c() coerces everything to a common type.
  setNames(c(as.character(x), unlist(ct)), c("group", "rho", "p_value"))
}
gu <- unique(df$group)  # one entry per group, in order of first appearance
# my.cor.test() returns three values per group (group, rho, p_value); vapply()
# checks that shape for every group instead of letting sapply() guess the
# return type. Transposing gives one row per group.
r <- t(vapply(as.character(gu), my.cor.test, character(3)))
write.csv(r, file = "xyz.csv", row.names = FALSE, quote = FALSE)
输出结果为:
group,rho,p_value
G1,-0.696969696969697,0.0311410954595681
G2,0.0545454545454545,0.891638843953118