Parallelization/Optimization of R loops containing *apply
I am trying to implement an algorithm that picks the 5 vectors that are "furthest apart" (by some metric) out of 20. To do this I used combnPrim to get a list of roughly 77,000 vectors representing every possible grouping of 5 vectors. Each vector has about 25 elements.
To parallelize the loop below I tried the doParallel library, but I keep messing it up somehow and getting -Inf as a result. I have read the doParallel documentation, but I could not map what I saw there onto my case; most likely my limited knowledge of R makes the problem look harder than it really is.
# df2 can be thought of as (thanks to @Oliver):
df2 <- as.data.frame(replicate(20, rnorm(10)))
names(df2) <- LETTERS[1:20]
comb <- combnPrim(df2,5)
range <- length(comb)/5
result_vector <- vector(mode="list",length = range )
for (i in seq(range))
{
total <- as.numeric(0)
for ( j in seq(4))
{
for ( k in seq(j+1,5))
{
diff <- sum( ( mapply( '/',unlist( comb[,i][j] ) - unlist( comb[,i][k] ), ( unlist(comb[,i][j] ) + unlist( comb[,i][k] )) / 2 )^2))
total = total + diff
}
}
result_vector[[i]] <- total
}
So the question is: how can I approach this so that the computation runs faster? My idea was to parallelize the outermost loop, where the range variable is around 15,000. All threads would need access to comb and to the shared variable result_vector. I believe my approach is not impossible, but I could use some guidance.
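For reference, a minimal sketch of the kind of foreach/doParallel parallelization described above might look like the following. This is only an illustration, assuming comb comes from combnPrim(df2, 5) as in the code above; each iteration returns its own total, so no shared result_vector has to be written to from the workers.
library(doParallel)

cl <- makeCluster(2)                  # pick a worker count that fits your machine
registerDoParallel(cl)

# foreach exports comb to the workers automatically; each worker gets its own copy
result_vector <- foreach(i = seq(ncol(comb))) %dopar% {
  total <- 0
  for (j in seq(4)) {
    for (k in seq(j + 1, 5)) {
      x <- unlist(comb[, i][j])
      y <- unlist(comb[, i][k])
      total <- total + sum(((x - y) / ((x + y) / 2))^2)
    }
  }
  total                               # collected into a list, one element per column
}

stopCluster(cl)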
This approach relies on creating a helper function and then using the base combn() function to carry out the inner loops.
fn_dist <- function(x, y){
sum(((x - y) / ((x+y) / 2))^2)
}
system.time({
result_vector3 <- apply(comb, 2, function(comb_i) sum(combn(5, 2, FUN = function(x) fn_dist(comb_i[[x[1]]], comb_i[[x[2]]]))))
})
# user system elapsed
# 1.12 0.00 1.15
Using apply was intentional, because future_apply is very easy to drop in. Unfortunately, it performed worse on my 2-core machine:
library(future.apply)
plan(multiprocess)
system.time({
result_vector_future <- future_apply(comb, 2, function(comb_i) sum(combn(5, 2, FUN = function(x) fn_dist(comb_i[[x[1]]], comb_i[[x[2]]]))))
})
# user system elapsed
# 1.59 0.03 1.92
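Side note: in newer versions of the future package (1.20 and later, if I recall correctly) plan(multiprocess) is deprecated; the portable replacement would be something like:
library(future.apply)
plan(multisession, workers = 2)   # explicit worker count is optional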
If you prefer for loops, these small changes make it similar in performance to the regular apply statement:
system.time({
for (i in seq(range)){
total <- as.numeric(0)
comb_i <- comb[, i]
for ( j in seq(4))
{
for ( k in seq(j+1,5))
{
diff <- fn_dist(comb_i[[j]], comb_i[[k]])
# diff <- sum( ( (unlist( comb[,i][j] ) - unlist( comb[,i][k] )) / (( unlist(comb[,i][j]) + unlist( comb[,i][k] ) ) / 2 ) )^2 )
total = total + diff
}
}
result_vector[[i]] <- total
}
})
# user system elapsed
# 1.24 0.05 1.32
For reference, using @jogo's suggestion and only removing the mapply helps a lot, but the workarounds above help even more.
system.time({
for (i in seq(range)){
total <- as.numeric(0)
# comb_i <- comb[, i]
for ( j in seq(4))
{
for ( k in seq(j+1,5))
{
# diff <- fn_dist(comb_i[[j]], comb_i[[k]])
diff <- sum( ( (unlist( comb[,i][j] ) - unlist( comb[,i][k] )) / (( unlist(comb[,i][j]) + unlist( comb[,i][k] ) ) / 2 ) )^2 )
total = total + diff
}
}
result_vector[[i]] <- total
}
})
# user system elapsed
# 2.40 0.06 2.50
Finally, this is very similar to dist. If you are happy with the default (Euclidean) method, you can use:
system.time({
results_different_method <- apply(comb,2, function(l) sum(stats::dist(do.call(rbind,l))))
})
# user system elapsed
# 0.70 0.00 0.74
library(proxy)
system.time({
result_same_as_OP <- apply(comb, 2, function (l) sum(proxy::dist(do.call(rbind, l), method = fn_dist)))
})
# user system elapsed
# 1.58 0.05 1.67
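As a quick sanity check (assuming result_vector from the question's loop is still in the workspace), the proxy version should reproduce the original values:
all.equal(unlist(result_vector), as.numeric(result_same_as_OP))   # expect TRUE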
I tried to shrink it down to a one-liner, but it ended up slower:
system.time({
result_final <- combn(ncol(df2), 5, FUN = function(cols) sum(proxy::dist(t(df2[, cols]), method = fn_dist)))
})
# user system elapsed
# 3.71 0.08 3.80
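Since the original goal was to pick the 5 vectors that are furthest apart, the winning combination can be read off result_final afterwards. A hypothetical follow-up, relying on the fact that the scores come back in the same column order as combn(ncol(df2), 5):
cols_per_comb <- combn(ncol(df2), 5)               # 5 x 15504 matrix of column indices
best <- cols_per_comb[, which.max(result_final)]   # indices of the best 5-column group
names(df2)[best]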
Coming back to tidy up these ideas, I tested two new variants (the functions iloop() and cloop()):
# https://www.bioconductor.org/packages/release/bioc/html/RBGL.html
# if (!requireNamespace("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
#
# BiocManager::install("RBGL")
# BiocManager::install("gRbase")
library("gRbase")
library("proxy") ## proxy::dist()
library("microbenchmark")
# df2 can be thought of as (thanks to @Oliver):
df2 <- as.data.frame(replicate(20, rnorm(10)))
names(df2) <- LETTERS[1:20]
comb <- combnPrim(df2,5, simplify = TRUE)
ori <- function(comb) {
range <- length(comb)/5
result_vector <- vector(mode="list",length = range )
for (i in seq(range))
{
total <- as.numeric(0)
for ( j in seq(4))
{
for ( k in seq(j+1,5))
{
diff <- sum( ( mapply( '/',unlist( comb[,i][j] ) - unlist( comb[,i][k] ), ( unlist(comb[,i][j] ) + unlist( comb[,i][k] )) / 2 )^2))
total = total + diff
}
}
result_vector[[i]] <- total
}
return(result_vector)
}
nomapply <- function(comb) {
range <- ncol(comb) ## length(comb)/5
result_vector <- vector(mode="list",length = range )
for (i in seq(range)) {
total <- as.numeric(0)
for ( j in seq(4)) for ( k in seq(j+1,5)) {
diff <- sum( ( (unlist( comb[,i][j] ) - unlist( comb[,i][k] )) /
(( unlist(comb[,i][j] ) + unlist( comb[,i][k] )) / 2) )^2)
total = total + diff
}
result_vector[[i]] <- total
}
return(result_vector)
}
ind <- function(comb) {
range <- ncol(comb) ## length(comb)/5
result_vector <- vector(mode="list",length = range )
for (i in seq(range)) {
total <- as.numeric(0)
for ( j in seq(4)) for ( k in seq(j+1,5)) {
diff <- sum( ( (unlist( comb[j,i] ) - unlist( comb[k,i] )) /
(( unlist(comb[j,i] ) + unlist( comb[k,i] )) / 2) )^2)
total = total + diff
}
result_vector[[i]] <- total
}
return(result_vector)
}
nounlist <- function(comb) {
range <- ncol(comb) ## length(comb)/5
result_vector <- vector(mode="list",length = range )
for (i in seq(range)) {
total <- as.numeric(0)
for ( j in seq(4)) for ( k in seq(j+1,5)) {
diff <- sum( ( (comb[j,i][[1]] - comb[k,i][[1]]) / (( comb[j,i][[1]] + comb[k,i][[1]]) / 2) )^2)
total = total + diff
}
result_vector[[i]] <- total
}
return(result_vector)
}
range <- ncol(comb) ## length(comb)/5
fn_dist <- function(x, y) sum(((x-y) / ((x+y) / 2))^2)
iloop <- function(i) {
total <- as.numeric(0)
for ( j in seq(4)) {
for ( k in seq(j+1,5)) {
diff <- fn_dist(comb[j,i][[1]], comb[k,i][[1]])
total = total + diff
}
}
return(total)
}
# result_vector <- sapply(1:range, iloop)
cloop <- function(ci) {
total <- as.numeric(0)
for ( j in seq(4)) {
for ( k in seq(j+1,5)) {
diff <- fn_dist(ci[j][[1]], ci[k][[1]])
total = total + diff
}
}
return(total)
}
# result_vector <- apply(comb, 2, cloop)
# r <- apply(comb,2, function(l) sum(proxy::dist(method = fn_dist, do.call(rbind,l))))
microbenchmark(orig=ori(comb), orig2=nomapply(comb), orig3=ind(comb), orig4=nounlist(comb),
iloop=sapply(1:range, iloop), cloop=apply(comb, 2, cloop), unit = "relative",
proxy=apply(comb,2, function(l) sum(proxy::dist(method = fn_dist, do.call(rbind,l)))),
times=10)
These are the results:
# > microbenchmark(orig=ori(comb), orig2=nomapply(comb), orig3=ind(comb), orig4=nounlist(comb),
# + iloop=sapply(1:range, iloop), cloop=apply(comb, 2, cloop), unit = "relative",
# + proxy=apply(comb,2, function(l) sum(proxy::dist(method = fn_dist, do.call(rbind,l)))),
# + times=10)
# Unit: relative
# expr min lq mean median uq max neval cld
# orig 8.647526 8.648012 8.429268 8.597876 8.551316 7.1967369 10 e
# orig2 2.613248 2.627175 2.564267 2.612007 2.633428 2.1851621 10 d
# orig3 1.949486 1.969982 1.911910 1.933789 1.963484 1.6318174 10 b
# orig4 1.000000 1.000000 1.000000 1.000000 1.000000 1.0000000 10 a
# iloop 1.127511 1.146384 1.118755 1.149810 1.140409 0.9477470 10 a
# cloop 1.137061 1.154385 1.128315 1.149292 1.143234 0.9702812 10 a
# proxy 2.142964 2.127388 2.078447 2.100761 2.067607 1.9183790 10 c
The small changes in the inner loop bring the biggest performance gains:
- using / on whole vectors (no mapply()),
- condensed indexing (comb[j,i] instead of the double indexing comb[,i][j]), and
- using ...[[1]] instead of unlist().
For clear code I would prefer the variant cloop() or the use of proxy::dist().
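And since the original question was about parallelizing the outer loop: cloop() drops straight into future_apply as well. A rough sketch, with the same caveat as above that the parallel overhead may outweigh the gain on a 2-core machine:
library(future.apply)
plan(multisession)
result_vector_parallel <- future_apply(comb, 2, cloop)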