如何向量化用于排列的 for 循环?
How can I vectorize a for loop used for permutation?
我正在使用 R 进行分析,并想执行排列测试。为此,我使用了一个非常慢的 for
循环,我想使代码尽可能快。我认为矢量化是关键。但是,经过几天的尝试,我仍然没有找到合适的解决方案来重新编码。非常感谢您的帮助!
我有一个对称矩阵,其中包含种群之间的成对生态距离 ("dist.mat"
)。我想随机打乱这个距离矩阵的行和列以生成置换距离矩阵 ("dist.mat.mix"
)。然后,我想将上三角值保存在这个置换距离矩阵(大小为"nr.pairs"
)中。这个过程应该重复几次 ("nr.runs"
)。结果应该是一个矩阵 ("result"
),其中包含多次运行的置换上三角值,维度为 nrow=nr.runs
和 ncol=nr.pairs
。下面是一个使用 for 循环做我想做的事情的示例 R 代码:
# example number of populations
nr.pops <- 20
# example distance matrix
dist.mat <- as.matrix(dist(matrix(rnorm(20), nr.pops, 5)))
# example number of runs
nr.runs <- 1000
# find number of unique pairwise distances in distance matrix
nr.pairs <- nr.pops*(nr.pops-1) / 2
# start loop
result <- matrix(NA, nr.runs, nr.pairs)
for (i in 1:nr.runs) {
mix <- sample(nr.pops, replace=FALSE)
dist.mat.mix <- dist.mat[mix, mix]
result[i, ] <- dist.mat.mix[upper.tri(dist.mat.mix, diag=FALSE)]
}
# inspect result
result
我已经使用 base::replicate
函数进行了一些笨拙的矢量化尝试,但这并没有加快速度。实际上它有点慢:
# my for loop approach
my.for.loop <- function() {
result <- matrix(NA, nr.runs, nr.pairs)
for (i in 1:nr.runs){
mix <- sample(nr.pops, replace=FALSE)
dist.mat.mix <- dist.mat[mix ,mix]
result[i, ] <- dist.mat.mix[upper.tri(dist.mat.mix, diag=FALSE)]
}
}
# my replicate approach
my.replicate <- function() {
results <- t(replicate(nr.runs, {
mix <- sample(nr.pops, replace=FALSE)
dist.mat.mix <- dist.mat[mix, mix]
dist.mat.mix[upper.tri(dist.mat.mix, diag=FALSE)]
}))
}
# compare speed
require(microbenchmark)
microbenchmark(my.for.loop(), my.replicate(), times=100L)
# Unit: milliseconds
# expr min lq mean median uq max neval
# my.for.loop() 23.1792 24.4759 27.1274 25.5134 29.0666 61.5616 100
# my.replicate() 25.5293 27.4649 30.3495 30.2533 31.4267 68.6930 100
如果您知道如何使用简洁的矢量化解决方案加速我的 for 循环,我将非常感谢您的支持。这可能吗?
稍微快一点:
minem <- function() {
result <- matrix(NA, nr.runs, nr.pairs)
ut <- upper.tri(matrix(NA, 4, 4)) # create upper triangular index matrix outside loop
for (i in 1:nr.runs) {
mix <- sample.int(nr.pops) # slightly faster sampling function
result[i, ] <- dist.mat[mix, mix][ut]
}
result
}
microbenchmark(my.for.loop(), my.replicate(), minem(), times = 100L)
# Unit: microseconds
# expr min lq mean median uq max neval cld
# my.for.loop() 75.062 78.222 96.25288 80.1975 104.6915 249.284 100 a
# my.replicate() 118.519 122.667 152.25681 126.0250 165.1355 495.407 100 a
# minem() 45.432 48.000 104.23702 49.5800 52.9380 4848.986 100 a
更新:
我们可以稍微不同地获得必要的矩阵索引,因此我们可以立即对元素进行子集化:
minem4 <- function() {
n <- dim(dist.mat)[1]
ut <- upper.tri(matrix(NA, n, n))
im <- matrix(1:n, n, n)
p1 <- im[ut]
p2 <- t(im)[ut]
dm <- unlist(dist.mat)
si <- replicate(nr.runs, sample.int(nr.pops))
p <- (si[p1, ] - 1L) * n + si[p2, ]
result2 <- matrix(dm[p], nr.runs, nr.pairs, byrow = T)
result2
}
microbenchmark(my.for.loop(), minem(), minem4(), times = 100L)
# Unit: milliseconds
# expr min lq mean median uq max neval cld
# my.for.loop() 13.797526 14.977970 19.14794 17.071401 23.161867 29.98952 100 b
# minem() 8.366614 9.080490 11.82558 9.701725 15.748537 24.44325 100 a
# minem4() 7.716343 8.169477 11.91422 8.723947 9.997626 208.90895 100 a
更新2:
我们可以使用 dqrng
示例函数获得一些额外的加速:
minem5 <- function() {
n <- dim(dist.mat)[1]
ut <- upper.tri(matrix(NA, n, n))
im <- matrix(1:n, n, n)
p1 <- im[ut]
p2 <- t(im)[ut]
dm <- unlist(dist.mat)
require(dqrng)
si <- replicate(nr.runs, dqsample.int(nr.pops))
p <- (si[p1, ] - 1L) * n + si[p2, ]
result2 <- matrix(dm[p], nr.runs, nr.pairs, byrow = T)
result2
}
microbenchmark(my.for.loop(), minem(), minem4(), minem5(), times = 100L)
# Unit: milliseconds
# expr min lq mean median uq max neval cld
# my.for.loop() 13.648983 14.672587 17.713467 15.265771 16.967894 36.18290 100 d
# minem() 8.282466 8.773725 10.679960 9.279602 10.335206 27.03683 100 c
# minem4() 7.719503 8.208984 9.039870 8.493231 9.097873 25.32463 100 b
# minem5() 6.134911 6.379850 7.226348 6.733035 7.195849 19.02458 100 a
我正在使用 R 进行分析,并想执行排列测试。为此,我使用了一个非常慢的 for
循环,我想使代码尽可能快。我认为矢量化是关键。但是,经过几天的尝试,我仍然没有找到合适的解决方案来重新编码。非常感谢您的帮助!
我有一个对称矩阵,其中包含种群之间的成对生态距离 ("dist.mat"
)。我想随机打乱这个距离矩阵的行和列以生成置换距离矩阵 ("dist.mat.mix"
)。然后,我想将上三角值保存在这个置换距离矩阵(大小为"nr.pairs"
)中。这个过程应该重复几次 ("nr.runs"
)。结果应该是一个矩阵 ("result"
),其中包含多次运行的置换上三角值,维度为 nrow=nr.runs
和 ncol=nr.pairs
。下面是一个使用 for 循环做我想做的事情的示例 R 代码:
# example number of populations
nr.pops <- 20
# example distance matrix
dist.mat <- as.matrix(dist(matrix(rnorm(20), nr.pops, 5)))
# example number of runs
nr.runs <- 1000
# find number of unique pairwise distances in distance matrix
nr.pairs <- nr.pops*(nr.pops-1) / 2
# start loop
result <- matrix(NA, nr.runs, nr.pairs)
for (i in 1:nr.runs) {
mix <- sample(nr.pops, replace=FALSE)
dist.mat.mix <- dist.mat[mix, mix]
result[i, ] <- dist.mat.mix[upper.tri(dist.mat.mix, diag=FALSE)]
}
# inspect result
result
我已经使用 base::replicate
函数进行了一些笨拙的矢量化尝试,但这并没有加快速度。实际上它有点慢:
# my for loop approach
my.for.loop <- function() {
result <- matrix(NA, nr.runs, nr.pairs)
for (i in 1:nr.runs){
mix <- sample(nr.pops, replace=FALSE)
dist.mat.mix <- dist.mat[mix ,mix]
result[i, ] <- dist.mat.mix[upper.tri(dist.mat.mix, diag=FALSE)]
}
}
# my replicate approach
my.replicate <- function() {
results <- t(replicate(nr.runs, {
mix <- sample(nr.pops, replace=FALSE)
dist.mat.mix <- dist.mat[mix, mix]
dist.mat.mix[upper.tri(dist.mat.mix, diag=FALSE)]
}))
}
# compare speed
require(microbenchmark)
microbenchmark(my.for.loop(), my.replicate(), times=100L)
# Unit: milliseconds
# expr min lq mean median uq max neval
# my.for.loop() 23.1792 24.4759 27.1274 25.5134 29.0666 61.5616 100
# my.replicate() 25.5293 27.4649 30.3495 30.2533 31.4267 68.6930 100
如果您知道如何使用简洁的矢量化解决方案加速我的 for 循环,我将非常感谢您的支持。这可能吗?
稍微快一点:
minem <- function() {
result <- matrix(NA, nr.runs, nr.pairs)
ut <- upper.tri(matrix(NA, 4, 4)) # create upper triangular index matrix outside loop
for (i in 1:nr.runs) {
mix <- sample.int(nr.pops) # slightly faster sampling function
result[i, ] <- dist.mat[mix, mix][ut]
}
result
}
microbenchmark(my.for.loop(), my.replicate(), minem(), times = 100L)
# Unit: microseconds
# expr min lq mean median uq max neval cld
# my.for.loop() 75.062 78.222 96.25288 80.1975 104.6915 249.284 100 a
# my.replicate() 118.519 122.667 152.25681 126.0250 165.1355 495.407 100 a
# minem() 45.432 48.000 104.23702 49.5800 52.9380 4848.986 100 a
更新: 我们可以稍微不同地获得必要的矩阵索引,因此我们可以立即对元素进行子集化:
minem4 <- function() {
n <- dim(dist.mat)[1]
ut <- upper.tri(matrix(NA, n, n))
im <- matrix(1:n, n, n)
p1 <- im[ut]
p2 <- t(im)[ut]
dm <- unlist(dist.mat)
si <- replicate(nr.runs, sample.int(nr.pops))
p <- (si[p1, ] - 1L) * n + si[p2, ]
result2 <- matrix(dm[p], nr.runs, nr.pairs, byrow = T)
result2
}
microbenchmark(my.for.loop(), minem(), minem4(), times = 100L)
# Unit: milliseconds
# expr min lq mean median uq max neval cld
# my.for.loop() 13.797526 14.977970 19.14794 17.071401 23.161867 29.98952 100 b
# minem() 8.366614 9.080490 11.82558 9.701725 15.748537 24.44325 100 a
# minem4() 7.716343 8.169477 11.91422 8.723947 9.997626 208.90895 100 a
更新2:
我们可以使用 dqrng
示例函数获得一些额外的加速:
minem5 <- function() {
n <- dim(dist.mat)[1]
ut <- upper.tri(matrix(NA, n, n))
im <- matrix(1:n, n, n)
p1 <- im[ut]
p2 <- t(im)[ut]
dm <- unlist(dist.mat)
require(dqrng)
si <- replicate(nr.runs, dqsample.int(nr.pops))
p <- (si[p1, ] - 1L) * n + si[p2, ]
result2 <- matrix(dm[p], nr.runs, nr.pairs, byrow = T)
result2
}
microbenchmark(my.for.loop(), minem(), minem4(), minem5(), times = 100L)
# Unit: milliseconds
# expr min lq mean median uq max neval cld
# my.for.loop() 13.648983 14.672587 17.713467 15.265771 16.967894 36.18290 100 d
# minem() 8.282466 8.773725 10.679960 9.279602 10.335206 27.03683 100 c
# minem4() 7.719503 8.208984 9.039870 8.493231 9.097873 25.32463 100 b
# minem5() 6.134911 6.379850 7.226348 6.733035 7.195849 19.02458 100 a