有没有一种直接的方法可以在 Fortran 中使用随机数生成器进行并发计算?
Is there a straightforward way to do concurrent calculations with a random number generator in Fortran?
我有一些用 Fortran 95 编写的有限元代码,并且已经对其进行了优化,因此现在可以在 2GB 内存占用下处理超过 1600 万个单元。
我的代码中的源函数不光滑,所以我使用(分层)蒙特卡洛方法进行积分,这需要一个随机数生成器来选取样本位置。
我已经尝试使用 gfortran-9 并加上 -fopenmp -Ofast -ftree-parallelize-loops=4 选项进行编译,但是调用随机数生成器的循环不会被并行化。我也尝试了 do concurrent,但显然行不通,因为 random_number 不是 'pure' 过程。
我也尝试过对循环进行分块(blocking),但这也没有用。
这是我正在谈论的代码
! Driver loop over all elements. The outer k loop splits the index range into
! n_els consecutive blocks of n_els indices each, so i visits 1 .. n_els**2
! in increasing order.
do k=1,n_els ! total elements is n_els**2; k is the index of the current block
do i=1+ (k-1)*n_els ,k*n_els
! reset the per-element work arrays before each do_element call
supp_vec = 0
integ_vec = 0.0_wp
! random_number is called inside this subroutine
! (presumably it fills supp_vec and integ_vec for element i - confirm against do_element)
call do_element(ind, n_els, i, num_points_per_strat, &
strat_rows, strat_cols, supp_vec, integ_vec)
! copy the four entries of integ_vec into sc_vec at the positions given by supp_vec
do j=1, 4
sc_vec(supp_vec(j) ) = integ_vec(j)
end do
! report progress whenever i is a multiple of one tenth of the element count
! NOTE(review): (n_els**2)/10 is an integer division and is 0 when n_els**2 < 10,
! which would make the divisor of mod() zero - confirm n_els is always >= 4
if (mod( i , (n_els**2)/10) == 0) print*, i*10/((n_els**2)/10), "% done"
end do
end do
看来我可以把各个块写入文件,然后调用该例程的 n 个不同实例。但我认为一定有更简洁的方法来做到这一点。关于如何加快速度,有什么建议吗?
我正在考虑先把一个块所需的采样点(数量取决于内存限制)写入一个数组,然后在调用子例程时把它传进去。在动手尝试之前,我想先看看是否有人能建议更好的方法。最好尽可能减少内存占用。
从版本 7 开始,GFortran 具有并行随机数生成器。在实现它时,这是我用来验证性能确实随着线程数量的增加而扩展的 OpenMP 代码(来自 https://gcc.gnu.org/ml/gcc-patches/2015-12/msg02110.html ):
! Benchmark generating random numbers
! Janne Blomqvist 2015
!
! Times two ways of filling a large array with random_number, for default
! real (genr4) and double precision (genr8):
!   1. one scalar random_number call per element inside a parallel loop;
!   2. one array random_number call per block, one block per thread.
! Timings are printed in system_clock ticks together with the ratio of the two.
!
! OpenMP-only statements use the "!$" conditional-compilation sentinel, so the
! file builds with or without OpenMP enabled and needs no preprocessor pass.
program randbench
  !$ use omp_lib, only: omp_get_max_threads
  implicit none
  integer, parameter :: dp = kind(0.d0)              ! double precision
  integer, parameter :: i64 = selected_int_kind(18)  ! At least 64-bit integer

  !$ print *, "Using up to ", omp_get_max_threads(), " threads."
  call genr4
  call genr8

contains

  !> Bounds lo:hi of block number iblock (0-based) when 1..ntotal is split
  !> into nblocks consecutive blocks. Every block holds ntotal/nblocks
  !> elements except the last one, which also takes the remainder so that
  !> the blocks together cover the whole range.
  pure subroutine block_bounds(iblock, nblocks, ntotal, lo, hi)
    integer, intent(in)  :: iblock, nblocks, ntotal
    integer, intent(out) :: lo, hi
    integer :: blocksize

    blocksize = ntotal / nblocks
    lo = iblock * blocksize + 1
    if (iblock == nblocks - 1) then
       hi = ntotal
    else
       hi = lo + blocksize - 1
    end if
  end subroutine block_bounds

  !> Ratio num/den of two tick counts as a double precision value. The
  !> denominator is clamped to one tick so that a clock too coarse to
  !> resolve the measured interval cannot cause a division by zero.
  pure function tick_ratio(num, den) result(ratio)
    integer(i64), intent(in) :: num, den
    real(dp) :: ratio

    ratio = real(num, dp) / real(max(den, 1_i64), dp)
  end function tick_ratio

  !> Benchmark for default real kind.
  subroutine genr4
    integer, parameter :: n = int(1e7)
    real, save :: r(n)   ! static storage: too large for an automatic array
    integer :: i, blocks, l, h
    integer(i64) :: t1, t2, td

    print *, "Generate default real random variables"

    ! Pass 1: one scalar call per element.
    call system_clock (t1)
    !$omp parallel do default(none) shared(r) private(i)
    do i = 1, n
       call random_number(r(i))
    end do
    !$omp end parallel do
    call system_clock (t2)
    td = t2 - t1
    print *, "Generating ", n, " default reals individually took ", td, " ticks."

    ! Pass 2: one array call per block. Without OpenMP there is a single
    ! block covering the whole array.
    call system_clock (t1)
    blocks = 1
    !$ blocks = omp_get_max_threads()
    !$omp parallel do default(none) shared(r, blocks) private(i, l, h)
    do i = 0, blocks - 1
       call block_bounds(i, blocks, n, l, h)
       call random_number(r(l:h))
    end do
    !$omp end parallel do
    call system_clock (t2)
    print *, "Generating ", n, " default reals as an array took ", t2-t1, &
         " ticks. => ind/arr = ", tick_ratio(td, t2 - t1)
  end subroutine genr4

  !> Benchmark for double precision reals; same structure as genr4.
  subroutine genr8
    integer, parameter :: n = int(1e7)
    real(dp), save :: r(n)   ! static storage: too large for an automatic array
    integer :: i, blocks, l, h
    integer(i64) :: t1, t2, td

    print *, "Generate double real random variables"

    ! Pass 1: one scalar call per element.
    call system_clock (t1)
    !$omp parallel do default(none) shared(r) private(i)
    do i = 1, n
       call random_number(r(i))
    end do
    !$omp end parallel do
    call system_clock (t2)
    td = t2 - t1
    print *, "Generating ", n, " double reals individually took ", td, " ticks."

    ! Pass 2: one array call per block. Without OpenMP there is a single
    ! block covering the whole array.
    call system_clock (t1)
    blocks = 1
    !$ blocks = omp_get_max_threads()
    !$omp parallel do default(none) shared(r, blocks) private(i, l, h)
    do i = 0, blocks - 1
       call block_bounds(i, blocks, n, l, h)
       call random_number(r(l:h))
    end do
    !$omp end parallel do
    call system_clock (t2)
    print *, "Generating ", n, " double reals as an array took ", t2-t1, &
         " ticks. => ind/arr = ", tick_ratio(td, t2 - t1)
  end subroutine genr8

end program randbench
我有一些用 Fortran 95 编写的有限元代码,并且已经对其进行了优化,因此现在可以在 2GB 内存占用下处理超过 1600 万个单元。
我的代码中的源函数不光滑,所以我使用(分层)蒙特卡洛方法进行积分,这需要一个随机数生成器来选取样本位置。
我已经尝试使用 gfortran-9 并加上 -fopenmp -Ofast -ftree-parallelize-loops=4 选项进行编译,但是调用随机数生成器的循环不会被并行化。我也尝试了 do concurrent,但显然行不通,因为 random_number 不是 'pure' 过程。
这是我正在谈论的代码
! Driver loop over all elements. The outer k loop splits the index range into
! n_els consecutive blocks of n_els indices each, so i visits 1 .. n_els**2
! in increasing order.
do k=1,n_els ! total elements is n_els**2; k is the index of the current block
do i=1+ (k-1)*n_els ,k*n_els
! reset the per-element work arrays before each do_element call
supp_vec = 0
integ_vec = 0.0_wp
! random_number is called inside this subroutine
! (presumably it fills supp_vec and integ_vec for element i - confirm against do_element)
call do_element(ind, n_els, i, num_points_per_strat, &
strat_rows, strat_cols, supp_vec, integ_vec)
! copy the four entries of integ_vec into sc_vec at the positions given by supp_vec
do j=1, 4
sc_vec(supp_vec(j) ) = integ_vec(j)
end do
! report progress whenever i is a multiple of one tenth of the element count
! NOTE(review): (n_els**2)/10 is an integer division and is 0 when n_els**2 < 10,
! which would make the divisor of mod() zero - confirm n_els is always >= 4
if (mod( i , (n_els**2)/10) == 0) print*, i*10/((n_els**2)/10), "% done"
end do
end do
看来我可以把各个块写入文件,然后调用该例程的 n 个不同实例。但我认为一定有更简洁的方法来做到这一点。关于如何加快速度,有什么建议吗?
我正在考虑先把一个块所需的采样点(数量取决于内存限制)写入一个数组,然后在调用子例程时把它传进去。在动手尝试之前,我想先看看是否有人能建议更好的方法。最好尽可能减少内存占用。
从版本 7 开始,GFortran 具有并行随机数生成器。在实现它时,这是我用来验证性能确实随着线程数量的增加而扩展的 OpenMP 代码(来自 https://gcc.gnu.org/ml/gcc-patches/2015-12/msg02110.html ):
! Benchmark generating random numbers
! Janne Blomqvist 2015
!
! Times two ways of filling a large array with random_number, for default
! real (genr4) and double precision (genr8):
!   1. one scalar random_number call per element inside a parallel loop;
!   2. one array random_number call per block, one block per thread.
! Timings are printed in system_clock ticks together with the ratio of the two.
!
! OpenMP-only statements use the "!$" conditional-compilation sentinel, so the
! file builds with or without OpenMP enabled and needs no preprocessor pass.
program randbench
  !$ use omp_lib, only: omp_get_max_threads
  implicit none
  integer, parameter :: dp = kind(0.d0)              ! double precision
  integer, parameter :: i64 = selected_int_kind(18)  ! At least 64-bit integer

  !$ print *, "Using up to ", omp_get_max_threads(), " threads."
  call genr4
  call genr8

contains

  !> Bounds lo:hi of block number iblock (0-based) when 1..ntotal is split
  !> into nblocks consecutive blocks. Every block holds ntotal/nblocks
  !> elements except the last one, which also takes the remainder so that
  !> the blocks together cover the whole range.
  pure subroutine block_bounds(iblock, nblocks, ntotal, lo, hi)
    integer, intent(in)  :: iblock, nblocks, ntotal
    integer, intent(out) :: lo, hi
    integer :: blocksize

    blocksize = ntotal / nblocks
    lo = iblock * blocksize + 1
    if (iblock == nblocks - 1) then
       hi = ntotal
    else
       hi = lo + blocksize - 1
    end if
  end subroutine block_bounds

  !> Ratio num/den of two tick counts as a double precision value. The
  !> denominator is clamped to one tick so that a clock too coarse to
  !> resolve the measured interval cannot cause a division by zero.
  pure function tick_ratio(num, den) result(ratio)
    integer(i64), intent(in) :: num, den
    real(dp) :: ratio

    ratio = real(num, dp) / real(max(den, 1_i64), dp)
  end function tick_ratio

  !> Benchmark for default real kind.
  subroutine genr4
    integer, parameter :: n = int(1e7)
    real, save :: r(n)   ! static storage: too large for an automatic array
    integer :: i, blocks, l, h
    integer(i64) :: t1, t2, td

    print *, "Generate default real random variables"

    ! Pass 1: one scalar call per element.
    call system_clock (t1)
    !$omp parallel do default(none) shared(r) private(i)
    do i = 1, n
       call random_number(r(i))
    end do
    !$omp end parallel do
    call system_clock (t2)
    td = t2 - t1
    print *, "Generating ", n, " default reals individually took ", td, " ticks."

    ! Pass 2: one array call per block. Without OpenMP there is a single
    ! block covering the whole array.
    call system_clock (t1)
    blocks = 1
    !$ blocks = omp_get_max_threads()
    !$omp parallel do default(none) shared(r, blocks) private(i, l, h)
    do i = 0, blocks - 1
       call block_bounds(i, blocks, n, l, h)
       call random_number(r(l:h))
    end do
    !$omp end parallel do
    call system_clock (t2)
    print *, "Generating ", n, " default reals as an array took ", t2-t1, &
         " ticks. => ind/arr = ", tick_ratio(td, t2 - t1)
  end subroutine genr4

  !> Benchmark for double precision reals; same structure as genr4.
  subroutine genr8
    integer, parameter :: n = int(1e7)
    real(dp), save :: r(n)   ! static storage: too large for an automatic array
    integer :: i, blocks, l, h
    integer(i64) :: t1, t2, td

    print *, "Generate double real random variables"

    ! Pass 1: one scalar call per element.
    call system_clock (t1)
    !$omp parallel do default(none) shared(r) private(i)
    do i = 1, n
       call random_number(r(i))
    end do
    !$omp end parallel do
    call system_clock (t2)
    td = t2 - t1
    print *, "Generating ", n, " double reals individually took ", td, " ticks."

    ! Pass 2: one array call per block. Without OpenMP there is a single
    ! block covering the whole array.
    call system_clock (t1)
    blocks = 1
    !$ blocks = omp_get_max_threads()
    !$omp parallel do default(none) shared(r, blocks) private(i, l, h)
    do i = 0, blocks - 1
       call block_bounds(i, blocks, n, l, h)
       call random_number(r(l:h))
    end do
    !$omp end parallel do
    call system_clock (t2)
    print *, "Generating ", n, " double reals as an array took ", t2-t1, &
         " ticks. => ind/arr = ", tick_ratio(td, t2 - t1)
  end subroutine genr8

end program randbench