使用 OpenMP 的 Gauss Seidel 迭代求解器不收敛
Gauss Siedel iterative solver using OpenMP does not converge
我想使用 OpenMP 编写一个 Gauss Siedel 迭代求解器。我的求解器没有收敛到正确的结果,我也不知道为什么。
控制方程是稳态热方程:del^2(T)=S
,其中 S
是热源函数。 S=-35*pi/2*cos(pi*x/2)*cos(3*pi*y/2)*cos(5*pi*z/2)
我在 dowhile 循环中实现了 OpenMP,因为它不允许我从 do while 循环开始并行。有没有办法把do while循环改成do循环?
结果在没有并行计算的情况下收敛了,但是加入了openmp之后,就不再收敛了。
这是我的代码:
PROGRAM GAUSS_MP
USE omp_lib
IMPLICIT NONE
REAL*8, DIMENSION(:,:,:), ALLOCATABLE :: S, T
REAL*8 :: X, Y, Z
REAL*8, PARAMETER :: PI=2*ASIN(1.0)
REAL*8 :: DX ! STEP SIZE DX=DY=DZ
REAL*8, PARAMETER :: TOL=1.0E-5 ! TOLERANCE
REAL*8 :: DUMAX
REAL*8 :: T_OLD
REAL*8 :: T1,T2
INTEGER, PARAMETER :: N=100 ! GRID NUMBER, START FROM 1
INTEGER, PARAMETER :: ITERMAX=1E5 ! MAXIMUM ITERATION
INTEGER :: I, J, ITER, K
INTEGER :: POINT_NUM
INTEGER, PARAMETER :: NUM_THREADS=32
! INTEGER :: A
! INITIALIZE OPENMP THREADS
CALL OMP_SET_NUM_THREADS(NUM_THREADS)
! COMPUTE STEP SIZE
DX=2.0/REAL(N-1, KIND=8)
! PRINT *, "DX=", DX
! INITIALIZE THE T ARRAYS AND S(I)
ALLOCATE(S(N,N,N), T(N,N,N))
``````````````````````````````````````````````````````````````````````````````
! INITIAL GUESS
T=1.0
``````````````````````````````````````````````````````````````````````````````
! BOUNDARY CONDITION
T(1,:,:)=0.0; T(N,:,:)=0.0; T(:,:,1)=0.0; T(:,:,N)=0.0;
T(:,1,:)=0.0; T(:,N,:)=0.0;
X=0.0D0 ! COORDINATES
Y=0.0D0
S=0.0D0 ! SOURCE
``````````````````````````````````````````````````````````````````````````````
! SOURCE MATRIX
!$OMP PARALLEL DO PRIVATE(I,J,K)
DO K=2,N-1
Z=-1.0+(K-1)*DX
DO I=2,N-1
Y=-1.0+(I-1)*DX
DO J=2,N-1
X=-1.0+(J-1)*DX
S(I,J,K)=-35.0*PI/2.0*COS(PI*X/2.0)*COS(3.0*PI*Y/2.0)&
*COS(5.0*PI*Z/2.0)
END DO
END DO
END DO
!$OMP END PARALLEL DO
``````````````````````````````````````````````````````````````````````````````
! GAUSS-SEIDEL ITERATION
PRINT *, 'PARALLEL START'
T1=OMP_GET_WTIME()
ITER=0
DUMAX=1.0D0 ! UPDATE DIFFERENCE
DO WHILE(ITER <= ITERMAX .AND. DUMAX >= TOL)
ITER=ITER+1
DUMAX=0.0D0
!$OMP PARALLEL PRIVATE(T_OLD, K, I, J, DUMAX)
!$OMP DO REDUCTION(MAX:DUMAX)
DO K=2,N-1
DO I=2, N-1
DO J=2, N-1
T_OLD=T(I,J,K)
T(I,J,K)=1.0/6.0*(T(I+1,J,K)+T(I-1,J,K)+T(I,J-1,K)+T(I,J+1,K) &
+T(I,J,K+1)+T(I,J,K-1) &
-DX**2*S(I,J,K))
DUMAX=MAX(DUMAX, ABS(T_OLD-T(I,J,K)))
END DO
END DO
END DO
!$OMP END DO
!$OMP END PARALLEL
END DO
T2=OMP_GET_WTIME()
END PROGRAM GAUSS_MP
Gauss-Seidel 是一种顺序算法,不容易并行化。如果您查看 T
数组的更新,您会发现您正在从其他线程读取值,当当前线程尝试处理这些值时,这些值可能已更新也可能未更新。这是一个典型的竞争条件。
你基本上有两个选择:
使用循环倾斜 "turn" 循环嵌套 45 度,并使用波前穿过网格。这样,当当前线程想要读取更新的值时,更新的单元格将可用。
使用 OpenMP 4.5 功能“有序依赖”来表达代码中的数据依赖性,并让 OpenMP 编译器添加适当的同步以避免竞争条件。
为了阐明@Michael Klemm 提到的使用 ordered
和 depend
的方法是如何工作的,请考虑以下并行高斯-塞德尔求解器的示例,该求解器解决了 3D 中的泊松问题。
SUBROUTINE gauss_seidel_par(u,f,N,itermax)
REAL(dp), DIMENSION(:,:,:), INTENT(INOUT) :: u
REAL(dp), DIMENSION(:,:,:), INTENT(IN) :: f
INTEGER, INTENT(IN) :: N, itermax
INTEGER :: i, j, k, l
REAL(dp) :: factor1, factor2
factor1 = 1.0_dp/6.0_dp ! Coefficient of u(i,j,k)
factor2 = (2.0_dp/REAL(N+1.0_dp))**2 ! Delta^2
!$omp parallel
DO l = 1,itermax ! run until maximum number of iterations is reached
!$omp do schedule(static,1) ordered(2)
DO k = 2,N-1
DO j = 2,N-1
!$omp ordered depend(sink: k-1,j) depend(sink: k,j-1)
DO i = 2,N-1
u(i,j,k) = factor1*(u(i+1,j,k)+u(i,j+1,k)+u(i,j,k+1)+u(i-1,j,k)+u(i,j-1,k)+u(i,j,k-1) + factor2*f(i,j,k))
ENDDO
!$omp ordered depend(source)
ENDDO
ENDDO
ENDDO
!$omp end parallel
END SUBROUTINE gauss_seidel_par
ordered
子句指出循环应该按顺序传递,即 1,2,3.... 而不是随机顺序。 depend
子句然后指定方案依赖于哪些更新值,在本例中,模板上已经传递的点。
我想使用 OpenMP 编写一个 Gauss Siedel 迭代求解器。我的求解器没有收敛到正确的结果,我也不知道为什么。
控制方程是稳态热方程:del^2(T)=S
,其中 S
是热源函数。 S=-35*pi/2*cos(pi*x/2)*cos(3*pi*y/2)*cos(5*pi*z/2)
我在 dowhile 循环中实现了 OpenMP,因为它不允许我从 do while 循环开始并行。有没有办法把do while循环改成do循环?
结果在没有并行计算的情况下收敛了,但是加入了openmp之后,就不再收敛了。
这是我的代码:
PROGRAM GAUSS_MP
USE omp_lib
IMPLICIT NONE
REAL*8, DIMENSION(:,:,:), ALLOCATABLE :: S, T
REAL*8 :: X, Y, Z
REAL*8, PARAMETER :: PI=2*ASIN(1.0)
REAL*8 :: DX ! STEP SIZE DX=DY=DZ
REAL*8, PARAMETER :: TOL=1.0E-5 ! TOLERANCE
REAL*8 :: DUMAX
REAL*8 :: T_OLD
REAL*8 :: T1,T2
INTEGER, PARAMETER :: N=100 ! GRID NUMBER, START FROM 1
INTEGER, PARAMETER :: ITERMAX=1E5 ! MAXIMUM ITERATION
INTEGER :: I, J, ITER, K
INTEGER :: POINT_NUM
INTEGER, PARAMETER :: NUM_THREADS=32
! INTEGER :: A
! INITIALIZE OPENMP THREADS
CALL OMP_SET_NUM_THREADS(NUM_THREADS)
! COMPUTE STEP SIZE
DX=2.0/REAL(N-1, KIND=8)
! PRINT *, "DX=", DX
! INITIALIZE THE T ARRAYS AND S(I)
ALLOCATE(S(N,N,N), T(N,N,N))
``````````````````````````````````````````````````````````````````````````````
! INITIAL GUESS
T=1.0
``````````````````````````````````````````````````````````````````````````````
! BOUNDARY CONDITION
T(1,:,:)=0.0; T(N,:,:)=0.0; T(:,:,1)=0.0; T(:,:,N)=0.0;
T(:,1,:)=0.0; T(:,N,:)=0.0;
X=0.0D0 ! COORDINATES
Y=0.0D0
S=0.0D0 ! SOURCE
``````````````````````````````````````````````````````````````````````````````
! SOURCE MATRIX
!$OMP PARALLEL DO PRIVATE(I,J,K)
DO K=2,N-1
Z=-1.0+(K-1)*DX
DO I=2,N-1
Y=-1.0+(I-1)*DX
DO J=2,N-1
X=-1.0+(J-1)*DX
S(I,J,K)=-35.0*PI/2.0*COS(PI*X/2.0)*COS(3.0*PI*Y/2.0)&
*COS(5.0*PI*Z/2.0)
END DO
END DO
END DO
!$OMP END PARALLEL DO
``````````````````````````````````````````````````````````````````````````````
! GAUSS-SEIDEL ITERATION
PRINT *, 'PARALLEL START'
T1=OMP_GET_WTIME()
ITER=0
DUMAX=1.0D0 ! UPDATE DIFFERENCE
DO WHILE(ITER <= ITERMAX .AND. DUMAX >= TOL)
ITER=ITER+1
DUMAX=0.0D0
!$OMP PARALLEL PRIVATE(T_OLD, K, I, J, DUMAX)
!$OMP DO REDUCTION(MAX:DUMAX)
DO K=2,N-1
DO I=2, N-1
DO J=2, N-1
T_OLD=T(I,J,K)
T(I,J,K)=1.0/6.0*(T(I+1,J,K)+T(I-1,J,K)+T(I,J-1,K)+T(I,J+1,K) &
+T(I,J,K+1)+T(I,J,K-1) &
-DX**2*S(I,J,K))
DUMAX=MAX(DUMAX, ABS(T_OLD-T(I,J,K)))
END DO
END DO
END DO
!$OMP END DO
!$OMP END PARALLEL
END DO
T2=OMP_GET_WTIME()
END PROGRAM GAUSS_MP
Gauss-Seidel 是一种顺序算法,不容易并行化。如果您查看 T
数组的更新,您会发现您正在从其他线程读取值,当当前线程尝试处理这些值时,这些值可能已更新也可能未更新。这是一个典型的竞争条件。
你基本上有两个选择:
使用循环倾斜 "turn" 循环嵌套 45 度,并使用波前穿过网格。这样,当当前线程想要读取更新的值时,更新的单元格将可用。
使用 OpenMP 4.5 功能“有序依赖”来表达代码中的数据依赖性,并让 OpenMP 编译器添加适当的同步以避免竞争条件。
为了阐明@Michael Klemm 提到的使用 ordered
和 depend
的方法是如何工作的,请考虑以下并行高斯-塞德尔求解器的示例,该求解器解决了 3D 中的泊松问题。
SUBROUTINE gauss_seidel_par(u,f,N,itermax)
REAL(dp), DIMENSION(:,:,:), INTENT(INOUT) :: u
REAL(dp), DIMENSION(:,:,:), INTENT(IN) :: f
INTEGER, INTENT(IN) :: N, itermax
INTEGER :: i, j, k, l
REAL(dp) :: factor1, factor2
factor1 = 1.0_dp/6.0_dp ! Coefficient of u(i,j,k)
factor2 = (2.0_dp/REAL(N+1.0_dp))**2 ! Delta^2
!$omp parallel
DO l = 1,itermax ! run until maximum number of iterations is reached
!$omp do schedule(static,1) ordered(2)
DO k = 2,N-1
DO j = 2,N-1
!$omp ordered depend(sink: k-1,j) depend(sink: k,j-1)
DO i = 2,N-1
u(i,j,k) = factor1*(u(i+1,j,k)+u(i,j+1,k)+u(i,j,k+1)+u(i-1,j,k)+u(i,j-1,k)+u(i,j,k-1) + factor2*f(i,j,k))
ENDDO
!$omp ordered depend(source)
ENDDO
ENDDO
ENDDO
!$omp end parallel
END SUBROUTINE gauss_seidel_par
ordered
子句指出循环应该按顺序传递,即 1,2,3.... 而不是随机顺序。 depend
子句然后指定方案依赖于哪些更新值,在本例中,模板上已经传递的点。