如何使用多个嵌套 for 循环加速 python 2 程序

Question

这段代码有多个for循环，我读到的列表每个都有999分。我想将其迭代多达 10,000 次。然而，即使只迭代 2 次也需要将近 10 分钟。

即使我发布了这个特定的代码，我认为我的问题的答案可以帮助其他人运行他们的代码更快地处理大量数据。

感谢您的任何建议。非常感谢。

此代码的作用： 基本上，我从文本文件中读取数组作为列表。每个列表（例如 x1、y1、z1...等）各有 999 个元素。我根据其他元素（两个内部循环）对列表中的每个元素进行操作。最终结果是一个全新的列表，我称之为 x2。然后这段代码应该重复操作 "n # of times" （外循环）。

我的问题是我只能重复这个 a 几次迭代，然后才需要很长时间才能执行。

import matplotlib.pyplot as plt
from astropy.table import Table
from astropy.io import ascii
import numpy as np
import argparse
import time
#for 200
start_time = time.time()

npoints=999
n1, mass1, x1, y1,z1,vx1,vy1,vz1,fx_list,fy_list,fz_list= [],[],[],[],[],[],[],[],[],[],[]
AngL_list=[]
Etot0_list=[]
G=1
dt=.01

with open('homo_sph_N1000_R3_v1.dat') as f:
     for row in f.readlines():  
        if not row.startswith("#"):
            spaces=row.split('   ')
            n1.append(float(spaces[0]))
            mass1.append(float(spaces[1]))
            x1.append(float(spaces[2]))
            y1.append(float(spaces[3]))
            z1.append(float(spaces[4]))
            vx1.append(float(spaces[5]))
            vy1.append(float(spaces[6]))
            vz1.append(float(spaces[7]))

for n in range(2):
#changes the particle on which the forces are acting
     for xn in range(0,npoints):
     #changes the forces from other particles acting on the particle
          for step in range(0,npoints):
          #Here we find the accelearation for every particle
               fx=((G*mass1[xn]*mass1[step+1]*((x1[step+1]**2.+y1[step+1]**2.+z1[step+1]**2.)-(x1[xn]**2.+y1[xn]**2.+z1[xn]**2.)))/  (  abs((x1[step+1]**2.+y1[step+1]**2.+z1[step+1]**2.)-(x1[xn]**2.+y1[xn]**2.+z1[xn]**2.))**2.+(.2)**2  )**(3./2.))

               fy=((G*mass1[xn]*mass1[step+1]*((x1[step+1]**2.+y1[step+1]**2.+z1[step+1]**2.)-(x1[xn]**2.+y1[xn]**2.+z1[xn]**2.)))/  (    abs((x1[step+1]**2.+y1[step+1]**2.+z1[step+1]**2.)-(x1[xn]**2.+y1[xn]**2.+z1[xn]**2.))**2+(.2)**2 )**(3./2.))

               fz=((G*mass1[xn]*mass1[step+1]*((x1[step+1]**2.+y1[step+1]**2.+z1[step+1]**2.)-(x1[xn]**2.+y1[xn]**2.+z1[xn]**2.)))/  (    abs((x1[step+1]**2.+y1[step+1]**2.+z1[step+1]**2.)-(x1[xn]**2.+y1[xn]**2.+z1[xn]**2.))**2+(.2)**2 )**(3./2.))

               #Then put store it in an array
               fx_list.append(fx)
               fy_list.append(fy)
               fz_list.append(fz)


          #Now, I need to split that array up by npoints, each particle has npoints forces acting on it. 
          fxx= np.array_split(fx_list,npoints)
          fyy= np.array_split(fy_list,npoints)
          fzz= np.array_split(fz_list,npoints)

          #since the force on a particle is the sum of all forces acting on it, I'm summing each variable in each array together. e.g. [1,2,3]=[6] 
          fxxx_list=[]
          fyyy_list=[]
          fzzz_list=[]
          for xn in range(0,npoints):
               fxxx= np.sum(fxx[xn])
               fyyy= np.sum(fyy[xn])
               fzzz= np.sum(fzz[xn]) 

               #and save that in array. Now I have the accelearation on each particle. 
               fxxx_list.append(fxxx)
               fyyy_list.append(fyyy)
               fzzz_list.append(fzzz) 

          #This is where i begin the integration

          vx2=[]
          vy2=[]
          vz2=[] 
          for xn in range(0,npoints):

               vx11=vx1[xn]+.5*(fxxx_list[xn]+fxxx_list[xn])*dt
               vy11=vy1[xn]+.5*(fyyy_list[xn]+fyyy_list[xn])*dt
               vz11=vz1[xn]+.5*(fzzz_list[xn]+fyyy_list[xn])*dt 

               vx2.append(vx11)
               vy2.append(vy11)
               vz2.append(vz11) 

          x2=[]
          y2=[]
          z2=[]
          for xn in range(0,npoints):
               x11=(x1[xn]+vx2[xn]*dt)+(.5*fxxx_list[xn]*(dt**2))
               y11=(y1[xn]+vy2[xn]*dt)+(.5*fyyy_list[xn]*(dt**2))
               z11=(z1[xn]+vz2[xn]*dt)+(.5*fzzz_list[xn]*(dt**2)) 

               x2.append(x11)
               y2.append(y11)
               z2.append(z11)

x1,y1,z1,vx1,vy1,vz1 = x2,y2,z2,vx2,vy2,vz2

print x2,y2 
plt.scatter(x2,y2)

print("--- %s seconds ---" % (time.time() - start_time))    

plt.show()

Answer 1

这只是一个小的加速，但代码似乎做了很多 x**2（x 平方）。

在python3中，通常执行x**2比x*x慢。考虑一个简单的测试程序：

import time

iteration_count=99999999

# Do a lot of squaring with the ** operator
start1 = time.time()
sum = 0
for i in range( iteration_count ):
    sum += i ** 2
end1 = time.time()


# Do a lot of squaring with i * i
start2 = time.time()
sum = 0
for i in range( iteration_count ):
    sum += i * i
end2 = time.time()

print("x**2 => %f seconds" % (end1-start1))
print("x*x  => %f seconds" % (end2-start2))

这给了我结果：

$ python3 ./squared.py 
x**2 => 21.347830 seconds
x*x  => 8.983334 seconds

我做了很多次运行，变化不大。

问题中的代码做了很多计算来使fx、fy和fz（这似乎每个都一样？这是正确的吗？）如果这些计算有共同点，中间结果应该去掉，只计算一次。

例如，而不是：

fx=((G*mass1[xn]*mass1[step+1]*((x1[step+1]**2.+y1[step+1]**2.+z1[step+1]**2.) ...
fy=((G*mass1[xn]*mass1[step+1]*((x1[step+1]**2.+y1[step+1]**2.+z1[step+1]**2.) ...
fz=((G*mass1[xn]*mass1[step+1]*((x1[step+1]**2.+y1[step+1]**2.+z1[step+1]**2.) ...

第一部分应该只计算一次：

g_mass = G*mass1[xn]*mass1[step+1]
fx=((g_mass * ((x1[step+1]**2.+y1[step+1]**2.+z1[step+1]**2.) ...
fy=((g_mass * ((x1[step+1]**2.+y1[step+1]**2.+z1[step+1]**2.) ...
fz=((g_mass * ((x1[step+1]**2.+y1[step+1]**2.+z1[step+1]**2.) ...

类似地，对于这些公式中具有共同成分的任何部分。

Answer 2

我认为如果将输入转换为 numpy 数组，对 numpy 数组进行操作，并将 numpy 数组预分配到它们所需的大小，您应该能够获得很大的加速（可能提高 1000 倍），而不是使用列表并随时附加它们。

例如，仅以您的示例开始，您可以执行以下操作（虽然我不能保证它完全按照您的要求行事，但这只是指导）

with open('homo_sph_N1000_R3_v1.dat') as f:
    for row in f.readlines():  
        if not row.startswith("#"):
            spaces=row.split('   ')
            n1.append(float(spaces[0]))
            mass1.append(float(spaces[1]))
            x1.append(float(spaces[2]))
            y1.append(float(spaces[3]))
            z1.append(float(spaces[4]))
            vx1.append(float(spaces[5]))
            vy1.append(float(spaces[6]))
            vz1.append(float(spaces[7]))

# convert to numpy arrays
n1 = np.array(n1)
mass1 = np.array(mass1)
# KEEP DOING THIS FOR THE OTHER INPUTS

for n in range(2):
    # PREALLOCATE
    fx = np.zeros(npoints, npoints-1)
    fy = np.zeros(npoints, npoints-1)
    fz = np.zeros(npoints, npoints-1)

    #changes the particle on which the forces are acting
    for xn in range(0,npoints):
        #changes the forces from other particles acting on the particle

        # REMOVE THE INNER FOR LOOP AND JUST USE THE ARRAYS
        #for step in range(0,npoints):
        #Here we find the accelearation for every particle
        fx[xn] = ((G*mass1[xn]*mass1[1:]*((x1[1:]**2.+y1[1:]**2.+z1[1:]**2.)-(x1[xn]**2.+y1[xn]**2.+z1[xn]**2.)))/  (  abs((x1[1:]**2.+y1[1:]**2.+z1[1:]**2.)-(x1[xn]**2.+y1[xn]**2.+z1[xn]**2.))**2.+(.2)**2  )**(3./2.))

        fy[xn] = ((G*mass1[xn]*mass1[1:]*((x1[1:]**2.+y1[1:]**2.+z1[1:]**2.)-(x1[xn]**2.+y1[xn]**2.+z1[xn]**2.)))/  (    abs((x1[1:]**2.+y1[1:]**2.+z1[1:]**2.)-(x1[xn]**2.+y1[xn]**2.+z1[xn]**2.))**2+(.2)**2 )**(3./2.))

        fz[xn] = ((G*mass1[xn]*mass1[1:]*((x1[1:]**2.+y1[1:]**2.+z1[1:]**2.)-(x1[xn]**2.+y1[xn]**2.+z1[xn]**2.)))/  (    abs((x1[1:]**2.+y1[1:]**2.+z1[1:]**2.)-(x1[xn]**2.+y1[xn]**2.+z1[xn]**2.))**2+(.2)**2 )**(3./2.))

      #Now, I need to split that array up by npoints, each particle has npoints forces acting on it. 
      fxx= np.array_split(fx,npoints)
      fyy= np.array_split(fy,npoints)
      fzz= np.array_split(fz,npoints)

      #since the force on a particle is the sum of all forces acting on it, I'm summing each variable in each array together. e.g. [1,2,3]=[6] 
      fxxx= np.sum(fxx[xn], axis=1)
      fyyy= np.sum(fyy[xn], axis=1)
      fzzz= np.sum(fzz[xn], axis=1)

如何使用多个嵌套 for 循环加速 python 2 程序

How to speed up a python 2 program with multiple nested for-loops

python

numpy

matplotlib

astropy