加快将 numpy 数据以二进制形式写入文件的速度。PLY 生成器
Speed up write numpy data in binary to file. ply generator
我编写了自己的 PLY 导出器,它虽然可以工作,但速度很慢。我推测瓶颈在于逐元素的显式循环,并且(根据这条评论)在于使用 struct.pack。
导出 Nodes 的难点在于它是由元组组成的一维向量。导出 Faces 的难点在于每一行 int 前面都要有一个 uchar 格式的数字“3”。
我将整个代码放在这里,因为它可以用作 MWE,如果有人想使用代码以 ply 格式导出网格,也可以作为参考。
代码:
from __future__ import division
import numpy as np
import struct
import timeit
def Timeme(funct, var, NN=10, NNN=10):
    """Time ``funct(*var)`` and print the mean wall-clock time per call.

    Runs ``NN`` rounds. Each round calls ``funct(*var)`` ``NNN`` times and
    prints one line of the form ``"<round>: <mean milliseconds per call>"``.

    Args:
        funct: Callable to benchmark.
        var: Sequence of positional arguments unpacked into ``funct``.
        NN: Number of timing rounds (one printed line per round).
        NNN: Number of calls averaged within each round.
    """
    # range() and the print() call form work on both Python 2 and Python 3,
    # unlike xrange and the print statement.
    for i in range(NN):
        start = timeit.default_timer()
        for _ in range(NNN):
            funct(*var)
        end = timeit.default_timer()
        # Timer values are in seconds; scale the per-call mean to milliseconds.
        print(str(i) + ': ' + str((end - start) / NNN * 1000))
# This function is fictitious. In reality the
# Nodes array is imported from another module
def MakeNodes(Nr, Nc):
    """Build a synthetic ``Nr`` x ``Nc`` grid of mesh nodes.

    Args:
        Nr: Number of grid rows.
        Nc: Number of grid columns.

    Returns:
        A 1-D structured array of length ``Nr * Nc`` with float32 fields
        ``'x'``, ``'y'`` and ``'z'``. ``x`` runs from 0 to ``Nc - 1`` along a
        row, ``y`` runs from ``Nr - 1`` down to 0 from the first row to the
        last, and ``z`` is a paraboloid of the normalised ``x`` and ``y``.
    """
    node_dtype = [('x', np.float32), ('y', np.float32), ('z', np.float32)]
    Nodes = np.zeros(Nr * Nc, dtype=node_dtype)

    x = np.linspace(0, Nc - 1, Nc, dtype=np.float32)
    y = np.linspace(Nr - 1, 0, Nr, dtype=np.float32)
    xv, yv = np.meshgrid(x, y, sparse=False, indexing='xy')
    Nodes['x'] = xv.ravel()
    Nodes['y'] = yv.ravel()

    # 0.5 literals instead of 1/2 so the result does not depend on
    # ``from __future__ import division`` being active.
    dx = Nodes['x'] / Nc - 0.5
    dy = Nodes['y'] / Nr - 0.5
    Nodes['z'] = (0.5 - (dx ** 2 + dy ** 2)) * Nr / 2
    return Nodes
# Function below explained in
def MakeFaces(Nr, Nc):
    """Triangulate an ``Nr`` x ``Nc`` node grid into two triangles per cell.

    Nodes are numbered row by row (``index = row * Nc + column``).

    Args:
        Nr: Number of grid rows.
        Nc: Number of grid columns.

    Returns:
        An integer array of shape ``(2 * (Nr - 1) * (Nc - 1), 3)``; each row
        holds the three node indices of one triangle.
    """
    grid = np.arange(Nr * Nc).reshape(Nr, Nc)
    # The four corners of every grid cell, as (Nr-1, Nc-1) index arrays.
    upper_left = grid[:-1, :-1]
    upper_right = grid[:-1, 1:]
    lower_left = grid[1:, :-1]
    lower_right = grid[1:, 1:]

    out = np.empty((Nr - 1, Nc - 1, 2, 3), dtype=int)
    # First triangle of each cell.
    out[:, :, 0, 0] = upper_right
    out[:, :, 0, 1] = upper_left
    out[:, :, 0, 2] = lower_left
    # Second triangle of each cell.
    out[:, :, 1, 0] = lower_right
    out[:, :, 1, 1] = upper_right
    out[:, :, 1, 2] = lower_left

    # reshape() instead of assigning to .shape, which NumPy discourages.
    return out.reshape(-1, 3)
def ExportPlyBinary(Nodes, Faces, file):
    """Write a triangle mesh to a binary little-endian PLY file, record by record.

    This is the per-element ``struct.pack`` implementation; it packs and
    writes every vertex and every face individually.

    Args:
        Nodes: Sequence of records indexable by ``'x'``, ``'y'`` and ``'z'``
            (for example a NumPy structured array); one vertex per record.
        Faces: Sequence of rows whose first three entries are the vertex
            indices of a triangle.
        file: Path of the output file; it is opened in ``'wb'`` mode.
    """
    LN = len(Nodes)
    LF = len(Faces)
    header = (
        "ply\n"
        "format binary_little_endian 1.0\n"
        "element vertex " + str(LN) + "\n"
        "property float x\n"
        "property float y\n"
        "property float z\n"
        "element face " + str(LF) + "\n"
        "property list uchar int vertex_indices\n"
        "end_header\n"
    )
    with open(file, 'wb') as fp:
        # The file is binary, so the text header must be written as bytes;
        # writing a str here raises TypeError on Python 3.
        fp.write(header.encode('ascii'))
        # Vertices: three little-endian 32-bit floats each.
        s = struct.Struct('<fff')
        for nd in Nodes:
            fp.write(s.pack(nd['x'], nd['y'], nd['z']))
        # Faces: a uchar vertex count (always 3) followed by three
        # little-endian 32-bit ints ('l' is 4 bytes in standard-size mode).
        s = struct.Struct('<Blll')
        for fc in Faces:
            fp.write(s.pack(3, fc[0], fc[1], fc[2]))
# Benchmark input: a 200 x 200 node grid and its triangulation.
Nr = Nc = 200
Nodes = MakeNodes(Nr, Nc)
Faces = MakeFaces(Nr, Nc)
# Time the hand-written PLY exporter ...
Timeme(funct=ExportPlyBinary, var=(Nodes, Faces, "Test.ply"))
# ... against NumPy's own archive writer as a reference point.
# NOTE(review): np.savez is documented to append ".npz" when the name lacks
# it, so this presumably writes "Test_np.ply.npz" -- confirm.
Timeme(funct=np.savez, var=("Test_np.ply", Nodes, Faces))
结果:
0: 366.352801235
1: 386.216017627
2: 383.307741944
3: 359.598214393
4: 363.434228045
5: 397.255473919
6: 433.967095136
7: 407.806616677
8: 393.701390596
9: 379.542319143
0: 15.5258007875
1: 13.2543344563
2: 12.8754439597
3: 24.2303215372
4: 15.9684973291
5: 14.2023306048
6: 13.7465456437
7: 13.6964054484
8: 21.27484093
9: 13.2139143373
为什么不用可用的东西?
例如 trimesh?
https://pypi.python.org/pypi/trimesh
即使您只想要包的一小部分,您也可以从他们的源代码中复制和调整这部分。 (当然要提到作者)
我按照@max9111 的建议检查了 Trimesh
并且能够创建更高效的函数。关键思想(以我的理解)是(1)通过创建新数组转换为正确的数据类型和顺序,以及(2)使用 .tostring
函数。我最初避免了这个方向,因为它似乎浪费内存,但在这一点上,优势很明显。请注意,我的 Nodes
数组已准备好对其应用 .tostring
,但我会保留更通用的解决方案。
def NewExportPlyBinary(Nodes, Faces, file):
    """Write a triangle mesh to a binary little-endian PLY file in bulk.

    The vertices and faces are first copied into packed structured arrays
    that match the PLY record layout, then each array is written with a
    single call.

    Args:
        Nodes: Structured array with fields ``'x'``, ``'y'`` and ``'z'``;
            one vertex per record.
        Faces: Array of shape ``(n_faces, 3)`` holding the vertex indices of
            each triangle.
        file: Path of the output file; it is opened in ``'wb'`` mode.
    """
    LN = len(Nodes)
    LF = len(Faces)
    header = (
        "ply\n"
        "format binary_little_endian 1.0\n"
        "element vertex " + str(LN) + "\n"
        "property float x\n"
        "property float y\n"
        "property float z\n"
        "element face " + str(LF) + "\n"
        "property list uchar int vertex_indices\n"
        "end_header\n"
    )

    # Vertex records: three little-endian float32 values each.
    dtype_vertex = [('vertex', '<f4', (3,))]
    vertex = np.empty(LN, dtype=dtype_vertex)
    vertex['vertex'] = np.stack((Nodes['x'], Nodes['y'], Nodes['z']), axis=-1)
    # If Nodes is already a packed little-endian float32 (x, y, z) record
    # array it could be written directly; the copy keeps this general.

    # Face records: a uchar count (always 3) followed by three int32 indices.
    dtype_face = [('count', '<u1'), ('index', '<i4', (3,))]
    faces = np.empty(LF, dtype=dtype_face)
    faces['count'] = 3
    faces['index'] = Faces

    with open(file, 'wb') as fp:
        # The file is binary, so the text header must be written as bytes;
        # writing a str here raises TypeError on Python 3.
        fp.write(header.encode('ascii'))
        # tobytes() replaces the deprecated tostring() alias.
        fp.write(vertex.tobytes())
        fp.write(faces.tobytes())
对于 200x200 示例,我现在得到以下时间:
0: 373.361611377 # original ExportPlyBinary
0: 20.5686725792 # numpy's savez
0: 4.85469689001 # NewExportPlyBinary
注意:savez
和NewExportPlyBinary
之间的差异在问题规模增大时基本消失。
我编写了自己的 PLY 导出器,它虽然可以工作,但速度很慢。我推测瓶颈在于逐元素的显式循环,并且(根据这条评论)在于使用 struct.pack。
导出 Nodes 的难点在于它是由元组组成的一维向量。导出 Faces 的难点在于每一行 int 前面都要有一个 uchar 格式的数字“3”。
我将整个代码放在这里,因为它可以用作 MWE,如果有人想使用代码以 ply 格式导出网格,也可以作为参考。
代码:
from __future__ import division
import numpy as np
import struct
import timeit
def Timeme(funct, var, NN=10, NNN=10):
    """Time ``funct(*var)`` and print the mean wall-clock time per call.

    Runs ``NN`` rounds. Each round calls ``funct(*var)`` ``NNN`` times and
    prints one line of the form ``"<round>: <mean milliseconds per call>"``.

    Args:
        funct: Callable to benchmark.
        var: Sequence of positional arguments unpacked into ``funct``.
        NN: Number of timing rounds (one printed line per round).
        NNN: Number of calls averaged within each round.
    """
    # range() and the print() call form work on both Python 2 and Python 3,
    # unlike xrange and the print statement.
    for i in range(NN):
        start = timeit.default_timer()
        for _ in range(NNN):
            funct(*var)
        end = timeit.default_timer()
        # Timer values are in seconds; scale the per-call mean to milliseconds.
        print(str(i) + ': ' + str((end - start) / NNN * 1000))
# This function is fictitious. In reality the
# Nodes array is imported from another module
def MakeNodes(Nr, Nc):
    """Build a synthetic ``Nr`` x ``Nc`` grid of mesh nodes.

    Args:
        Nr: Number of grid rows.
        Nc: Number of grid columns.

    Returns:
        A 1-D structured array of length ``Nr * Nc`` with float32 fields
        ``'x'``, ``'y'`` and ``'z'``. ``x`` runs from 0 to ``Nc - 1`` along a
        row, ``y`` runs from ``Nr - 1`` down to 0 from the first row to the
        last, and ``z`` is a paraboloid of the normalised ``x`` and ``y``.
    """
    node_dtype = [('x', np.float32), ('y', np.float32), ('z', np.float32)]
    Nodes = np.zeros(Nr * Nc, dtype=node_dtype)

    x = np.linspace(0, Nc - 1, Nc, dtype=np.float32)
    y = np.linspace(Nr - 1, 0, Nr, dtype=np.float32)
    xv, yv = np.meshgrid(x, y, sparse=False, indexing='xy')
    Nodes['x'] = xv.ravel()
    Nodes['y'] = yv.ravel()

    # 0.5 literals instead of 1/2 so the result does not depend on
    # ``from __future__ import division`` being active.
    dx = Nodes['x'] / Nc - 0.5
    dy = Nodes['y'] / Nr - 0.5
    Nodes['z'] = (0.5 - (dx ** 2 + dy ** 2)) * Nr / 2
    return Nodes
# Function below explained in
def MakeFaces(Nr, Nc):
    """Triangulate an ``Nr`` x ``Nc`` node grid into two triangles per cell.

    Nodes are numbered row by row (``index = row * Nc + column``).

    Args:
        Nr: Number of grid rows.
        Nc: Number of grid columns.

    Returns:
        An integer array of shape ``(2 * (Nr - 1) * (Nc - 1), 3)``; each row
        holds the three node indices of one triangle.
    """
    grid = np.arange(Nr * Nc).reshape(Nr, Nc)
    # The four corners of every grid cell, as (Nr-1, Nc-1) index arrays.
    upper_left = grid[:-1, :-1]
    upper_right = grid[:-1, 1:]
    lower_left = grid[1:, :-1]
    lower_right = grid[1:, 1:]

    out = np.empty((Nr - 1, Nc - 1, 2, 3), dtype=int)
    # First triangle of each cell.
    out[:, :, 0, 0] = upper_right
    out[:, :, 0, 1] = upper_left
    out[:, :, 0, 2] = lower_left
    # Second triangle of each cell.
    out[:, :, 1, 0] = lower_right
    out[:, :, 1, 1] = upper_right
    out[:, :, 1, 2] = lower_left

    # reshape() instead of assigning to .shape, which NumPy discourages.
    return out.reshape(-1, 3)
def ExportPlyBinary(Nodes, Faces, file):
    """Write a triangle mesh to a binary little-endian PLY file, record by record.

    This is the per-element ``struct.pack`` implementation; it packs and
    writes every vertex and every face individually.

    Args:
        Nodes: Sequence of records indexable by ``'x'``, ``'y'`` and ``'z'``
            (for example a NumPy structured array); one vertex per record.
        Faces: Sequence of rows whose first three entries are the vertex
            indices of a triangle.
        file: Path of the output file; it is opened in ``'wb'`` mode.
    """
    LN = len(Nodes)
    LF = len(Faces)
    header = (
        "ply\n"
        "format binary_little_endian 1.0\n"
        "element vertex " + str(LN) + "\n"
        "property float x\n"
        "property float y\n"
        "property float z\n"
        "element face " + str(LF) + "\n"
        "property list uchar int vertex_indices\n"
        "end_header\n"
    )
    with open(file, 'wb') as fp:
        # The file is binary, so the text header must be written as bytes;
        # writing a str here raises TypeError on Python 3.
        fp.write(header.encode('ascii'))
        # Vertices: three little-endian 32-bit floats each.
        s = struct.Struct('<fff')
        for nd in Nodes:
            fp.write(s.pack(nd['x'], nd['y'], nd['z']))
        # Faces: a uchar vertex count (always 3) followed by three
        # little-endian 32-bit ints ('l' is 4 bytes in standard-size mode).
        s = struct.Struct('<Blll')
        for fc in Faces:
            fp.write(s.pack(3, fc[0], fc[1], fc[2]))
# Benchmark input: a 200 x 200 node grid and its triangulation.
Nr = Nc = 200
Nodes = MakeNodes(Nr, Nc)
Faces = MakeFaces(Nr, Nc)
# Time the hand-written PLY exporter ...
Timeme(funct=ExportPlyBinary, var=(Nodes, Faces, "Test.ply"))
# ... against NumPy's own archive writer as a reference point.
# NOTE(review): np.savez is documented to append ".npz" when the name lacks
# it, so this presumably writes "Test_np.ply.npz" -- confirm.
Timeme(funct=np.savez, var=("Test_np.ply", Nodes, Faces))
结果:
0: 366.352801235
1: 386.216017627
2: 383.307741944
3: 359.598214393
4: 363.434228045
5: 397.255473919
6: 433.967095136
7: 407.806616677
8: 393.701390596
9: 379.542319143
0: 15.5258007875
1: 13.2543344563
2: 12.8754439597
3: 24.2303215372
4: 15.9684973291
5: 14.2023306048
6: 13.7465456437
7: 13.6964054484
8: 21.27484093
9: 13.2139143373
为什么不用可用的东西?
例如 trimesh? https://pypi.python.org/pypi/trimesh
即使您只想要包的一小部分,您也可以从他们的源代码中复制和调整这部分。 (当然要提到作者)
我按照@max9111 的建议检查了 Trimesh
并且能够创建更高效的函数。关键思想(以我的理解)是(1)通过创建新数组转换为正确的数据类型和顺序,以及(2)使用 .tostring
函数。我最初避免了这个方向,因为它似乎浪费内存,但在这一点上,优势很明显。请注意,我的 Nodes
数组已准备好对其应用 .tostring
,但我会保留更通用的解决方案。
def NewExportPlyBinary(Nodes, Faces, file):
    """Write a triangle mesh to a binary little-endian PLY file in bulk.

    The vertices and faces are first copied into packed structured arrays
    that match the PLY record layout, then each array is written with a
    single call.

    Args:
        Nodes: Structured array with fields ``'x'``, ``'y'`` and ``'z'``;
            one vertex per record.
        Faces: Array of shape ``(n_faces, 3)`` holding the vertex indices of
            each triangle.
        file: Path of the output file; it is opened in ``'wb'`` mode.
    """
    LN = len(Nodes)
    LF = len(Faces)
    header = (
        "ply\n"
        "format binary_little_endian 1.0\n"
        "element vertex " + str(LN) + "\n"
        "property float x\n"
        "property float y\n"
        "property float z\n"
        "element face " + str(LF) + "\n"
        "property list uchar int vertex_indices\n"
        "end_header\n"
    )

    # Vertex records: three little-endian float32 values each.
    dtype_vertex = [('vertex', '<f4', (3,))]
    vertex = np.empty(LN, dtype=dtype_vertex)
    vertex['vertex'] = np.stack((Nodes['x'], Nodes['y'], Nodes['z']), axis=-1)
    # If Nodes is already a packed little-endian float32 (x, y, z) record
    # array it could be written directly; the copy keeps this general.

    # Face records: a uchar count (always 3) followed by three int32 indices.
    dtype_face = [('count', '<u1'), ('index', '<i4', (3,))]
    faces = np.empty(LF, dtype=dtype_face)
    faces['count'] = 3
    faces['index'] = Faces

    with open(file, 'wb') as fp:
        # The file is binary, so the text header must be written as bytes;
        # writing a str here raises TypeError on Python 3.
        fp.write(header.encode('ascii'))
        # tobytes() replaces the deprecated tostring() alias.
        fp.write(vertex.tobytes())
        fp.write(faces.tobytes())
对于 200x200 示例,我现在得到以下时间:
0: 373.361611377 # original ExportPlyBinary
0: 20.5686725792 # numpy's savez
0: 4.85469689001 # NewExportPlyBinary
注意:savez
和NewExportPlyBinary
之间的差异在问题规模增大时基本消失。