Can the following code be changed to be faster
I have the following code. When I run it there is a long pause, then after parsing it prints a bunch of lines, and then the process repeats. I would like to know why there is a pause, and how I can rewrite the code to be more efficient in terms of both memory and speed.
#!/usr/bin/env python
import os
from lxml import etree

subdirectories = []

for root, dirs, files in os.walk("/dev/shm/heatmap/OpenHPC"):
    for i in dirs:
        if "compute" in i:
            subdirectories.append(i)

#for i in subdirectories:
#    os.system("cd /dev/shm/heatmap/OpenHPC/" + i + " && rrdtool dump cpu_temp.rrd cpu_temp.xml")

for i in subdirectories:
    with open(i + "/cpu_temp.xml") as f:
        doc = etree.parse(f)

    it = iter(doc.xpath(
        '//comment()[following-sibling::row] | //row/v/text()'
    ))

    for db_date, db_value in zip(it, it):
        with open("temperatureData.txt", 'a+') as f:
            f.write(i + " " + db_date.text.strip()[0:23] + " " + db_value + "\n")
            print(f.write(i + " " + db_date.text.strip()[0:23] + " " + db_value + "\n"))
File operations are expensive; the fewer of them you do, the better. I suggest opening the output file once for appending and minimizing the number of writes. It depends on how much memory you have available, but if you have enough, I would build a list of entries and write them all to the file in one batch (some variables have been renamed for clarity):
#!/usr/bin/env python
import os
from lxml import etree

subdirectories = []

for root, dirs, files in os.walk("/dev/shm/heatmap/OpenHPC"):
    for subdir in dirs:
        if "compute" in subdir:
            subdirectories.append(subdir)

with open('temperatureData.txt', 'a+') as temperatureData:
    datalines = []
    for subdir in subdirectories:
        with open(subdir + "/cpu_temp.xml") as f:
            doc = etree.parse(f)

        it = iter(doc.xpath(
            '//comment()[following-sibling::row] | //row/v/text()'
        ))

        for db_date, db_value in zip(it, it):
            dataline = subdir + " " + db_date.text.strip()[0:23] + " " + db_value
            datalines.append(dataline)
            print(dataline)

    temperatureData.write('\n'.join(datalines))
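
If there is not enough memory to hold every line at once, a middle ground is to keep the single open of temperatureData.txt but flush one batch of lines per subdirectory with writelines, so only one file's worth of data is held in memory at a time. The following is only a minimal sketch of that variant; it assumes, as the commented-out rrdtool line in the question suggests, that the compute directories sit directly under /dev/shm/heatmap/OpenHPC (the BASE constant below is just that path):

#!/usr/bin/env python
import os
from lxml import etree

BASE = "/dev/shm/heatmap/OpenHPC"  # same base directory as the original script

# Collect the compute* subdirectories, as before.
subdirectories = []
for root, dirs, files in os.walk(BASE):
    for subdir in dirs:
        if "compute" in subdir:
            subdirectories.append(subdir)

# Open the output file once; write one batch per subdirectory,
# so memory use is bounded by the largest single cpu_temp.xml.
with open("temperatureData.txt", "a+") as out:
    for subdir in subdirectories:
        with open(os.path.join(BASE, subdir, "cpu_temp.xml")) as f:
            doc = etree.parse(f)

        it = iter(doc.xpath(
            '//comment()[following-sibling::row] | //row/v/text()'
        ))

        # Pair each timestamp comment with the value that follows it.
        batch = [
            subdir + " " + db_date.text.strip()[0:23] + " " + db_value + "\n"
            for db_date, db_value in zip(it, it)
        ]
        out.writelines(batch)  # one buffered write call per subdirectory

writelines keeps the number of write calls down to one per directory, which preserves most of the batching benefit without requiring the whole data set to fit in memory.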