python gnuplot 完成它的工作太快了
python too fast for gnuplot to complete its work
连同 gnuplot 和 python 我有一个分析,可以简单地描述为 gnuplot 访问文件的统计信息,绘制数据,而 Python 随后将永久删除文件:
import fnmatch
import os
import sys
import time
import PyGnuplot as gp
register = []
for file in os.listdir("."):
if fnmatch.fnmatch(file, "*.dat"):
register.append(file)
register.sort()
def plot_map():
""" Access statistics (column 3) and plot the map with gnuplot. """
for entry in register:
input_file = str(entry)
output_file = str(entry)[:-4] + str(".png")
gp.c('input = "{}"'.format(input_file))
gp.c('set output "{}"'.format(output_file))
gp.c('stats input u 3') # place holder
gp.c('set terminal pngcairo')
gp.c('set title "{}" noenhanced'.format(input_file))
gp.c('unset key')
gp.c('set size square; set pm3 map; set palette cubehelix')
gp.c('sp input u 1:2:3')
# For these data, task and allocated computer, gnuplot requires
# this (empirically estimated) safety margin to complete:
time.sleep(1)
os.remove(entry)
plot_map()
sys.exit(0)
通常,多个具有 50k+ 个条目的矩阵将在一个 运行 中被 sc运行 初始化。如果没有 time.sleep
,通过试错估计,特别是在资源较少的计算机上,然而,Python 可能比 gnuplot 前进得更快,最终没有数据文件留给 gnuplot 处理。
Python 的工作如何更有效地等待 gnuplot 的任务完成,然后才删除有问题的文件?
在这种情况下,我建议使用 subprocess.run
直接 调用 gnuplot
。 run
returns 时,gnuplot 已完成。
对于example:
#!/usr/bin/env python3
# file: histdata.py
# vim:fileencoding=utf-8:fdm=marker:ft=python
#
# Copyright © 2012-2018 R.F. Smith <rsmith@xs4all.nl>.
# SPDX-License-Identifier: MIT
# Created: 2012-07-23T01:18:29+02:00
# Last modified: 2019-07-27T13:50:29+0200
"""Make a histogram and calculate entropy of files."""
import math
import os.path
import subprocess as sp
import sys
def main(argv):
"""
Entry point for histdata.
Arguments:
argv: List of file names.
"""
if len(argv) < 1:
sys.exit(1)
for fn in argv:
hdata, size = readdata(fn)
e = entropy(hdata, size)
print(f"entropy of {fn} is {e:.4f} bits/byte")
histogram_gnuplot(hdata, size, fn)
def readdata(name):
"""
Read the data from a file and count it.
Arguments:
name: String containing the filename to open.
Returns:
A tuple (counts list, length of data).
"""
f = open(name, 'rb')
data = f.read()
f.close()
ba = bytearray(data)
del data
counts = [0] * 256
for b in ba:
counts[b] += 1
return (counts, float(len(ba)))
def entropy(counts, sz):
"""
Calculate the entropy of the data represented by the counts list.
Arguments:
counts: List of counts.
sz: Length of the data in bytes.
Returns:
Entropy value.
"""
ent = 0.0
for b in counts:
if b == 0:
continue
p = float(b) / sz
ent -= p * math.log(p, 256)
return ent * 8
def histogram_gnuplot(counts, sz, name):
"""
Use gnuplot to create a histogram from the data in the form of a PDF file.
Arguments
counts: List of counts.
sz: Length of the data in bytes.
name: Name of the output file.
"""
counts = [100 * c / sz for c in counts]
rnd = 1.0 / 256 * 100
pl = ['set terminal pdfcairo size 18 cm,10 cm']
pl += ["set style line 1 lc rgb '#E41A1C' pt 1 ps 1 lt 1 lw 4"]
pl += ["set style line 2 lc rgb '#377EB8' pt 6 ps 1 lt 1 lw 4"]
pl += ["set style line 3 lc rgb '#4DAF4A' pt 2 ps 1 lt 1 lw 4"]
pl += ["set style line 4 lc rgb '#984EA3' pt 3 ps 1 lt 1 lw 4"]
pl += ["set style line 5 lc rgb '#FF7F00' pt 4 ps 1 lt 1 lw 4"]
pl += ["set style line 6 lc rgb '#FFFF33' pt 5 ps 1 lt 1 lw 4"]
pl += ["set style line 7 lc rgb '#A65628' pt 7 ps 1 lt 1 lw 4"]
pl += ["set style line 8 lc rgb '#F781BF' pt 8 ps 1 lt 1 lw 4"]
pl += ["set palette maxcolors 8"]
pl += [
"set palette defined ( 0 '#E41A1C', 1 '#377EB8', 2 '#4DAF4A',"
" 3 '#984EA3',4 '#FF7F00', 5 '#FFFF33', 6 '#A65628', 7 '#F781BF' )"
]
pl += ["set style line 11 lc rgb '#808080' lt 1 lw 5"]
pl += ["set border 3 back ls 11"]
pl += ["set tics nomirror"]
pl += ["set style line 12 lc rgb '#808080' lt 0 lw 2"]
pl += ["set grid back ls 12"]
nm = os.path.basename(name)
pl += [f"set output 'hist-{nm}.pdf'"]
pl += ['set xrange[-1:256]']
pl += ['set yrange[0:*]']
pl += ['set key right top']
pl += ['set xlabel "byte value"']
pl += ['set ylabel "occurance [%]"']
pl += [f'rnd(x) = {rnd:.6f}']
pl += [f"plot '-' using 1:2 with points ls 1 title '{name}', "
f"rnd(x) with lines ls 2 title 'continuous uniform ({rnd:.6f}%)'"]
for n, v in enumerate(counts):
pl += [f'{n} {v}']
pl += ['e']
pt = '\n'.join(pl)
sp.run(['gnuplot'], input=pt.encode('utf-8'), check=True)
if __name__ == '__main__':
main(sys.argv[1:])
编辑: 如您所见,上面的代码具有重要的历史意义。这些天我倾向于做的一件事是使用此处文档形式的内联数据(请参阅 gnuplot 中的 help inline
)。
这比使用 '-'
文件更灵活。数据是持久的,可以在多个图中使用。
例如:
pl += ['$data << EOD']
pl += [f'{n} {n**2}' for n in range(20)]
pl += ['EOD']
不幸的是,您的最小示例在几个方面都不正确,并且不能立即在我的系统上运行。
以下脚本(在 Windows 下测试(!))的基本思想是在 gnuplot 中调用系统命令。据我所知,似乎需要完整的文件名。
单引号和双引号似乎有些特别。我不确定,但我猜 gnuplot 需要单引号中的文件名,而系统命令(至少在 Windows 中)似乎需要双引号中的文件名。
无论如何,至少,代码在 Windows 下运行没有问题,并且创建绘图和删除文件。如果您使用 Linux,我希望您能相应地调整它。
代码:
### generate plots and delete input files
import os
import PyGnuplot as gp
dir_in = r'C:\Test\In'
dir_out = r'C:\Test\Out'
for file in os.listdir(dir_in):
ffname_in = "'" + os.path.join(dir_in,file) + "'" # 'full filename_in'
ffname_del = '"' + os.path.join(dir_in,file) + '"' # "full filename_del"
ffname_out = "'" + os.path.join(dir_out,file) + ".pdf'" # 'full filename_out'
gp.c('set terminal pdfcairo')
gp.c('set output ' + ffname_out)
gp.c('input = ' + ffname_in)
gp.c('stats input u 2 nooutput')
gp.c('plot input u 1:2')
gp.c('set output')
gp.c('print ' + ffname_in)
delete_cmd = "system ('del " + ffname_del + "')"
gp.c(delete_cmd)
### end of code
连同 gnuplot 和 python 我有一个分析,可以简单地描述为 gnuplot 访问文件的统计信息,绘制数据,而 Python 随后将永久删除文件:
import fnmatch
import os
import sys
import time
import PyGnuplot as gp
register = []
for file in os.listdir("."):
if fnmatch.fnmatch(file, "*.dat"):
register.append(file)
register.sort()
def plot_map():
""" Access statistics (column 3) and plot the map with gnuplot. """
for entry in register:
input_file = str(entry)
output_file = str(entry)[:-4] + str(".png")
gp.c('input = "{}"'.format(input_file))
gp.c('set output "{}"'.format(output_file))
gp.c('stats input u 3') # place holder
gp.c('set terminal pngcairo')
gp.c('set title "{}" noenhanced'.format(input_file))
gp.c('unset key')
gp.c('set size square; set pm3 map; set palette cubehelix')
gp.c('sp input u 1:2:3')
# For these data, task and allocated computer, gnuplot requires
# this (empirically estimated) safety margin to complete:
time.sleep(1)
os.remove(entry)
plot_map()
sys.exit(0)
通常,多个具有 50k+ 个条目的矩阵将在一个 运行 中被 sc运行 初始化。如果没有 time.sleep
,通过试错估计,特别是在资源较少的计算机上,然而,Python 可能比 gnuplot 前进得更快,最终没有数据文件留给 gnuplot 处理。
Python 的工作如何更有效地等待 gnuplot 的任务完成,然后才删除有问题的文件?
在这种情况下,我建议使用 subprocess.run
直接 调用 gnuplot
。 run
returns 时,gnuplot 已完成。
对于example:
#!/usr/bin/env python3
# file: histdata.py
# vim:fileencoding=utf-8:fdm=marker:ft=python
#
# Copyright © 2012-2018 R.F. Smith <rsmith@xs4all.nl>.
# SPDX-License-Identifier: MIT
# Created: 2012-07-23T01:18:29+02:00
# Last modified: 2019-07-27T13:50:29+0200
"""Make a histogram and calculate entropy of files."""
import math
import os.path
import subprocess as sp
import sys
def main(argv):
"""
Entry point for histdata.
Arguments:
argv: List of file names.
"""
if len(argv) < 1:
sys.exit(1)
for fn in argv:
hdata, size = readdata(fn)
e = entropy(hdata, size)
print(f"entropy of {fn} is {e:.4f} bits/byte")
histogram_gnuplot(hdata, size, fn)
def readdata(name):
"""
Read the data from a file and count it.
Arguments:
name: String containing the filename to open.
Returns:
A tuple (counts list, length of data).
"""
f = open(name, 'rb')
data = f.read()
f.close()
ba = bytearray(data)
del data
counts = [0] * 256
for b in ba:
counts[b] += 1
return (counts, float(len(ba)))
def entropy(counts, sz):
"""
Calculate the entropy of the data represented by the counts list.
Arguments:
counts: List of counts.
sz: Length of the data in bytes.
Returns:
Entropy value.
"""
ent = 0.0
for b in counts:
if b == 0:
continue
p = float(b) / sz
ent -= p * math.log(p, 256)
return ent * 8
def histogram_gnuplot(counts, sz, name):
"""
Use gnuplot to create a histogram from the data in the form of a PDF file.
Arguments
counts: List of counts.
sz: Length of the data in bytes.
name: Name of the output file.
"""
counts = [100 * c / sz for c in counts]
rnd = 1.0 / 256 * 100
pl = ['set terminal pdfcairo size 18 cm,10 cm']
pl += ["set style line 1 lc rgb '#E41A1C' pt 1 ps 1 lt 1 lw 4"]
pl += ["set style line 2 lc rgb '#377EB8' pt 6 ps 1 lt 1 lw 4"]
pl += ["set style line 3 lc rgb '#4DAF4A' pt 2 ps 1 lt 1 lw 4"]
pl += ["set style line 4 lc rgb '#984EA3' pt 3 ps 1 lt 1 lw 4"]
pl += ["set style line 5 lc rgb '#FF7F00' pt 4 ps 1 lt 1 lw 4"]
pl += ["set style line 6 lc rgb '#FFFF33' pt 5 ps 1 lt 1 lw 4"]
pl += ["set style line 7 lc rgb '#A65628' pt 7 ps 1 lt 1 lw 4"]
pl += ["set style line 8 lc rgb '#F781BF' pt 8 ps 1 lt 1 lw 4"]
pl += ["set palette maxcolors 8"]
pl += [
"set palette defined ( 0 '#E41A1C', 1 '#377EB8', 2 '#4DAF4A',"
" 3 '#984EA3',4 '#FF7F00', 5 '#FFFF33', 6 '#A65628', 7 '#F781BF' )"
]
pl += ["set style line 11 lc rgb '#808080' lt 1 lw 5"]
pl += ["set border 3 back ls 11"]
pl += ["set tics nomirror"]
pl += ["set style line 12 lc rgb '#808080' lt 0 lw 2"]
pl += ["set grid back ls 12"]
nm = os.path.basename(name)
pl += [f"set output 'hist-{nm}.pdf'"]
pl += ['set xrange[-1:256]']
pl += ['set yrange[0:*]']
pl += ['set key right top']
pl += ['set xlabel "byte value"']
pl += ['set ylabel "occurance [%]"']
pl += [f'rnd(x) = {rnd:.6f}']
pl += [f"plot '-' using 1:2 with points ls 1 title '{name}', "
f"rnd(x) with lines ls 2 title 'continuous uniform ({rnd:.6f}%)'"]
for n, v in enumerate(counts):
pl += [f'{n} {v}']
pl += ['e']
pt = '\n'.join(pl)
sp.run(['gnuplot'], input=pt.encode('utf-8'), check=True)
if __name__ == '__main__':
main(sys.argv[1:])
编辑: 如您所见,上面的代码具有重要的历史意义。这些天我倾向于做的一件事是使用此处文档形式的内联数据(请参阅 gnuplot 中的 help inline
)。
这比使用 '-'
文件更灵活。数据是持久的,可以在多个图中使用。
例如:
pl += ['$data << EOD']
pl += [f'{n} {n**2}' for n in range(20)]
pl += ['EOD']
不幸的是,您的最小示例在几个方面都不正确,并且不能立即在我的系统上运行。
以下脚本(在 Windows 下测试(!))的基本思想是在 gnuplot 中调用系统命令。据我所知,似乎需要完整的文件名。 单引号和双引号似乎有些特别。我不确定,但我猜 gnuplot 需要单引号中的文件名,而系统命令(至少在 Windows 中)似乎需要双引号中的文件名。
无论如何,至少,代码在 Windows 下运行没有问题,并且创建绘图和删除文件。如果您使用 Linux,我希望您能相应地调整它。
代码:
### generate plots and delete input files
import os
import PyGnuplot as gp
dir_in = r'C:\Test\In'
dir_out = r'C:\Test\Out'
for file in os.listdir(dir_in):
ffname_in = "'" + os.path.join(dir_in,file) + "'" # 'full filename_in'
ffname_del = '"' + os.path.join(dir_in,file) + '"' # "full filename_del"
ffname_out = "'" + os.path.join(dir_out,file) + ".pdf'" # 'full filename_out'
gp.c('set terminal pdfcairo')
gp.c('set output ' + ffname_out)
gp.c('input = ' + ffname_in)
gp.c('stats input u 2 nooutput')
gp.c('plot input u 1:2')
gp.c('set output')
gp.c('print ' + ffname_in)
delete_cmd = "system ('del " + ffname_del + "')"
gp.c(delete_cmd)
### end of code