如何在 Python 中将(上一个函数的)返回值读入 pandas?运行时出现错误信息
How to read the returned value (from previous function) into pandas, python? Getting error messages
在下面的程序中
我想在下游函数中访问(pipe)上一个函数返回的数据。
使用 python 代码如下:
def main():
    """Read the three data sets and hand them to ``do_calc``."""
    data1, data2, data3 = read_file()
    do_calc(data1, data2, data3)
def read_file():
    """Build three text blobs from the input files and return them.

    Returns:
        A ``(data1, data2, data3)`` tuple of plain strings holding the
        accumulated values (not file names and not file objects).
    """
    data1 = ""
    data2 = ""
    data3 = ""

    # Iterate the open handle so that ``line`` really is one line; looping
    # over the result of ``.read()`` would yield single characters.  The
    # ``with`` block also closes the file, which a bare ``open()`` does not.
    with open('file1.txt', 'r+') as file1:
        for line in file1:
            # do something....
            # ``calculated_values`` stands for the result of the elided step.
            data1 += calculated_values

    with open('file2.txt', 'r+') as file2:
        # Loop over file2 here: the earlier version iterated file1 again and
        # never used the contents of file2.
        for line in file2:
            # do something...
            data2 += calculated_values

    # NOTE(review): file1.txt is opened a second time for data3, exactly as
    # in the earlier version -- confirm whether a third input was intended.
    with open('file1.txt', 'r+') as file1:
        for line in file1:
            # do something...
            data3 += calculated_values

    return data1, data2, data3
def do_calc(data1, data2, data3):
    """Parse three tab-separated inputs into DataFrames.

    Args:
        data1, data2, data3: Passed straight to ``pd.read_table``, so each
            must be something it can open -- a path or a file-like object
            such as ``io.StringIO`` -- not the table text itself.

    Returns:
        ``[d1_frame, d2_frame, d3_frame]``.
    """
    d1_frame = pd.read_table(data1, sep='\t')
    d2_frame = pd.read_table(data2, sep='\t')
    d3_frame = pd.read_table(data3, sep='\t')
    all_data = [d1_frame, d2_frame, d3_frame]
    # Hand the frames back to the caller instead of discarding the list.
    return all_data
# Run only when executed as a script, not when this module is imported.
if __name__ == "__main__":
    main()
这段代码有什么问题?pandas 似乎无法正确读取输入,而是把 data1、data2 和 data3 的值打印到了屏幕上。
read_hdf 似乎能读取文件,但结果不正确。有没有办法把函数返回的数据直接读入 pandas(而无需先写入文件再读取)?
错误信息:
Traceback (most recent call last):
File "calc.py", line 757, in <module>
main()
File "calc.py", line 137, in main
merge_tables(pop1_freq_table, pop2_freq_table, f1_freq_table)
File "calc.py", line 373, in merge_tables
df1 = pd.read_table(pop1_freq_table, sep='\t')
File "/home/everestial007/.local/lib/python3.5/site-packages/pandas/io/parsers.py", line 645, in parser_f
return _read(filepath_or_buffer, kwds)
File "/home/everestial007/.local/lib/python3.5/site-packages/pandas/io/parsers.py", line 388, in _read
parser = TextFileReader(filepath_or_buffer, **kwds)
File "/home/everestial007/.local/lib/python3.5/site-packages/pandas/io/parsers.py", line 729, in __init__
self._make_engine(self.engine)
File "/home/everestial007/.local/lib/python3.5/site-packages/pandas/io/parsers.py", line 922, in _make_engine
self._engine = CParserWrapper(self.f, **self.options)
File "/home/everestial007/.local/lib/python3.5/site-packages/pandas/io/parsers.py", line 1389, in __init__
self._reader = _parser.TextReader(src, **kwds)
File "pandas/parser.pyx", line 373, in pandas.parser.TextReader.__cinit__ (pandas/parser.c:4019)
File "pandas/parser.pyx", line 665, in pandas.parser.TextReader._setup_parser_source (pandas/parser.c:7967)
FileNotFoundError: File b'0.667,0.333\n2\t15800126\tT\tT,A\t0.667,0.333\n2\t15800193\tC\tC,T\t0.667,0.333\n2\t15800244\tT\tT,C\......
如有任何解释,我将不胜感激。
read_table
需要一个文件作为输入,但您传递的是一串数据而不是包含文件位置的字符串。您可以将数据写入文件,然后从该文件中读取。假设字符串已经正确格式化:
filename = 'tab_separated_file_1.dat'
# Write the in-memory text to disk so pandas has a real file to open.
with open(filename, 'w') as f:
    f.write(data1)
# Read only after the ``with`` block has ended: by then the file has been
# flushed and closed, so the complete contents are on disk.
df1 = pd.read_table(filename, sep='\t')
正如其他答案所说,read_table 需要的输入是文件——更准确地说,是一个“类文件对象”(file-like object)。您可以用 StringIO 对象来保存 data1、data2 和 data3 的内容:它的行为与文件类似,可以直接传给 pandas。只需对您的代码稍作调整:
#Import StringIO...
# python 2
from StringIO import StringIO
# python 3
from io import StringIO
def main():
    """Read the three data sets and hand them to ``do_calc``."""
    data1, data2, data3 = read_file()
    do_calc(data1, data2, data3)
def read_file():
    """Collect the three data sets in in-memory text buffers.

    Returns:
        A ``(data1, data2, data3)`` tuple of ``StringIO`` objects, each
        rewound to the start so it can be read like a freshly opened file.
    """
    # use StringIO objects instead of strings...
    data1 = StringIO()
    data2 = StringIO()
    data3 = StringIO()

    # Iterate the open handle so that ``line`` really is one line; looping
    # over the result of ``.read()`` would yield single characters.  The
    # ``with`` block also closes the file, which a bare ``open()`` does not.
    with open('file1.txt', 'r+') as file1:
        for line in file1:
            # do something....
            # note that " += " became ".write()"
            # ``calculated_values`` stands for the result of the elided step.
            data1.write(calculated_values)

    with open('file2.txt', 'r+') as file2:
        # Loop over file2 here: the earlier version iterated file1 again and
        # never used the contents of file2.
        for line in file2:
            # do something...
            data2.write(calculated_values)

    # NOTE(review): file1.txt is opened a second time for data3, exactly as
    # in the earlier version -- confirm whether a third input was intended.
    with open('file1.txt', 'r+') as file1:
        for line in file1:
            # do something...
            data3.write(calculated_values)

    # Writing leaves each buffer positioned at its end; rewind so a reader
    # starts from the first character instead of finding nothing to parse.
    for buffer in (data1, data2, data3):
        buffer.seek(0)

    return data1, data2, data3
def do_calc(data1, data2, data3):
    """Parse three tab-separated inputs into DataFrames.

    Args:
        data1, data2, data3: Passed straight to ``pd.read_table``, so each
            must be something it can open -- a path or a file-like object
            such as ``io.StringIO`` -- not the table text itself.

    Returns:
        ``[d1_frame, d2_frame, d3_frame]``.
    """
    d1_frame = pd.read_table(data1, sep='\t')
    d2_frame = pd.read_table(data2, sep='\t')
    d3_frame = pd.read_table(data3, sep='\t')
    all_data = [d1_frame, d2_frame, d3_frame]
    # Hand the frames back to the caller instead of discarding the list.
    return all_data
# Run only when executed as a script, not when this module is imported.
if __name__ == "__main__":
    main()
在下面的程序中
我想在下游函数中访问(pipe)上一个函数返回的数据。
使用 python 代码如下:
def main():
    """Read the three data sets and hand them to ``do_calc``."""
    data1, data2, data3 = read_file()
    do_calc(data1, data2, data3)
def read_file():
    """Build three text blobs from the input files and return them.

    Returns:
        A ``(data1, data2, data3)`` tuple of plain strings holding the
        accumulated values (not file names and not file objects).
    """
    data1 = ""
    data2 = ""
    data3 = ""

    # Iterate the open handle so that ``line`` really is one line; looping
    # over the result of ``.read()`` would yield single characters.  The
    # ``with`` block also closes the file, which a bare ``open()`` does not.
    with open('file1.txt', 'r+') as file1:
        for line in file1:
            # do something....
            # ``calculated_values`` stands for the result of the elided step.
            data1 += calculated_values

    with open('file2.txt', 'r+') as file2:
        # Loop over file2 here: the earlier version iterated file1 again and
        # never used the contents of file2.
        for line in file2:
            # do something...
            data2 += calculated_values

    # NOTE(review): file1.txt is opened a second time for data3, exactly as
    # in the earlier version -- confirm whether a third input was intended.
    with open('file1.txt', 'r+') as file1:
        for line in file1:
            # do something...
            data3 += calculated_values

    return data1, data2, data3
def do_calc(data1, data2, data3):
    """Parse three tab-separated inputs into DataFrames.

    Args:
        data1, data2, data3: Passed straight to ``pd.read_table``, so each
            must be something it can open -- a path or a file-like object
            such as ``io.StringIO`` -- not the table text itself.

    Returns:
        ``[d1_frame, d2_frame, d3_frame]``.
    """
    d1_frame = pd.read_table(data1, sep='\t')
    d2_frame = pd.read_table(data2, sep='\t')
    d3_frame = pd.read_table(data3, sep='\t')
    all_data = [d1_frame, d2_frame, d3_frame]
    # Hand the frames back to the caller instead of discarding the list.
    return all_data
# Run only when executed as a script, not when this module is imported.
if __name__ == "__main__":
    main()
这段代码有什么问题?pandas 似乎无法正确读取输入,而是把 data1、data2 和 data3 的值打印到了屏幕上。
read_hdf 似乎能读取文件,但结果不正确。有没有办法把函数返回的数据直接读入 pandas(而无需先写入文件再读取)?
错误信息:
Traceback (most recent call last):
File "calc.py", line 757, in <module>
main()
File "calc.py", line 137, in main
merge_tables(pop1_freq_table, pop2_freq_table, f1_freq_table)
File "calc.py", line 373, in merge_tables
df1 = pd.read_table(pop1_freq_table, sep='\t')
File "/home/everestial007/.local/lib/python3.5/site-packages/pandas/io/parsers.py", line 645, in parser_f
return _read(filepath_or_buffer, kwds)
File "/home/everestial007/.local/lib/python3.5/site-packages/pandas/io/parsers.py", line 388, in _read
parser = TextFileReader(filepath_or_buffer, **kwds)
File "/home/everestial007/.local/lib/python3.5/site-packages/pandas/io/parsers.py", line 729, in __init__
self._make_engine(self.engine)
File "/home/everestial007/.local/lib/python3.5/site-packages/pandas/io/parsers.py", line 922, in _make_engine
self._engine = CParserWrapper(self.f, **self.options)
File "/home/everestial007/.local/lib/python3.5/site-packages/pandas/io/parsers.py", line 1389, in __init__
self._reader = _parser.TextReader(src, **kwds)
File "pandas/parser.pyx", line 373, in pandas.parser.TextReader.__cinit__ (pandas/parser.c:4019)
File "pandas/parser.pyx", line 665, in pandas.parser.TextReader._setup_parser_source (pandas/parser.c:7967)
FileNotFoundError: File b'0.667,0.333\n2\t15800126\tT\tT,A\t0.667,0.333\n2\t15800193\tC\tC,T\t0.667,0.333\n2\t15800244\tT\tT,C\......
如有任何解释,我将不胜感激。
read_table
需要一个文件作为输入,但您传递的是一串数据而不是包含文件位置的字符串。您可以将数据写入文件,然后从该文件中读取。假设字符串已经正确格式化:
filename = 'tab_separated_file_1.dat'
# Write the in-memory text to disk so pandas has a real file to open.
with open(filename, 'w') as f:
    f.write(data1)
# Read only after the ``with`` block has ended: by then the file has been
# flushed and closed, so the complete contents are on disk.
df1 = pd.read_table(filename, sep='\t')
正如其他答案所说,read_table 需要的输入是文件——更准确地说,是一个“类文件对象”(file-like object)。您可以用 StringIO 对象来保存 data1、data2 和 data3 的内容:它的行为与文件类似,可以直接传给 pandas。只需对您的代码稍作调整:
#Import StringIO...
# python 2
from StringIO import StringIO
# python 3
from io import StringIO
def main():
    """Read the three data sets and hand them to ``do_calc``."""
    data1, data2, data3 = read_file()
    do_calc(data1, data2, data3)
def read_file():
    """Collect the three data sets in in-memory text buffers.

    Returns:
        A ``(data1, data2, data3)`` tuple of ``StringIO`` objects, each
        rewound to the start so it can be read like a freshly opened file.
    """
    # use StringIO objects instead of strings...
    data1 = StringIO()
    data2 = StringIO()
    data3 = StringIO()

    # Iterate the open handle so that ``line`` really is one line; looping
    # over the result of ``.read()`` would yield single characters.  The
    # ``with`` block also closes the file, which a bare ``open()`` does not.
    with open('file1.txt', 'r+') as file1:
        for line in file1:
            # do something....
            # note that " += " became ".write()"
            # ``calculated_values`` stands for the result of the elided step.
            data1.write(calculated_values)

    with open('file2.txt', 'r+') as file2:
        # Loop over file2 here: the earlier version iterated file1 again and
        # never used the contents of file2.
        for line in file2:
            # do something...
            data2.write(calculated_values)

    # NOTE(review): file1.txt is opened a second time for data3, exactly as
    # in the earlier version -- confirm whether a third input was intended.
    with open('file1.txt', 'r+') as file1:
        for line in file1:
            # do something...
            data3.write(calculated_values)

    # Writing leaves each buffer positioned at its end; rewind so a reader
    # starts from the first character instead of finding nothing to parse.
    for buffer in (data1, data2, data3):
        buffer.seek(0)

    return data1, data2, data3
def do_calc(data1, data2, data3):
    """Parse three tab-separated inputs into DataFrames.

    Args:
        data1, data2, data3: Passed straight to ``pd.read_table``, so each
            must be something it can open -- a path or a file-like object
            such as ``io.StringIO`` -- not the table text itself.

    Returns:
        ``[d1_frame, d2_frame, d3_frame]``.
    """
    d1_frame = pd.read_table(data1, sep='\t')
    d2_frame = pd.read_table(data2, sep='\t')
    d3_frame = pd.read_table(data3, sep='\t')
    all_data = [d1_frame, d2_frame, d3_frame]
    # Hand the frames back to the caller instead of discarding the list.
    return all_data
# Run only when executed as a script, not when this module is imported.
if __name__ == "__main__":
    main()