如何用 Python "join"(合并)两个文本文件?
How to "join" two text files with python?
我有两个这样的 txt 文件:
txt1:
Foo
Foo
Foo
Foo
txt2:
Bar
Bar
Bar
Bar
如何把它们左右并排地合并到一个新文件中,比如这样:
Bar Foo
Bar Foo
Bar Foo
Bar Foo
我尝试了以下方法:
# Asker's attempt: each input file is copied into the output in turn, so the
# result is the two files stacked vertically, not joined side by side.
folder = ['/Users/user/Desktop/merge1.txt', '/Users/user/Desktop/merge2.txt']
with open('/Users/user/Desktop/merged.txt', 'w') as outfile:
    for file in folder:
        with open(file) as newfile:
            for line in newfile:
                outfile.write(line)
您可以使用 zip 将两个文件的行逐行配对,然后把每一对拼接起来写入输出文件:
# Pair up the lines of both inputs with zip() and write each pair on one line.
# zip() stops as soon as the shorter file runs out of lines.
folder = ['/Users/user/Desktop/merge1.txt', '/Users/user/Desktop/merge2.txt']
with open('/Users/user/Desktop/merged.txt', 'w') as outfile:
    # Open both inputs once, by indexing the list itself. Looping over
    # `folder` and indexing each path string would pass single characters
    # (e.g. '/') to open() instead of file names.
    with open(folder[0]) as newfile, open(folder[1]) as newfile1:
        for line in zip(newfile, newfile1):
            # Drop the left line's trailing newline; the right line keeps its
            # own newline, which terminates the joined output line.
            outfile.write(line[0].rstrip() + " " + line[1])
使用 itertools.izip(仅适用于 Python 2;在 Python 3 中内置的 zip 具有相同的惰性行为)合并两个文件中的行,像这样:
try:
    from itertools import izip  # Python 2: lazy variant of zip
except ImportError:
    izip = zip  # Python 3: the built-in zip is already lazy

# Write "<line from in1> <line from in2>" for every pair of lines; iteration
# stops when the shorter input is exhausted.
with open('res.txt', 'w') as res, open('in1.txt') as f1, open('in2.txt') as f2:
    for line1, line2 in izip(f1, f2):
        res.write("{} {}\n".format(line1.rstrip(), line2.rstrip()))
注意:此解决方案只会写入到其中一个文件耗尽为止。例如,如果第二个文件有 1000 行而第一个文件只有 2 行,那么每个文件只有前两行会被复制到结果中。如果希望在较短的文件耗尽之后仍然保留较长文件中剩余的行,可以使用 itertools.izip_longest,像这样:
try:
    from itertools import izip_longest  # Python 2 name
except ImportError:
    from itertools import zip_longest as izip_longest  # Python 3 name

# Keep going until the longer input is exhausted; the shorter input
# contributes fillvalue ("") for its missing lines.
with open('res.txt', 'w') as res, open('in1.txt') as f1, open('in2.txt') as f2:
    for line1, line2 in izip_longest(f1, f2, fillvalue=""):
        res.write("{} {}\n".format(line1.rstrip(), line2.rstrip()))
在这种情况下,即使较短的文件已经读完,较长文件中的行仍会被复制,较短文件中缺少的行则用 fillvalue 填充。
这里有一个脚本可以解决这个问题:
https://gist.github.com/fabriciorsf/92c5fb1a7d9f001f777813a79e681d8b
#!/usr/bin/env python
'''
Merge/Join/Combine lines of multiple input files.
Write lines consisting of the sequentially corresponding lines from each input file, separated by whitespace character, to output file.
TODO: implements params like https://github.com/coreutils/coreutils/blob/master/src/paste.c
'''
import sys
from contextlib import ExitStack
from itertools import zip_longest
def main(args):
    '''
    Command-line entry point.

    args: list of file names; every name except the last is an input file,
    the last one is the output file. At least two inputs and one output are
    required, otherwise a usage line is printed and the process exits with
    status 0.
    '''
    if len(args) < 3:
        print(sys.argv[0] + ' <input-file-1> <input-file-2> [<input-file-n>...] <output-file>')
        sys.exit(0)
    # Everything but the last argument is an input; the last is the output.
    mergeFiles(args[:-1], args[-1])
def mergeFiles(inputFileNames, outputFileName, delimiterChar=" ", fillValue="-"):
    '''
    Join the sequentially corresponding lines of several text files.

    inputFileNames: iterable of paths to read (UTF-8, undecodable bytes replaced).
    outputFileName: path of the file to write (UTF-8); it is overwritten.
    delimiterChar: string placed between the fields of one output line.
    fillValue: field used for inputs that have run out of lines.

    Each field is stripped of leading and trailing whitespace. Output
    continues until the longest input is exhausted.
    '''
    # ExitStack closes every input file on exit, however many there are.
    with ExitStack() as eStack:
        inputFiles = [
            eStack.enter_context(open(fileName, 'r', encoding='utf-8', errors='replace'))
            for fileName in inputFileNames
        ]
        with open(outputFileName, 'w', encoding='utf-8', errors='replace') as outputFile:
            for tupleOfLineFiles in zip_longest(*inputFiles, fillvalue=fillValue):
                outputFile.write(delimiterChar.join(map(str.strip, tupleOfLineFiles)) + "\n")
# Run as a script: pass the command-line arguments (without the program name).
if __name__ == "__main__":
    main(sys.argv[1:])
我有两个这样的 txt 文件: txt1:
Foo
Foo
Foo
Foo
txt2:
Bar
Bar
Bar
Bar
如何把它们左右并排地合并到一个新文件中,比如这样:
Bar Foo
Bar Foo
Bar Foo
Bar Foo
我尝试了以下方法:
# Asker's attempt: each input file is copied into the output in turn, so the
# result is the two files stacked vertically, not joined side by side.
folder = ['/Users/user/Desktop/merge1.txt', '/Users/user/Desktop/merge2.txt']
with open('/Users/user/Desktop/merged.txt', 'w') as outfile:
    for file in folder:
        with open(file) as newfile:
            for line in newfile:
                outfile.write(line)
您可以使用 zip 将两个文件的行逐行配对,然后把每一对拼接起来写入输出文件:
# Pair up the lines of both inputs with zip() and write each pair on one line.
# zip() stops as soon as the shorter file runs out of lines.
folder = ['/Users/user/Desktop/merge1.txt', '/Users/user/Desktop/merge2.txt']
with open('/Users/user/Desktop/merged.txt', 'w') as outfile:
    # Open both inputs once, by indexing the list itself. Looping over
    # `folder` and indexing each path string would pass single characters
    # (e.g. '/') to open() instead of file names.
    with open(folder[0]) as newfile, open(folder[1]) as newfile1:
        for line in zip(newfile, newfile1):
            # Drop the left line's trailing newline; the right line keeps its
            # own newline, which terminates the joined output line.
            outfile.write(line[0].rstrip() + " " + line[1])
使用 itertools.izip(仅适用于 Python 2;在 Python 3 中内置的 zip 具有相同的惰性行为)合并两个文件中的行,像这样:
try:
    from itertools import izip  # Python 2: lazy variant of zip
except ImportError:
    izip = zip  # Python 3: the built-in zip is already lazy

# Write "<line from in1> <line from in2>" for every pair of lines; iteration
# stops when the shorter input is exhausted.
with open('res.txt', 'w') as res, open('in1.txt') as f1, open('in2.txt') as f2:
    for line1, line2 in izip(f1, f2):
        res.write("{} {}\n".format(line1.rstrip(), line2.rstrip()))
注意:此解决方案只会写入到其中一个文件耗尽为止。例如,如果第二个文件有 1000 行而第一个文件只有 2 行,那么每个文件只有前两行会被复制到结果中。如果希望在较短的文件耗尽之后仍然保留较长文件中剩余的行,可以使用 itertools.izip_longest,像这样:
try:
    from itertools import izip_longest  # Python 2 name
except ImportError:
    from itertools import zip_longest as izip_longest  # Python 3 name

# Keep going until the longer input is exhausted; the shorter input
# contributes fillvalue ("") for its missing lines.
with open('res.txt', 'w') as res, open('in1.txt') as f1, open('in2.txt') as f2:
    for line1, line2 in izip_longest(f1, f2, fillvalue=""):
        res.write("{} {}\n".format(line1.rstrip(), line2.rstrip()))
在这种情况下,即使较短的文件已经读完,较长文件中的行仍会被复制,较短文件中缺少的行则用 fillvalue 填充。
这里有一个脚本可以解决这个问题: https://gist.github.com/fabriciorsf/92c5fb1a7d9f001f777813a79e681d8b
#!/usr/bin/env python
'''
Merge/Join/Combine lines of multiple input files.
Write lines consisting of the sequentially corresponding lines from each input file, separated by whitespace character, to output file.
TODO: implements params like https://github.com/coreutils/coreutils/blob/master/src/paste.c
'''
import sys
from contextlib import ExitStack
from itertools import zip_longest
def main(args):
    '''
    Command-line entry point.

    args: list of file names; every name except the last is an input file,
    the last one is the output file. At least two inputs and one output are
    required, otherwise a usage line is printed and the process exits with
    status 0.
    '''
    if len(args) < 3:
        print(sys.argv[0] + ' <input-file-1> <input-file-2> [<input-file-n>...] <output-file>')
        sys.exit(0)
    # Everything but the last argument is an input; the last is the output.
    mergeFiles(args[:-1], args[-1])
def mergeFiles(inputFileNames, outputFileName, delimiterChar=" ", fillValue="-"):
    '''
    Join the sequentially corresponding lines of several text files.

    inputFileNames: iterable of paths to read (UTF-8, undecodable bytes replaced).
    outputFileName: path of the file to write (UTF-8); it is overwritten.
    delimiterChar: string placed between the fields of one output line.
    fillValue: field used for inputs that have run out of lines.

    Each field is stripped of leading and trailing whitespace. Output
    continues until the longest input is exhausted.
    '''
    # ExitStack closes every input file on exit, however many there are.
    with ExitStack() as eStack:
        inputFiles = [
            eStack.enter_context(open(fileName, 'r', encoding='utf-8', errors='replace'))
            for fileName in inputFileNames
        ]
        with open(outputFileName, 'w', encoding='utf-8', errors='replace') as outputFile:
            for tupleOfLineFiles in zip_longest(*inputFiles, fillvalue=fillValue):
                outputFile.write(delimiterChar.join(map(str.strip, tupleOfLineFiles)) + "\n")
# Run as a script: pass the command-line arguments (without the program name).
if __name__ == "__main__":
    main(sys.argv[1:])