将多个文件中的同一列粘贴到一个文件中
paste same column from multiple files into one
我有大约 50 个制表符分隔的文件,我想将其中的 $7 列打印到一个新文件中。所有文件都具有相同数量的列和相同数量的行。在输出中,来自不同文件的列应该彼此相邻粘贴,并用制表符分隔。
我想结合使用 'ls'、'xargs' 和 'awk'。所以 ls 找到我想要的所有文件,然后 awk 打印第 7 列并创建 output.txt
# Attempted pipeline from the question (the asker is unsure how to use xargs here).
ls /folder/*_name.txt | awk '{print $7}' xargs {} > output.txt
我的主要问题是 xargs 的使用以及如何在输出文件的不同列中打印所有 $7
如果我理解你正在尝试做的事情是正确的,那么你可以使用 awk
# Store column 7 of every line keyed by (line number, file index), then print the values side by side.
awk -F '\t' 'FNR == 1 { ++file } { col[FNR, file] = $7 } END { for(i = 1; i <= FNR; ++i) { line = col[i, 1]; for(j = 2; j <= file; ++j) { line = line "\t" col[i, j] }; print line } }' file1 file2 file3 file4
代码如下:
FNR == 1 { ++file }                  # on the first line of each input file, increase
                                     # the file counter, so `file` is the index
                                     # of the file currently being processed
{
  col[FNR, file] = $7                # remember the 7th column of every line,
}                                    # keyed by line number and file index
END {                                # once all input has been read:
  for(i = 1; i <= FNR; ++i) {        # walk through the line numbers (this relies on
                                     # FNR still holding the last file's line count),
    line = col[i, 1]                 # start the row with the value from the first file
    for(j = 2; j <= file; ++j) {     # and append the value from each further file,
      line = line "\t" col[i, j]     # separated by a tab
    }
    print line                       # then print the assembled row.
  }
}
编辑:如果在读取输入的过程中就动态拼接每一行,而不是等到最后再拼接,这可以缩短为
# Append column 7 to the row for its line number while reading; sep becomes a tab from the second file on.
awk -F '\t' 'FNR == 1 && FNR != NR { sep = "\t" } { line[FNR] = line[FNR] sep $7 } END { for(i = 1; i <= FNR; ++i) { print line[i] } }' file1 file2 file3 file4
也就是
FNR == 1 && FNR != NR {            # on the first line of a file, but not of the first file
  sep = "\t"                       # set the separator to a tab (for the first file it is empty)
}
{                                  # assemble each output row on the fly:
  line[FNR] = line[FNR] sep $7     # append this file's 7th column to the row for this line number
}
END {                              # and at the end, print the rows in line order.
  for(i = 1; i <= FNR; ++i) {
    print line[i]
  }
}
如果使用 GNU awk(gawk),这可以进一步缩短为
# gawk only: ENDFILE runs after each input file, so sep is empty for the first file and a tab afterwards.
awk -F '\t' '{ line[FNR] = line[FNR] sep $7 } ENDFILE { sep = "\t" } END { for(i = 1; i <= FNR; ++i) { print line[i] } }' file1 file2 file3 file4
...但是 ENDFILE 是 gawk 的扩展,其他 awk 实现(例如 mawk)并不支持它,因此您可能更愿意避免使用它。
我知道这不太好,但您可以使用 Python 非常轻松地完成此操作。我在 5 分钟内写了这篇文章,并在三个具有相同列和行的文件上进行了测试,它有效
import csv, os
def getData(fileDir, newFile, COLUMN):
    """Paste one column from every tab-separated file in a directory side by side.

    Each regular file in ``fileDir`` (except a previous output named
    ``newFile``) contributes one output column; files are processed in
    sorted name order so the column order is reproducible.

    Args:
        fileDir: Directory containing the tab-separated input files.
        newFile: Name of the output file; it is written inside ``fileDir``.
        COLUMN: 1-based number of the column to extract from every file.

    Raises:
        IndexError: If a row has fewer than ``COLUMN`` fields, or a file has
            fewer rows than the first file.
    """
    column_index = COLUMN - 1
    out_path = os.path.join(fileDir, newFile)

    # Collect the input paths. The output file lives in the same directory,
    # so it must be skipped or a second run would read it as an input.
    in_paths = []
    for name in sorted(os.listdir(fileDir)):
        path = os.path.join(fileDir, name)
        if name == newFile or not os.path.isfile(path):
            continue
        in_paths.append(path)

    # columns[k] holds the selected column of the k-th file, one entry per row.
    columns = []
    for path in in_paths:
        # The csv module needs text mode with newline='' on Python 3.
        with open(path, newline='') as src:
            reader = csv.reader(src, delimiter='\t')
            # next(reader)  # uncomment to skip a header line in each file
            columns.append([row[column_index] for row in reader])

    # The first file decides how many rows are written (none if no inputs).
    row_count = len(columns[0]) if columns else 0

    # Transpose: merged[i] == [File1_ColN_Row_i, File2_ColN_Row_i, ...]
    merged = [[col[i] for col in columns] for i in range(row_count)]

    with open(out_path, 'w', newline='') as dst:
        writer = csv.writer(dst, delimiter='\t')
        writer.writerows(merged)
if __name__ == "__main__":
    # Directory where ONLY the tab-separated files reside.
    # Raw string: "\T" is not a valid escape sequence, so a plain literal
    # triggers an invalid-escape warning on Python 3; the value is unchanged.
    fileDir = r"C:\TabFiles"
    # Name of the new file; it is written into the same directory as the inputs.
    newFile = 'newTabFile.txt'
    # The (1-based) column to extract from every file.
    columnNum = 7
    getData(fileDir, newFile, columnNum)
我用 Python 创建了 9 个示例文件(file1.tsv 到 file9.tsv):
# Generate sample inputs file1.tsv .. file9.tsv in the current directory
# (range(1, 10) stops at 9). Each file gets 3 lines of 9 tab-separated cells,
# and every cell records its file name, line number and column number.
for i in range(1,10):
    fn='file'+str(i)+'.tsv'
    with open(fn, 'w') as f:
        for line in range(1,4):
            f.write('\t'.join('{}, line: {}, col: {}'.format(fn, line, col) for col in range(1,10)))
            f.write('\n')
这样就创建了 9 个如下形式的文件:
file1.tsv, line: 1, col: 1 file1.tsv, line: 1, col: 2 file1.tsv, line: 1, col: 3 file1.tsv, line: 1, col: 4 file1.tsv, line: 1, col: 5 file1.tsv, line: 1, col: 6 file1.tsv, line: 1, col: 7 file1.tsv, line: 1, col: 8 file1.tsv, line: 1, col: 9
file1.tsv, line: 2, col: 1 file1.tsv, line: 2, col: 2 file1.tsv, line: 2, col: 3 file1.tsv, line: 2, col: 4 file1.tsv, line: 2, col: 5 file1.tsv, line: 2, col: 6 file1.tsv, line: 2, col: 7 file1.tsv, line: 2, col: 8 file1.tsv, line: 2, col: 9
file1.tsv, line: 3, col: 1 file1.tsv, line: 3, col: 2 file1.tsv, line: 3, col: 3 file1.tsv, line: 3, col: 4 file1.tsv, line: 3, col: 5 file1.tsv, line: 3, col: 6 file1.tsv, line: 3, col: 7 file1.tsv, line: 3, col: 8 file1.tsv, line: 3, col: 9
...
file9.tsv, line: 1, col: 1 file9.tsv, line: 1, col: 2 file9.tsv, line: 1, col: 3 file9.tsv, line: 1, col: 4 file9.tsv, line: 1, col: 5 file9.tsv, line: 1, col: 6 file9.tsv, line: 1, col: 7 file9.tsv, line: 1, col: 8 file9.tsv, line: 1, col: 9
file9.tsv, line: 2, col: 1 file9.tsv, line: 2, col: 2 file9.tsv, line: 2, col: 3 file9.tsv, line: 2, col: 4 file9.tsv, line: 2, col: 5 file9.tsv, line: 2, col: 6 file9.tsv, line: 2, col: 7 file9.tsv, line: 2, col: 8 file9.tsv, line: 2, col: 9
file9.tsv, line: 3, col: 1 file9.tsv, line: 3, col: 2 file9.tsv, line: 3, col: 3 file9.tsv, line: 3, col: 4 file9.tsv, line: 3, col: 5 file9.tsv, line: 3, col: 6 file9.tsv, line: 3, col: 7 file9.tsv, line: 3, col: 8 file9.tsv, line: 3, col: 9
现在您已经有了这些示例文件(答案如下),只需使用 cut:
$ cut -f 7 *.tsv
file1.tsv, line: 1, col: 7
file1.tsv, line: 2, col: 7
file1.tsv, line: 3, col: 7
file2.tsv, line: 1, col: 7
file2.tsv, line: 2, col: 7
file2.tsv, line: 3, col: 7
file3.tsv, line: 1, col: 7
file3.tsv, line: 2, col: 7
file3.tsv, line: 3, col: 7
file4.tsv, line: 1, col: 7
file4.tsv, line: 2, col: 7
file4.tsv, line: 3, col: 7
file5.tsv, line: 1, col: 7
file5.tsv, line: 2, col: 7
file5.tsv, line: 3, col: 7
file6.tsv, line: 1, col: 7
file6.tsv, line: 2, col: 7
file6.tsv, line: 3, col: 7
file7.tsv, line: 1, col: 7
file7.tsv, line: 2, col: 7
file7.tsv, line: 3, col: 7
file8.tsv, line: 1, col: 7
file8.tsv, line: 2, col: 7
file8.tsv, line: 3, col: 7
file9.tsv, line: 1, col: 7
file9.tsv, line: 2, col: 7
file9.tsv, line: 3, col: 7
然后使用 tr 得到如下结果:
$ cut -f 7 *.tsv | tr '\n' '\t'
file1.tsv, line: 1, col: 7 file1.tsv, line: 2, col: 7 file1.tsv, line: 3, col: 7 file2.tsv, line: 1, col: 7 file2.tsv, line: 2, col: 7 file2.tsv, line: 3, col: 7 file3.tsv, line: 1, col: 7 file3.tsv, line: 2, col: 7 file3.tsv, line: 3, col: 7 file4.tsv, line: 1, col: 7 file4.tsv, line: 2, col: 7 file4.tsv, line: 3, col: 7 file5.tsv, line: 1, col: 7 file5.tsv, line: 2, col: 7 file5.tsv, line: 3, col: 7 file6.tsv, line: 1, col: 7 file6.tsv, line: 2, col: 7 file6.tsv, line: 3, col: 7 file7.tsv, line: 1, col: 7 file7.tsv, line: 2, col: 7 file7.tsv, line: 3, col: 7 file8.tsv, line: 1, col: 7 file8.tsv, line: 2, col: 7 file8.tsv, line: 3, col: 7 file9.tsv, line: 1, col: 7 file9.tsv, line: 2, col: 7 file9.tsv, line: 3, col: 7
或者使用 paste:
$ cut -f 7 *.tsv | paste -s -d '\t' -
file1.tsv, line: 1, col: 7 file1.tsv, line: 2, col: 7 file1.tsv, line: 3, col: 7 file2.tsv, line: 1, col: 7 file2.tsv, line: 2, col: 7 file2.tsv, line: 3, col: 7 file3.tsv, line: 1, col: 7 file3.tsv, line: 2, col: 7 file3.tsv, line: 3, col: 7 file4.tsv, line: 1, col: 7 file4.tsv, line: 2, col: 7 file4.tsv, line: 3, col: 7 file5.tsv, line: 1, col: 7 file5.tsv, line: 2, col: 7 file5.tsv, line: 3, col: 7 file6.tsv, line: 1, col: 7 file6.tsv, line: 2, col: 7 file6.tsv, line: 3, col: 7 file7.tsv, line: 1, col: 7 file7.tsv, line: 2, col: 7 file7.tsv, line: 3, col: 7 file8.tsv, line: 1, col: 7 file8.tsv, line: 2, col: 7 file8.tsv, line: 3, col: 7 file9.tsv, line: 1, col: 7 file9.tsv, line: 2, col: 7 file9.tsv, line: 3, col: 7
我有大约 50 个制表符分隔的文件,我想将其中的 $7 列打印到一个新文件中。所有文件都具有相同数量的列和相同数量的行。在输出中,来自不同文件的列应该彼此相邻粘贴,并用制表符分隔。
我想结合使用 'ls'、'xargs' 和 'awk'。所以 ls 找到我想要的所有文件,然后 awk 打印第 7 列并创建 output.txt
# Attempted pipeline from the question (the asker is unsure how to use xargs here).
ls /folder/*_name.txt | awk '{print $7}' xargs {} > output.txt
我的主要问题是 xargs 的使用以及如何在输出文件的不同列中打印所有 $7
如果我理解你正在尝试做的事情是正确的,那么你可以使用 awk
# Store column 7 of every line keyed by (line number, file index), then print the values side by side.
awk -F '\t' 'FNR == 1 { ++file } { col[FNR, file] = $7 } END { for(i = 1; i <= FNR; ++i) { line = col[i, 1]; for(j = 2; j <= file; ++j) { line = line "\t" col[i, j] }; print line } }' file1 file2 file3 file4
代码如下:
FNR == 1 { ++file }                  # on the first line of each input file, increase
                                     # the file counter, so `file` is the index
                                     # of the file currently being processed
{
  col[FNR, file] = $7                # remember the 7th column of every line,
}                                    # keyed by line number and file index
END {                                # once all input has been read:
  for(i = 1; i <= FNR; ++i) {        # walk through the line numbers (this relies on
                                     # FNR still holding the last file's line count),
    line = col[i, 1]                 # start the row with the value from the first file
    for(j = 2; j <= file; ++j) {     # and append the value from each further file,
      line = line "\t" col[i, j]     # separated by a tab
    }
    print line                       # then print the assembled row.
  }
}
编辑:如果在读取输入的过程中就动态拼接每一行,而不是等到最后再拼接,这可以缩短为
# Append column 7 to the row for its line number while reading; sep becomes a tab from the second file on.
awk -F '\t' 'FNR == 1 && FNR != NR { sep = "\t" } { line[FNR] = line[FNR] sep $7 } END { for(i = 1; i <= FNR; ++i) { print line[i] } }' file1 file2 file3 file4
也就是
FNR == 1 && FNR != NR {            # on the first line of a file, but not of the first file
  sep = "\t"                       # set the separator to a tab (for the first file it is empty)
}
{                                  # assemble each output row on the fly:
  line[FNR] = line[FNR] sep $7     # append this file's 7th column to the row for this line number
}
END {                              # and at the end, print the rows in line order.
  for(i = 1; i <= FNR; ++i) {
    print line[i]
  }
}
如果使用 GNU awk(gawk),这可以进一步缩短为
# gawk only: ENDFILE runs after each input file, so sep is empty for the first file and a tab afterwards.
awk -F '\t' '{ line[FNR] = line[FNR] sep $7 } ENDFILE { sep = "\t" } END { for(i = 1; i <= FNR; ++i) { print line[i] } }' file1 file2 file3 file4
...但是 ENDFILE 是 gawk 的扩展,其他 awk 实现(例如 mawk)并不支持它,因此您可能更愿意避免使用它。
我知道这不太好,但您可以使用 Python 非常轻松地完成此操作。我在 5 分钟内写了这篇文章,并在三个具有相同列和行的文件上进行了测试,它有效
import csv, os
def getData(fileDir, newFile, COLUMN):
    """Paste one column from every tab-separated file in a directory side by side.

    Each regular file in ``fileDir`` (except a previous output named
    ``newFile``) contributes one output column; files are processed in
    sorted name order so the column order is reproducible.

    Args:
        fileDir: Directory containing the tab-separated input files.
        newFile: Name of the output file; it is written inside ``fileDir``.
        COLUMN: 1-based number of the column to extract from every file.

    Raises:
        IndexError: If a row has fewer than ``COLUMN`` fields, or a file has
            fewer rows than the first file.
    """
    column_index = COLUMN - 1
    out_path = os.path.join(fileDir, newFile)

    # Collect the input paths. The output file lives in the same directory,
    # so it must be skipped or a second run would read it as an input.
    in_paths = []
    for name in sorted(os.listdir(fileDir)):
        path = os.path.join(fileDir, name)
        if name == newFile or not os.path.isfile(path):
            continue
        in_paths.append(path)

    # columns[k] holds the selected column of the k-th file, one entry per row.
    columns = []
    for path in in_paths:
        # The csv module needs text mode with newline='' on Python 3.
        with open(path, newline='') as src:
            reader = csv.reader(src, delimiter='\t')
            # next(reader)  # uncomment to skip a header line in each file
            columns.append([row[column_index] for row in reader])

    # The first file decides how many rows are written (none if no inputs).
    row_count = len(columns[0]) if columns else 0

    # Transpose: merged[i] == [File1_ColN_Row_i, File2_ColN_Row_i, ...]
    merged = [[col[i] for col in columns] for i in range(row_count)]

    with open(out_path, 'w', newline='') as dst:
        writer = csv.writer(dst, delimiter='\t')
        writer.writerows(merged)
if __name__ == "__main__":
    # Directory where ONLY the tab-separated files reside.
    # Raw string: "\T" is not a valid escape sequence, so a plain literal
    # triggers an invalid-escape warning on Python 3; the value is unchanged.
    fileDir = r"C:\TabFiles"
    # Name of the new file; it is written into the same directory as the inputs.
    newFile = 'newTabFile.txt'
    # The (1-based) column to extract from every file.
    columnNum = 7
    getData(fileDir, newFile, columnNum)
我用 Python 创建了 9 个示例文件(file1.tsv 到 file9.tsv):
# Generate sample inputs file1.tsv .. file9.tsv in the current directory
# (range(1, 10) stops at 9). Each file gets 3 lines of 9 tab-separated cells,
# and every cell records its file name, line number and column number.
for i in range(1,10):
    fn='file'+str(i)+'.tsv'
    with open(fn, 'w') as f:
        for line in range(1,4):
            f.write('\t'.join('{}, line: {}, col: {}'.format(fn, line, col) for col in range(1,10)))
            f.write('\n')
这样就创建了 9 个如下形式的文件:
file1.tsv, line: 1, col: 1 file1.tsv, line: 1, col: 2 file1.tsv, line: 1, col: 3 file1.tsv, line: 1, col: 4 file1.tsv, line: 1, col: 5 file1.tsv, line: 1, col: 6 file1.tsv, line: 1, col: 7 file1.tsv, line: 1, col: 8 file1.tsv, line: 1, col: 9
file1.tsv, line: 2, col: 1 file1.tsv, line: 2, col: 2 file1.tsv, line: 2, col: 3 file1.tsv, line: 2, col: 4 file1.tsv, line: 2, col: 5 file1.tsv, line: 2, col: 6 file1.tsv, line: 2, col: 7 file1.tsv, line: 2, col: 8 file1.tsv, line: 2, col: 9
file1.tsv, line: 3, col: 1 file1.tsv, line: 3, col: 2 file1.tsv, line: 3, col: 3 file1.tsv, line: 3, col: 4 file1.tsv, line: 3, col: 5 file1.tsv, line: 3, col: 6 file1.tsv, line: 3, col: 7 file1.tsv, line: 3, col: 8 file1.tsv, line: 3, col: 9
...
file9.tsv, line: 1, col: 1 file9.tsv, line: 1, col: 2 file9.tsv, line: 1, col: 3 file9.tsv, line: 1, col: 4 file9.tsv, line: 1, col: 5 file9.tsv, line: 1, col: 6 file9.tsv, line: 1, col: 7 file9.tsv, line: 1, col: 8 file9.tsv, line: 1, col: 9
file9.tsv, line: 2, col: 1 file9.tsv, line: 2, col: 2 file9.tsv, line: 2, col: 3 file9.tsv, line: 2, col: 4 file9.tsv, line: 2, col: 5 file9.tsv, line: 2, col: 6 file9.tsv, line: 2, col: 7 file9.tsv, line: 2, col: 8 file9.tsv, line: 2, col: 9
file9.tsv, line: 3, col: 1 file9.tsv, line: 3, col: 2 file9.tsv, line: 3, col: 3 file9.tsv, line: 3, col: 4 file9.tsv, line: 3, col: 5 file9.tsv, line: 3, col: 6 file9.tsv, line: 3, col: 7 file9.tsv, line: 3, col: 8 file9.tsv, line: 3, col: 9
现在您已经有了这些示例文件(答案如下),只需使用 cut:
$ cut -f 7 *.tsv
file1.tsv, line: 1, col: 7
file1.tsv, line: 2, col: 7
file1.tsv, line: 3, col: 7
file2.tsv, line: 1, col: 7
file2.tsv, line: 2, col: 7
file2.tsv, line: 3, col: 7
file3.tsv, line: 1, col: 7
file3.tsv, line: 2, col: 7
file3.tsv, line: 3, col: 7
file4.tsv, line: 1, col: 7
file4.tsv, line: 2, col: 7
file4.tsv, line: 3, col: 7
file5.tsv, line: 1, col: 7
file5.tsv, line: 2, col: 7
file5.tsv, line: 3, col: 7
file6.tsv, line: 1, col: 7
file6.tsv, line: 2, col: 7
file6.tsv, line: 3, col: 7
file7.tsv, line: 1, col: 7
file7.tsv, line: 2, col: 7
file7.tsv, line: 3, col: 7
file8.tsv, line: 1, col: 7
file8.tsv, line: 2, col: 7
file8.tsv, line: 3, col: 7
file9.tsv, line: 1, col: 7
file9.tsv, line: 2, col: 7
file9.tsv, line: 3, col: 7
然后使用 tr 得到如下结果:
$ cut -f 7 *.tsv | tr '\n' '\t'
file1.tsv, line: 1, col: 7 file1.tsv, line: 2, col: 7 file1.tsv, line: 3, col: 7 file2.tsv, line: 1, col: 7 file2.tsv, line: 2, col: 7 file2.tsv, line: 3, col: 7 file3.tsv, line: 1, col: 7 file3.tsv, line: 2, col: 7 file3.tsv, line: 3, col: 7 file4.tsv, line: 1, col: 7 file4.tsv, line: 2, col: 7 file4.tsv, line: 3, col: 7 file5.tsv, line: 1, col: 7 file5.tsv, line: 2, col: 7 file5.tsv, line: 3, col: 7 file6.tsv, line: 1, col: 7 file6.tsv, line: 2, col: 7 file6.tsv, line: 3, col: 7 file7.tsv, line: 1, col: 7 file7.tsv, line: 2, col: 7 file7.tsv, line: 3, col: 7 file8.tsv, line: 1, col: 7 file8.tsv, line: 2, col: 7 file8.tsv, line: 3, col: 7 file9.tsv, line: 1, col: 7 file9.tsv, line: 2, col: 7 file9.tsv, line: 3, col: 7
或者使用 paste:
$ cut -f 7 *.tsv | paste -s -d '\t' -
file1.tsv, line: 1, col: 7 file1.tsv, line: 2, col: 7 file1.tsv, line: 3, col: 7 file2.tsv, line: 1, col: 7 file2.tsv, line: 2, col: 7 file2.tsv, line: 3, col: 7 file3.tsv, line: 1, col: 7 file3.tsv, line: 2, col: 7 file3.tsv, line: 3, col: 7 file4.tsv, line: 1, col: 7 file4.tsv, line: 2, col: 7 file4.tsv, line: 3, col: 7 file5.tsv, line: 1, col: 7 file5.tsv, line: 2, col: 7 file5.tsv, line: 3, col: 7 file6.tsv, line: 1, col: 7 file6.tsv, line: 2, col: 7 file6.tsv, line: 3, col: 7 file7.tsv, line: 1, col: 7 file7.tsv, line: 2, col: 7 file7.tsv, line: 3, col: 7 file8.tsv, line: 1, col: 7 file8.tsv, line: 2, col: 7 file8.tsv, line: 3, col: 7 file9.tsv, line: 1, col: 7 file9.tsv, line: 2, col: 7 file9.tsv, line: 3, col: 7