使用 Python 将各项长度不同的元组列表写入 CSV 文件
Write list of tuples of one different item length to CSV file using Python
我有一个结构如下的列表:[(int, [], []), (int, [], []), (int, [], []), ..., (int, [], [])],
其中 [] 表示一个句子的标记(token)。
data = [(11.221, ['Maruyama', '(', 'Japan', ')'], ['S-PER', 'O', 'S-LOC', 'O']),
(5.56, ['MANAMA', '1996-08-22'], ['S-LOC', 'O']),
(5.381, ['BEIJING', '1996-08-22'], ['S-LOC', 'O'])]
我想将data
写入CSV文件如下:
11.221, Maruyama (Japan) , Maruyama , S-PER
(, , O
Japan, , S-LOC
), , O
[HERE SHOULD BE SPACE]
5.56 , MANAMA 1996-08-22 , MANAMA , S-LOC
, 1996-08-22, O
[HERE SHOULD BE SPACE]
5.381 , BEIJING 1996-08-22, BEIJING , S-LOC
, 1996-08-22, O
CSV 文件的格式为:
int, sentence (concatenated tokens), token_1, tag_1
, token_2, tag_2
, ...
我尝试了以下方法,但对我来说效果不佳。
import csv
# Attempt 1: zip the three fields of every record and write the result line by line.
with open('output.csv','w') as f:
# x[0] is the float code of a record, not a sequence, so zip(x[0], ...) raises
# "TypeError: 'float' object is not iterable" before anything is written.
for x in [tuple(zip(x[0], x[1], x[2])) for x in data]:
for r in x:
f.write(' '.join(r) + '\n')
f.write('\n')
Traceback: TypeError: 'float' object is not iterable
我也打算这样做:
# Re-pack each record as (value, sentence, tokens, tags); the sentence is the tokens joined by spaces.
data = [(value, ' '.join(sent), sent, tag) for value, sent, tag in data]
在此基础上,我又尝试了以下方法。
# Attempt 2: write the four-field records with csv.writer.
with open('output.csv', 'w') as f:
writer = csv.writer(f , lineterminator='\n')
for value, sent, tokens, tags in data:
# writerow() expects an iterable of fields; passing the bare float raises
# "_csv.Error: iterable expected, not float".
writer.writerow(value)
# A str is itself an iterable of characters, so writerow() would emit one
# field per character here rather than one field holding the sentence.
writer.writerow(sent)
for x in [tuple(zip(tokens, tags))]:
for r in x:
writer.writerow(' '.join(r) + '\n')
writer.writerow('\n')
Traceback:_csv.Error: iterable expected, not float
你可以这样做:
# Create a class to store each text information
class Text:
    """A single record: a numeric code with its tokens and per-token tags.

    Instance fields:
      code     -- the value printed in the first column
      tokens   -- list of token strings
      tags     -- list of tag strings, paired with ``tokens`` by position
      sentence -- the tokens joined with single spaces
    """

    def __init__(self, code, tokens, tags):
        self.code = code
        self.tokens = tokens
        self.tags = tags
        # Concatenate the tokens to create a sentence
        self.sentence = ' '.join(tokens)


def write_to_file(data, f):
    """Write ``data`` to ``f`` as an aligned, comma-separated table.

    Args:
        data: iterable of ``(code, tokens, tags)`` triples, where ``tokens``
            and ``tags`` are sequences of strings paired by position.
        f: a writable text file object.

    Each record starts with ``code, sentence, token, tag``; every further
    token/tag pair goes on its own line, indented with spaces so that it
    lines up under the first pair. A blank line follows every record.
    Fields are padded to the widest value of their column and joined with
    ``', '``. No CSV quoting is applied, so a token containing a comma is
    written verbatim.
    """
    # Convert all the data to Text objects
    texts = [Text(code, tokens, tags) for code, tokens, tags in data]

    # Find the maximum width of each column. ``default=0`` keeps empty
    # input (no records, or records without tokens) from raising ValueError.
    widths = {
        "code": max((len(str(text.code)) for text in texts), default=0),
        "sentence": max((len(text.sentence) for text in texts), default=0),
        "token": max(
            (len(str(token)) for text in texts for token in text.tokens),
            default=0,
        ),
        "tag": max(
            (len(str(tag)) for text in texts for tag in text.tags),
            default=0,
        ),
    }

    # Continuation lines skip the code and sentence columns: their widths
    # plus the 2-character ', ' separator that follows each of them.
    continuation_pad = " " * (widths["code"] + 2 + widths["sentence"] + 2)

    for text in texts:
        # Code and sentence columns, each followed by ', '.
        print(str(text.code).ljust(widths["code"]), file=f, end=', ')
        print(text.sentence.ljust(widths["sentence"]), file=f, end=', ')

        pairs = list(zip(text.tokens, text.tags))
        if not pairs:
            # No token/tag pairs: end the record's line here, so the blank
            # separator printed below still appears as an empty line.
            print(file=f)

        for i, (token, tag) in enumerate(pairs):
            # Every pair after the first one of this record goes on its own
            # line, aligned under the token column.
            if i != 0:
                print(continuation_pad, file=f, end='')
            print(str(token).ljust(widths["token"]), file=f, end=', ')
            print(str(tag).ljust(widths["tag"]), file=f)

        # Blank line between records.
        print(file=f)
用法:
# Sample records: (code, tokens, tags).
data = [(11.221, ['Maruyama', '(', 'Japan', ')'], ['S-PER', 'O', 'S-LOC', 'O']),
        (5.56, ['MANAMA', '1996-08-22'], ['S-LOC', 'O']),
        (5.381, ['BEIJING', '1996-08-22'], ['S-LOC', 'O'])]

with open('file.txt', 'w+') as f:
    # Pass the sample list `data` defined above; the name `date` is not
    # defined anywhere and would raise NameError.
    write_to_file(data, f)
文件内容将是
11.221, Maruyama ( Japan ), Maruyama  , S-PER
                            (         , O    
                            Japan     , S-LOC
                            )         , O    

5.56  , MANAMA 1996-08-22 , MANAMA    , S-LOC
                            1996-08-22, O    

5.381 , BEIJING 1996-08-22, BEIJING   , S-LOC
                            1996-08-22, O    

我有一个结构如下的列表:[(int, [], []), (int, [], []), (int, [], []), ..., (int, [], [])],
其中 [] 表示一个句子的标记(token)。
data = [(11.221, ['Maruyama', '(', 'Japan', ')'], ['S-PER', 'O', 'S-LOC', 'O']),
(5.56, ['MANAMA', '1996-08-22'], ['S-LOC', 'O']),
(5.381, ['BEIJING', '1996-08-22'], ['S-LOC', 'O'])]
我想将data
写入CSV文件如下:
11.221, Maruyama (Japan) , Maruyama , S-PER
(, , O
Japan, , S-LOC
), , O
[HERE SHOULD BE SPACE]
5.56 , MANAMA 1996-08-22 , MANAMA , S-LOC
, 1996-08-22, O
[HERE SHOULD BE SPACE]
5.381 , BEIJING 1996-08-22, BEIJING , S-LOC
, 1996-08-22, O
CSV 文件的格式为:
int, sentence (concatenated tokens), token_1, tag_1
, token_2, tag_2
, ...
我尝试了以下方法,但对我来说效果不佳。
import csv
# Attempt 1: zip the three fields of every record and write the result line by line.
with open('output.csv','w') as f:
# x[0] is the float code of a record, not a sequence, so zip(x[0], ...) raises
# "TypeError: 'float' object is not iterable" before anything is written.
for x in [tuple(zip(x[0], x[1], x[2])) for x in data]:
for r in x:
f.write(' '.join(r) + '\n')
f.write('\n')
Traceback: TypeError: 'float' object is not iterable
我也打算这样做:
# Re-pack each record as (value, sentence, tokens, tags); the sentence is the tokens joined by spaces.
data = [(value, ' '.join(sent), sent, tag) for value, sent, tag in data]
在此基础上,我又尝试了以下方法。
# Attempt 2: write the four-field records with csv.writer.
with open('output.csv', 'w') as f:
writer = csv.writer(f , lineterminator='\n')
for value, sent, tokens, tags in data:
# writerow() expects an iterable of fields; passing the bare float raises
# "_csv.Error: iterable expected, not float".
writer.writerow(value)
# A str is itself an iterable of characters, so writerow() would emit one
# field per character here rather than one field holding the sentence.
writer.writerow(sent)
for x in [tuple(zip(tokens, tags))]:
for r in x:
writer.writerow(' '.join(r) + '\n')
writer.writerow('\n')
Traceback:_csv.Error: iterable expected, not float
你可以这样做:
# Create a class to store each text information
class Text:
    """A single record: a numeric code with its tokens and per-token tags.

    Instance fields:
      code     -- the value printed in the first column
      tokens   -- list of token strings
      tags     -- list of tag strings, paired with ``tokens`` by position
      sentence -- the tokens joined with single spaces
    """

    def __init__(self, code, tokens, tags):
        self.code = code
        self.tokens = tokens
        self.tags = tags
        # Concatenate the tokens to create a sentence
        self.sentence = ' '.join(tokens)


def write_to_file(data, f):
    """Write ``data`` to ``f`` as an aligned, comma-separated table.

    Args:
        data: iterable of ``(code, tokens, tags)`` triples, where ``tokens``
            and ``tags`` are sequences of strings paired by position.
        f: a writable text file object.

    Each record starts with ``code, sentence, token, tag``; every further
    token/tag pair goes on its own line, indented with spaces so that it
    lines up under the first pair. A blank line follows every record.
    Fields are padded to the widest value of their column and joined with
    ``', '``. No CSV quoting is applied, so a token containing a comma is
    written verbatim.
    """
    # Convert all the data to Text objects
    texts = [Text(code, tokens, tags) for code, tokens, tags in data]

    # Find the maximum width of each column. ``default=0`` keeps empty
    # input (no records, or records without tokens) from raising ValueError.
    widths = {
        "code": max((len(str(text.code)) for text in texts), default=0),
        "sentence": max((len(text.sentence) for text in texts), default=0),
        "token": max(
            (len(str(token)) for text in texts for token in text.tokens),
            default=0,
        ),
        "tag": max(
            (len(str(tag)) for text in texts for tag in text.tags),
            default=0,
        ),
    }

    # Continuation lines skip the code and sentence columns: their widths
    # plus the 2-character ', ' separator that follows each of them.
    continuation_pad = " " * (widths["code"] + 2 + widths["sentence"] + 2)

    for text in texts:
        # Code and sentence columns, each followed by ', '.
        print(str(text.code).ljust(widths["code"]), file=f, end=', ')
        print(text.sentence.ljust(widths["sentence"]), file=f, end=', ')

        pairs = list(zip(text.tokens, text.tags))
        if not pairs:
            # No token/tag pairs: end the record's line here, so the blank
            # separator printed below still appears as an empty line.
            print(file=f)

        for i, (token, tag) in enumerate(pairs):
            # Every pair after the first one of this record goes on its own
            # line, aligned under the token column.
            if i != 0:
                print(continuation_pad, file=f, end='')
            print(str(token).ljust(widths["token"]), file=f, end=', ')
            print(str(tag).ljust(widths["tag"]), file=f)

        # Blank line between records.
        print(file=f)
用法:
# Sample records: (code, tokens, tags).
data = [(11.221, ['Maruyama', '(', 'Japan', ')'], ['S-PER', 'O', 'S-LOC', 'O']),
        (5.56, ['MANAMA', '1996-08-22'], ['S-LOC', 'O']),
        (5.381, ['BEIJING', '1996-08-22'], ['S-LOC', 'O'])]

with open('file.txt', 'w+') as f:
    # Pass the sample list `data` defined above; the name `date` is not
    # defined anywhere and would raise NameError.
    write_to_file(data, f)
文件内容将是
11.221, Maruyama ( Japan ), Maruyama  , S-PER
                            (         , O    
                            Japan     , S-LOC
                            )         , O    

5.56  , MANAMA 1996-08-22 , MANAMA    , S-LOC
                            1996-08-22, O    

5.381 , BEIJING 1996-08-22, BEIJING   , S-LOC
                            1996-08-22, O    
