如何将 python 中的数据保存到 csv 文件中

Question

我有一个程序，最后会打印 "match" 我想将此 "match" 中的数据保存到 csv 文件中，我该怎么做？我写了一些代码来保存这个变量，但它什么也没写这是我的代码：

import shlex
import subprocess
import os
import platform
from bs4 import BeautifulSoup
import re
import csv
import pickle
def rename_files():
    file_list = os.listdir(r"C:\PROJECT\pdfs")
    print(file_list)
    saved_path = os.getcwd()
    print('Current working directory is '+saved_path)
    os.chdir(r'C:\PROJECT\pdfs')
    for file_name in file_list:
        os.rename(file_name, file_name.translate(None, " "))
    os.chdir(saved_path)
rename_files()

def run(command):
    if platform.system() != 'Windows':
        args = shlex.split(command)
    else:
        args = command
    s = subprocess.Popen(args,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)
    output, errors = s.communicate()
    return s.returncode == 0, output, errors

# Change this to your PDF file base directory
base_directory = 'C:\PROJECT\pdfs'
if not os.path.isdir(base_directory):
    print "%s is not a directory" % base_directory
    exit(1)
# Change this to your pdf2htmlEX executable location
bin_path = 'C:\Python27\pdfminer-20140328\tools\pdf2txt.py'
if not os.path.isfile(bin_path):
    print "Could not find %s" % bin_path
    exit(1)
for dir_path, dir_name_list, file_name_list in os.walk(base_directory):
    for file_name in file_name_list:
        # If this is not a PDF file
        if not file_name.endswith('.pdf'):
            # Skip it
            continue
        file_path = os.path.join(dir_path, file_name)
        # Convert your PDF to HTML here
        args = (bin_path, file_name, file_path)
        success, output, errors = run("python %s -o %s.html %s " %args)
        if not success:
            print "Could not convert %s to HTML" % file_path
            print "%s" % errors
htmls_path = 'C:\PROJECT'
for dir_path, dir_name_list, file_name_list in os.walk(htmls_path):
    for file_name in file_name_list:
        if not file_name.endswith('.html'):
            continue
        with open(file_name) as markup:
            soup = BeautifulSoup(markup.read())
            text = soup.get_text()
            match = re.findall("PA/(\S*)\s*(\S*)", text)
            print(match)
with open ('score.csv', 'w') as f:
    writer = csv.writer(f)
    writer.writerows('%s' %match)

我试图将其保存到 csv 文件的部分是最后 3 行代码。这是 "match" 格式的打印：https://gyazo.com/930f9dad12109bc50825c91b51fb31f3

Answer 1

假设您已经有了 "match"，您可以使用 Python 中的 CSV module。作者应该完成你的工作。

如果你能详细说明你的数据格式会更有帮助。

Answer 2

按照代码的结构方式，您迭代 for 循环中的匹配项，然后，当循环完成时，将最后一个匹配项保存在 CSV 中。您可能希望在 for 循环内将每个匹配项写入 CSV 中。

尝试将代码的最后几行（从最后一个 for 循环开始）替换为：

with open('score.csv', 'wt') as f:
    writer = csv.writer(f)
    for dir_path, dir_name_list, file_name_list in os.walk(htmls_path):
        for file_name in file_name_list:
            if not file_name.endswith('.html'):
                continue
            with open(file_name) as markup:
                soup = BeautifulSoup(markup.read())
                text = soup.get_text()
                match = re.findall("PA/(\S*)\s*(\S*)", text)
                print(match)
                writer.writerow(match)

如何将 python 中的数据保存到 csv 文件中

How to save data from python into a csv file

python

csv

python-2.7

export-to-csv