如何将 python 中的数据保存到 csv 文件中
How to save data from python into a csv file
我有一个程序,最后会打印 "match" 我想将此 "match" 中的数据保存到 csv 文件中,我该怎么做?我写了一些代码来保存这个变量,但它什么也没写
这是我的代码:
import shlex
import subprocess
import os
import platform
from bs4 import BeautifulSoup
import re
import csv
import pickle
def rename_files():
file_list = os.listdir(r"C:\PROJECT\pdfs")
print(file_list)
saved_path = os.getcwd()
print('Current working directory is '+saved_path)
os.chdir(r'C:\PROJECT\pdfs')
for file_name in file_list:
os.rename(file_name, file_name.translate(None, " "))
os.chdir(saved_path)
rename_files()
def run(command):
if platform.system() != 'Windows':
args = shlex.split(command)
else:
args = command
s = subprocess.Popen(args,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
output, errors = s.communicate()
return s.returncode == 0, output, errors
# Change this to your PDF file base directory
base_directory = 'C:\PROJECT\pdfs'
if not os.path.isdir(base_directory):
print "%s is not a directory" % base_directory
exit(1)
# Change this to your pdf2htmlEX executable location
bin_path = 'C:\Python27\pdfminer-20140328\tools\pdf2txt.py'
if not os.path.isfile(bin_path):
print "Could not find %s" % bin_path
exit(1)
for dir_path, dir_name_list, file_name_list in os.walk(base_directory):
for file_name in file_name_list:
# If this is not a PDF file
if not file_name.endswith('.pdf'):
# Skip it
continue
file_path = os.path.join(dir_path, file_name)
# Convert your PDF to HTML here
args = (bin_path, file_name, file_path)
success, output, errors = run("python %s -o %s.html %s " %args)
if not success:
print "Could not convert %s to HTML" % file_path
print "%s" % errors
htmls_path = 'C:\PROJECT'
for dir_path, dir_name_list, file_name_list in os.walk(htmls_path):
for file_name in file_name_list:
if not file_name.endswith('.html'):
continue
with open(file_name) as markup:
soup = BeautifulSoup(markup.read())
text = soup.get_text()
match = re.findall("PA/(\S*)\s*(\S*)", text)
print(match)
with open ('score.csv', 'w') as f:
writer = csv.writer(f)
writer.writerows('%s' %match)
我试图将其保存到 csv 文件的部分是最后 3 行代码。
这是 "match" 格式的打印:https://gyazo.com/930f9dad12109bc50825c91b51fb31f3
假设您已经有了 "match",您可以使用 Python 中的 CSV module。作者应该完成你的工作。
如果你能详细说明你的数据格式会更有帮助。
按照代码的结构方式,您迭代 for 循环中的匹配项,然后,当循环完成时,将最后一个匹配项保存在 CSV 中。您可能希望在 for
循环内将每个匹配项写入 CSV 中。
尝试将代码的最后几行(从最后一个 for
循环开始)替换为:
with open('score.csv', 'wt') as f:
writer = csv.writer(f)
for dir_path, dir_name_list, file_name_list in os.walk(htmls_path):
for file_name in file_name_list:
if not file_name.endswith('.html'):
continue
with open(file_name) as markup:
soup = BeautifulSoup(markup.read())
text = soup.get_text()
match = re.findall("PA/(\S*)\s*(\S*)", text)
print(match)
writer.writerow(match)
我有一个程序,最后会打印 "match" 我想将此 "match" 中的数据保存到 csv 文件中,我该怎么做?我写了一些代码来保存这个变量,但它什么也没写 这是我的代码:
import shlex
import subprocess
import os
import platform
from bs4 import BeautifulSoup
import re
import csv
import pickle
def rename_files():
file_list = os.listdir(r"C:\PROJECT\pdfs")
print(file_list)
saved_path = os.getcwd()
print('Current working directory is '+saved_path)
os.chdir(r'C:\PROJECT\pdfs')
for file_name in file_list:
os.rename(file_name, file_name.translate(None, " "))
os.chdir(saved_path)
rename_files()
def run(command):
if platform.system() != 'Windows':
args = shlex.split(command)
else:
args = command
s = subprocess.Popen(args,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
output, errors = s.communicate()
return s.returncode == 0, output, errors
# Change this to your PDF file base directory
base_directory = 'C:\PROJECT\pdfs'
if not os.path.isdir(base_directory):
print "%s is not a directory" % base_directory
exit(1)
# Change this to your pdf2htmlEX executable location
bin_path = 'C:\Python27\pdfminer-20140328\tools\pdf2txt.py'
if not os.path.isfile(bin_path):
print "Could not find %s" % bin_path
exit(1)
for dir_path, dir_name_list, file_name_list in os.walk(base_directory):
for file_name in file_name_list:
# If this is not a PDF file
if not file_name.endswith('.pdf'):
# Skip it
continue
file_path = os.path.join(dir_path, file_name)
# Convert your PDF to HTML here
args = (bin_path, file_name, file_path)
success, output, errors = run("python %s -o %s.html %s " %args)
if not success:
print "Could not convert %s to HTML" % file_path
print "%s" % errors
htmls_path = 'C:\PROJECT'
for dir_path, dir_name_list, file_name_list in os.walk(htmls_path):
for file_name in file_name_list:
if not file_name.endswith('.html'):
continue
with open(file_name) as markup:
soup = BeautifulSoup(markup.read())
text = soup.get_text()
match = re.findall("PA/(\S*)\s*(\S*)", text)
print(match)
with open ('score.csv', 'w') as f:
writer = csv.writer(f)
writer.writerows('%s' %match)
我试图将其保存到 csv 文件的部分是最后 3 行代码。 这是 "match" 格式的打印:https://gyazo.com/930f9dad12109bc50825c91b51fb31f3
假设您已经有了 "match",您可以使用 Python 中的 CSV module。作者应该完成你的工作。
如果你能详细说明你的数据格式会更有帮助。
按照代码的结构方式,您迭代 for 循环中的匹配项,然后,当循环完成时,将最后一个匹配项保存在 CSV 中。您可能希望在 for
循环内将每个匹配项写入 CSV 中。
尝试将代码的最后几行(从最后一个 for
循环开始)替换为:
with open('score.csv', 'wt') as f:
writer = csv.writer(f)
for dir_path, dir_name_list, file_name_list in os.walk(htmls_path):
for file_name in file_name_list:
if not file_name.endswith('.html'):
continue
with open(file_name) as markup:
soup = BeautifulSoup(markup.read())
text = soup.get_text()
match = re.findall("PA/(\S*)\s*(\S*)", text)
print(match)
writer.writerow(match)