将多个 .txt 文件作为数据框加载到 python
Loading multiple .txt files into Python as a DataFrame
我正在努力将多个 .txt 文件加载到我桌面上的 python。我是 Python 的新手。我的目标是加载多个 .txt 文件,这些文件保存在同一目录中。 .txt 文件是纯文本。在此先感谢您的帮助!
你可以这样做。
from collections import defaultdict
from pathlib import Path

# Must be aliased as `pd`: the original `import pandas as df` left `pd`
# undefined, so `pd.DataFrame(...)` below raised NameError.
import pandas as pd

# Placeholder: replace with the folder that holds the .txt files.
my_dir_path = "/parh/to/folder"

# Column name -> list of cell values; each key becomes a DataFrame column.
results = defaultdict(list)

# sorted() keeps the row order reproducible between runs.
for file in sorted(Path(my_dir_path).iterdir()):
    # iterdir() also yields sub-directories and unrelated files; open() fails
    # on a directory, so keep only regular *.txt files.
    if not file.is_file() or file.suffix.lower() != ".txt":
        continue
    with open(file, "r") as file_open:
        results["file_name"].append(file.name)
        results["text"].append(file_open.read())

# One row per file, with the columns "file_name" and "text".
df = pd.DataFrame(results)
这可能太长了,但如果需要,可以为文件名创建另一列:
import os
import csv
import pandas as pd
# Raw string: in a normal literal the `\t` in this Windows-style path is a TAB
# character and `\s` is an invalid escape sequence.
main_folder = r'path\to\some_folder'
def get_filename(path):
    """Return the names of the regular files directly inside *path*.

    Args:
        path: Directory to scan; sub-directories are skipped, not descended.

    Returns:
        A list of bare file names (no directory part), in the order
        ``os.scandir`` yields them.
    """
    # DirEntry.name is already the base name, so there is no need to build the
    # full path and strip it again with os.path.basename. The ``with`` block
    # closes the scandir iterator as soon as the list is built.
    with os.scandir(path) as entries:
        return [entry.name for entry in entries if entry.is_file()]
# Names of the files to export; each one becomes a CSV row.
files = get_filename(main_folder)

with open('some.csv', 'w', encoding = 'utf8', newline = '') as csv_file:
    # Create the writer once instead of once per input file.
    writer = csv.writer(csv_file)
    # Header row: without it pd.read_csv() would consume the first file's
    # record as the column names.
    writer.writerow(['file_name', 'text'])
    for _file in files:
        file_name = _file
        # os.path.join inserts the separator. The original
        # `main_folder +'\'+ _file` did not parse, because `\'` escapes the
        # closing quote of the string literal.
        with open(os.path.join(main_folder, _file), 'r') as f:
            text = f.read()
        writer.writerow([file_name, text])

# Read the export back: one row per file, columns "file_name" and "text".
df = pd.read_csv('some.csv')
# ...then whatever...
我会这样做。
import glob
# Raw string so the backslashes of the Windows path stay literal; `\y` and
# `\*` are invalid escape sequences in a normal string literal.
read_files = glob.glob(r'C:\your_path_here\*.txt')

# Binary mode copies the bytes untouched, so no text encoding is involved.
# Each file is written out in full, so any header line it contains is
# repeated in result.txt.
with open('result.txt', 'wb') as outfile:
    for f in read_files:
        with open(f, 'rb') as infile:
            outfile.write(infile.read())
我有 5 个如下所示的文本文件:
FName,LName,Address
Jim,Bentz,34 Holloway La.
George,Hororitz,76 Ridge Dr.
Eric,Schimtz,11 Main St.
最终结果如下所示:
FName,LName,Address
Jim,Bentz,34 Holloway La.
George,Hororitz,76 Ridge Dr.
Eric,Schimtz,11 Main St.
FName,LName,Address
Jim,Bentz,34 Holloway La.
George,Hororitz,76 Ridge Dr.
Eric,Schimtz,11 Main St.
FName,LName,Address
Jim,Bentz,34 Holloway La.
George,Hororitz,76 Ridge Dr.
Eric,Schimtz,11 Main St.
FName,LName,Address
Jim,Bentz,34 Holloway La.
George,Hororitz,76 Ridge Dr.
Eric,Schimtz,11 Main St.
FName,LName,Address
Jim,Bentz,34 Holloway La.
George,Hororitz,76 Ridge Dr.
Eric,Schimtz,11 Main St.
我正在努力将多个 .txt 文件加载到我桌面上的 python。我是 Python 的新手。我的目标是加载多个 .txt 文件,这些文件保存在同一目录中。 .txt 文件是纯文本。在此先感谢您的帮助!
你可以这样做。
from collections import defaultdict
from pathlib import Path

# Must be aliased as `pd`: the original `import pandas as df` left `pd`
# undefined, so `pd.DataFrame(...)` below raised NameError.
import pandas as pd

# Placeholder: replace with the folder that holds the .txt files.
my_dir_path = "/parh/to/folder"

# Column name -> list of cell values; each key becomes a DataFrame column.
results = defaultdict(list)

# sorted() keeps the row order reproducible between runs.
for file in sorted(Path(my_dir_path).iterdir()):
    # iterdir() also yields sub-directories and unrelated files; open() fails
    # on a directory, so keep only regular *.txt files.
    if not file.is_file() or file.suffix.lower() != ".txt":
        continue
    with open(file, "r") as file_open:
        results["file_name"].append(file.name)
        results["text"].append(file_open.read())

# One row per file, with the columns "file_name" and "text".
df = pd.DataFrame(results)
这可能太长了,但如果需要,可以为文件名创建另一列:
import os
import csv
import pandas as pd
# Raw string: in a normal literal the `\t` in this Windows-style path is a TAB
# character and `\s` is an invalid escape sequence.
main_folder = r'path\to\some_folder'
def get_filename(path):
    """Return the names of the regular files directly inside *path*.

    Args:
        path: Directory to scan; sub-directories are skipped, not descended.

    Returns:
        A list of bare file names (no directory part), in the order
        ``os.scandir`` yields them.
    """
    # DirEntry.name is already the base name, so there is no need to build the
    # full path and strip it again with os.path.basename. The ``with`` block
    # closes the scandir iterator as soon as the list is built.
    with os.scandir(path) as entries:
        return [entry.name for entry in entries if entry.is_file()]
# Names of the files to export; each one becomes a CSV row.
files = get_filename(main_folder)

with open('some.csv', 'w', encoding = 'utf8', newline = '') as csv_file:
    # Create the writer once instead of once per input file.
    writer = csv.writer(csv_file)
    # Header row: without it pd.read_csv() would consume the first file's
    # record as the column names.
    writer.writerow(['file_name', 'text'])
    for _file in files:
        file_name = _file
        # os.path.join inserts the separator. The original
        # `main_folder +'\'+ _file` did not parse, because `\'` escapes the
        # closing quote of the string literal.
        with open(os.path.join(main_folder, _file), 'r') as f:
            text = f.read()
        writer.writerow([file_name, text])

# Read the export back: one row per file, columns "file_name" and "text".
df = pd.read_csv('some.csv')
# ...then whatever...
我会这样做。
import glob
# Raw string so the backslashes of the Windows path stay literal; `\y` and
# `\*` are invalid escape sequences in a normal string literal.
read_files = glob.glob(r'C:\your_path_here\*.txt')

# Binary mode copies the bytes untouched, so no text encoding is involved.
# Each file is written out in full, so any header line it contains is
# repeated in result.txt.
with open('result.txt', 'wb') as outfile:
    for f in read_files:
        with open(f, 'rb') as infile:
            outfile.write(infile.read())
我有 5 个如下所示的文本文件:
FName,LName,Address
Jim,Bentz,34 Holloway La.
George,Hororitz,76 Ridge Dr.
Eric,Schimtz,11 Main St.
最终结果如下所示:
FName,LName,Address
Jim,Bentz,34 Holloway La.
George,Hororitz,76 Ridge Dr.
Eric,Schimtz,11 Main St.
FName,LName,Address
Jim,Bentz,34 Holloway La.
George,Hororitz,76 Ridge Dr.
Eric,Schimtz,11 Main St.
FName,LName,Address
Jim,Bentz,34 Holloway La.
George,Hororitz,76 Ridge Dr.
Eric,Schimtz,11 Main St.
FName,LName,Address
Jim,Bentz,34 Holloway La.
George,Hororitz,76 Ridge Dr.
Eric,Schimtz,11 Main St.
FName,LName,Address
Jim,Bentz,34 Holloway La.
George,Hororitz,76 Ridge Dr.
Eric,Schimtz,11 Main St.