如何修改这段代码,使其遍历并处理文件夹中的所有 CSV 文件?
Edit this code to run through all CSV files in a folder?
我想先说明一下,我是 Python 和 pandas 的新手。我编写了下面的代码来处理一个 CSV 文件:根据某一列的值筛选出行,然后创建并保存为 5 个 CSV 文件。我现在面临的挑战是我有 50 个文件。我希望能在现有代码的基础上添加一个循环,让它遍历整个文件夹,而不必逐个输入每个文件的路径。感谢您的帮助。
import pandas as pd

# Split one CSV into five subsets based on the value in its code column and
# save each subset next to the input file with a suffix added to the name.
df = pd.read_csv(
    r"C:\Users\Kris\Data\Loans 12-21.csv",
    # Read the code column as text: the lists below hold strings, and the
    # column mixes values such as "M000" and "21".
    dtype={'Segmentation/Pool Code': str},
)
df = df.rename(columns={'Segmentation/Pool Code': 'Code'})

# Select the rows whose 'Code' is in each list.
df_Auto = df.loc[df['Code'].isin(['21', '94', '103', '105', '22', '82', '97', '104', '1', '71', '100', '2', '35', '62', '72', '101'])]
df_Mortgage = df.loc[df['Code'].isin(["M000","M001", "M003", "M004", "M005", "M006", "M007", "M008","M010", "M011", "M013", "M014", "M015", "M016", "M024", "M025", "M027", "M028", "M029", "M031", "M033", "M035","M036","M037","M038","M039",'M040','M041','M042','M043','M044','M020','M021','M022','M023','M026','M032','M034', '18', '28', '34', '87'])]
df_HELOC = df.loc[df['Code'].isin(["17","83","88","19","31","84","85"])]
df_CC = df.loc[df['Code'].isin(["116","118","119","120","121","122","123","125"])]
df_Other = df.loc[df['Code'].isin(["33","41","51","52", "56","57","58","59","75","76","130","131","132","133","134","135","136","140","54", "55","60","77", "78","79","115","4","5","6","7","13","14","16", "32","44","45","46","47","67","106","107","109","110","160","3","10","11","12","25","69","95","102"])]

# Save files
df_Auto.to_csv(r"C:\Users\Kris\Data\Loans 12-21_auto.csv")
# NOTE: the "Mortgafe" spelling is kept so the output file name does not change.
df_Mortgage.to_csv(r"C:\Users\Kris\Data\Loans 12-21_Mortgafe.csv")
df_HELOC.to_csv(r"C:\Users\Kris\Data\Loans 12-21_HELOC.csv")
df_CC.to_csv(r"C:\Users\Kris\Data\Loans 12-21_CC.csv")
# Written to its own "_Other" file; saving to the input path would overwrite
# the source data with only this subset.
df_Other.to_csv(r"C:\Users\Kris\Data\Loans 12-21_Other.csv")
可以以此为起点。它会读取 `csvs` 列表中的每个 CSV,对其进行处理,并将结果写入几个新文件:
import pandas as pd
import os

# Split every CSV in csv_dir into one file per code group, saved next to the
# input as "<input name><suffix><extension>".
csv_dir = r"C:\Users\Kris\Data"

# Output file-name suffix -> values of the 'Code' column that go into that file.
# Built once, outside the loop, because it is the same for every input file.
# NOTE: the "_Mortgafe" spelling is kept so output file names do not change.
code_groups = {
    "_auto": ['21', '94', '103', '105', '22', '82', '97', '104', '1', '71', '100', '2', '35', '62', '72', '101'],
    "_Mortgafe": ["M000","M001", "M003", "M004", "M005", "M006", "M007", "M008","M010", "M011", "M013", "M014", "M015", "M016", "M024", "M025", "M027", "M028", "M029", "M031", "M033", "M035","M036","M037","M038","M039",'M040','M041','M042','M043','M044','M020','M021','M022','M023','M026','M032','M034', '18', '28', '34', '87'],
    "_HELOC": ["17","83","88","19","31","84","85"],
    "_CC": ["116","118","119","120","121","122","123","125"],
    "_Other": ["33","41","51","52", "56","57","58","59","75","76","130","131","132","133","134","135","136","140","54", "55","60","77", "78","79","115","4","5","6","7","13","14","16", "32","44","45","46","47","67","106","107","109","110","160","3","10","11","12","25","69","95","102"],
}
output_suffixes = tuple(code_groups)

# The list is built before any output is written, so files created by this
# run are never picked up as inputs.
csvs = [
    entry.path
    for entry in os.scandir(csv_dir)
    if entry.name.lower().endswith('.csv')
    # Leave out files produced by an earlier run so they are not split again.
    and not os.path.splitext(entry.name)[0].endswith(output_suffixes)
]

for csv in csvs:
    # Read the code column as text so it compares equal to the string codes
    # in code_groups (the column mixes values such as "M000" and "21").
    df = pd.read_csv(csv, dtype={'Segmentation/Pool Code': str})
    df = df.rename(columns={'Segmentation/Pool Code': 'Code'})

    # Save files
    file_name, ext = os.path.splitext(csv)
    for suffix, codes in code_groups.items():
        # Every subset, including "_Other", gets its own suffixed file;
        # writing to file_name + ext would overwrite the input CSV.
        df.loc[df['Code'].isin(codes)].to_csv(f"{file_name}{suffix}{ext}")
当文件夹只有 csv 文件时:
import pandas as pd
import os

# Split every CSV in the folder into one file per code group, saved in the
# same folder as "<input name><suffix>.csv".
url = "C://Users//Kris//Data//" # insert your csv folder path

# Output file-name suffix -> values of the 'Code' column that go into that file.
# Built once, outside the loop, because it is the same for every input file.
# NOTE: the "_Mortgafe" spelling is kept so output file names do not change.
code_groups = {
    "_auto": ['21', '94', '103', '105', '22', '82', '97', '104', '1', '71', '100', '2', '35', '62', '72', '101'],
    "_Mortgafe": ["M000","M001", "M003", "M004", "M005", "M006", "M007", "M008","M010", "M011", "M013", "M014", "M015", "M016", "M024", "M025", "M027", "M028", "M029", "M031", "M033", "M035","M036","M037","M038","M039",'M040','M041','M042','M043','M044','M020','M021','M022','M023','M026','M032','M034', '18', '28', '34', '87'],
    "_HELOC": ["17","83","88","19","31","84","85"],
    "_CC": ["116","118","119","120","121","122","123","125"],
    "_Other": ["33","41","51","52", "56","57","58","59","75","76","130","131","132","133","134","135","136","140","54", "55","60","77", "78","79","115","4","5","6","7","13","14","16", "32","44","45","46","47","67","106","107","109","110","160","3","10","11","12","25","69","95","102"],
}
output_suffixes = tuple(code_groups)

# The listing is taken before any output is written, so files created by this
# run are never picked up as inputs.
files = os.listdir(url)
for i in files:
    stem, ext = os.path.splitext(i)
    # Skip entries that are not CSV files, and files produced by an earlier
    # run, so they are neither parsed nor split again.
    if ext.lower() != ".csv" or stem.endswith(output_suffixes):
        continue

    # Read the code column as text so it compares equal to the string codes
    # in code_groups (the column mixes values such as "M000" and "21").
    df = pd.read_csv(os.path.join(url, i), dtype={'Segmentation/Pool Code': str})
    df = df.rename(columns={'Segmentation/Pool Code': 'Code'})

    # Save files
    for suffix, codes in code_groups.items():
        # Every subset, including "_Other", gets its own suffixed file;
        # writing to the bare input name would overwrite the input CSV.
        df.loc[df['Code'].isin(codes)].to_csv(os.path.join(url, stem + suffix + ".csv"))
我想先说明一下,我是 Python 和 pandas 的新手。我编写了下面的代码来处理一个 CSV 文件:根据某一列的值筛选出行,然后创建并保存为 5 个 CSV 文件。我现在面临的挑战是我有 50 个文件。我希望能在现有代码的基础上添加一个循环,让它遍历整个文件夹,而不必逐个输入每个文件的路径。感谢您的帮助。
import pandas as pd

# Split one CSV into five subsets based on the value in its code column and
# save each subset next to the input file with a suffix added to the name.
df = pd.read_csv(
    r"C:\Users\Kris\Data\Loans 12-21.csv",
    # Read the code column as text: the lists below hold strings, and the
    # column mixes values such as "M000" and "21".
    dtype={'Segmentation/Pool Code': str},
)
df = df.rename(columns={'Segmentation/Pool Code': 'Code'})

# Select the rows whose 'Code' is in each list.
df_Auto = df.loc[df['Code'].isin(['21', '94', '103', '105', '22', '82', '97', '104', '1', '71', '100', '2', '35', '62', '72', '101'])]
df_Mortgage = df.loc[df['Code'].isin(["M000","M001", "M003", "M004", "M005", "M006", "M007", "M008","M010", "M011", "M013", "M014", "M015", "M016", "M024", "M025", "M027", "M028", "M029", "M031", "M033", "M035","M036","M037","M038","M039",'M040','M041','M042','M043','M044','M020','M021','M022','M023','M026','M032','M034', '18', '28', '34', '87'])]
df_HELOC = df.loc[df['Code'].isin(["17","83","88","19","31","84","85"])]
df_CC = df.loc[df['Code'].isin(["116","118","119","120","121","122","123","125"])]
df_Other = df.loc[df['Code'].isin(["33","41","51","52", "56","57","58","59","75","76","130","131","132","133","134","135","136","140","54", "55","60","77", "78","79","115","4","5","6","7","13","14","16", "32","44","45","46","47","67","106","107","109","110","160","3","10","11","12","25","69","95","102"])]

# Save files
df_Auto.to_csv(r"C:\Users\Kris\Data\Loans 12-21_auto.csv")
# NOTE: the "Mortgafe" spelling is kept so the output file name does not change.
df_Mortgage.to_csv(r"C:\Users\Kris\Data\Loans 12-21_Mortgafe.csv")
df_HELOC.to_csv(r"C:\Users\Kris\Data\Loans 12-21_HELOC.csv")
df_CC.to_csv(r"C:\Users\Kris\Data\Loans 12-21_CC.csv")
# Written to its own "_Other" file; saving to the input path would overwrite
# the source data with only this subset.
df_Other.to_csv(r"C:\Users\Kris\Data\Loans 12-21_Other.csv")
可以以此为起点。它会读取 `csvs` 列表中的每个 CSV,对其进行处理,并将结果写入几个新文件:
import pandas as pd
import os

# Split every CSV in csv_dir into one file per code group, saved next to the
# input as "<input name><suffix><extension>".
csv_dir = r"C:\Users\Kris\Data"

# Output file-name suffix -> values of the 'Code' column that go into that file.
# Built once, outside the loop, because it is the same for every input file.
# NOTE: the "_Mortgafe" spelling is kept so output file names do not change.
code_groups = {
    "_auto": ['21', '94', '103', '105', '22', '82', '97', '104', '1', '71', '100', '2', '35', '62', '72', '101'],
    "_Mortgafe": ["M000","M001", "M003", "M004", "M005", "M006", "M007", "M008","M010", "M011", "M013", "M014", "M015", "M016", "M024", "M025", "M027", "M028", "M029", "M031", "M033", "M035","M036","M037","M038","M039",'M040','M041','M042','M043','M044','M020','M021','M022','M023','M026','M032','M034', '18', '28', '34', '87'],
    "_HELOC": ["17","83","88","19","31","84","85"],
    "_CC": ["116","118","119","120","121","122","123","125"],
    "_Other": ["33","41","51","52", "56","57","58","59","75","76","130","131","132","133","134","135","136","140","54", "55","60","77", "78","79","115","4","5","6","7","13","14","16", "32","44","45","46","47","67","106","107","109","110","160","3","10","11","12","25","69","95","102"],
}
output_suffixes = tuple(code_groups)

# The list is built before any output is written, so files created by this
# run are never picked up as inputs.
csvs = [
    entry.path
    for entry in os.scandir(csv_dir)
    if entry.name.lower().endswith('.csv')
    # Leave out files produced by an earlier run so they are not split again.
    and not os.path.splitext(entry.name)[0].endswith(output_suffixes)
]

for csv in csvs:
    # Read the code column as text so it compares equal to the string codes
    # in code_groups (the column mixes values such as "M000" and "21").
    df = pd.read_csv(csv, dtype={'Segmentation/Pool Code': str})
    df = df.rename(columns={'Segmentation/Pool Code': 'Code'})

    # Save files
    file_name, ext = os.path.splitext(csv)
    for suffix, codes in code_groups.items():
        # Every subset, including "_Other", gets its own suffixed file;
        # writing to file_name + ext would overwrite the input CSV.
        df.loc[df['Code'].isin(codes)].to_csv(f"{file_name}{suffix}{ext}")
当文件夹只有 csv 文件时:
import pandas as pd
import os

# Split every CSV in the folder into one file per code group, saved in the
# same folder as "<input name><suffix>.csv".
url = "C://Users//Kris//Data//" # insert your csv folder path

# Output file-name suffix -> values of the 'Code' column that go into that file.
# Built once, outside the loop, because it is the same for every input file.
# NOTE: the "_Mortgafe" spelling is kept so output file names do not change.
code_groups = {
    "_auto": ['21', '94', '103', '105', '22', '82', '97', '104', '1', '71', '100', '2', '35', '62', '72', '101'],
    "_Mortgafe": ["M000","M001", "M003", "M004", "M005", "M006", "M007", "M008","M010", "M011", "M013", "M014", "M015", "M016", "M024", "M025", "M027", "M028", "M029", "M031", "M033", "M035","M036","M037","M038","M039",'M040','M041','M042','M043','M044','M020','M021','M022','M023','M026','M032','M034', '18', '28', '34', '87'],
    "_HELOC": ["17","83","88","19","31","84","85"],
    "_CC": ["116","118","119","120","121","122","123","125"],
    "_Other": ["33","41","51","52", "56","57","58","59","75","76","130","131","132","133","134","135","136","140","54", "55","60","77", "78","79","115","4","5","6","7","13","14","16", "32","44","45","46","47","67","106","107","109","110","160","3","10","11","12","25","69","95","102"],
}
output_suffixes = tuple(code_groups)

# The listing is taken before any output is written, so files created by this
# run are never picked up as inputs.
files = os.listdir(url)
for i in files:
    stem, ext = os.path.splitext(i)
    # Skip entries that are not CSV files, and files produced by an earlier
    # run, so they are neither parsed nor split again.
    if ext.lower() != ".csv" or stem.endswith(output_suffixes):
        continue

    # Read the code column as text so it compares equal to the string codes
    # in code_groups (the column mixes values such as "M000" and "21").
    df = pd.read_csv(os.path.join(url, i), dtype={'Segmentation/Pool Code': str})
    df = df.rename(columns={'Segmentation/Pool Code': 'Code'})

    # Save files
    for suffix, codes in code_groups.items():
        # Every subset, including "_Other", gets its own suffixed file;
        # writing to the bare input name would overwrite the input CSV.
        df.loc[df['Code'].isin(codes)].to_csv(os.path.join(url, stem + suffix + ".csv"))