如何根据 java 中的唯一列合并 csv 文件
How to merge csv files based on unique columns in java
我有 3 个不同的非常大的文件,我的要求是将它们合并到一个 csv 文件中。所有这 3 个文件都有不同的列数,但它们之间包含一些独特的列。
例子
文件 1:
StudentId StudentName ClassId
1 Ajay 6
2 Vinay 8
3 Geeta 6
4 Sameer 7
文件 2:
ClassId ClassColor
6 Blue
7 Grey
8 White
文件 3:
HouseId HouseName StudentId
1 Knights 4
2 Spartans 1
3 Samurai 2
4 Trojans 3
如您所见,file1 和 file2 具有共同的 ClassId,并且 file1 和 file3 具有共同的 StudentId。
现在,基于此信息,我希望通过基于公共列(共同键)连接这些文件来获得这样的结果。
StudentId StudentName ClassId ClassColor HouseId HouseName
1 Ajay 6 Blue 2 Spartans
2 Vinay 8 White 3 Samurai
3 Geeta 6 Blue 4 Trojans
4 Sameer 7 Grey 1 Knights
这种情况对我来说是全新的,我做了一些研究但没有在 java 中找到解决方案。
在这个阶段,我可以通过以下实现加入两个文件:
Class 保存 csv 文件数据:
import java.util.LinkedHashMap;
import java.util.Map;
/**
 * Value object holding one CSV row as a column-name -> cell-value mapping.
 */
public class CsvVo {

    // LinkedHashMap so columns stay in the order they were inserted.
    private Map<String, String> keyVal;

    /**
     * Creates an empty row.
     *
     * @param id identifier of the source file; currently unused but kept for
     *           compatibility with existing callers — TODO confirm it can be dropped
     */
    public CsvVo(String id) {
        this.keyVal = new LinkedHashMap<>();
    }

    /** Returns the live column-to-value map backing this row. */
    public Map<String, String> getKeyVal() {
        return this.keyVal;
    }

    /** Replaces the entire column-to-value map of this row. */
    public void setKeyVal(Map<String, String> keyVal) {
        this.keyVal = keyVal;
    }

    /** Stores one cell value under the given column name. */
    public void put(String key, String val) {
        this.keyVal.put(key, val);
    }

    /** Returns the cell value for the given column name, or null if absent. */
    public String get(String key) {
        return this.keyVal.get(key);
    }
}
读取文件的 CSV 解析器:
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Set;
/**
 * Minimal CSV reader/writer helpers operating on {@link CsvVo} rows.
 * Note: uses a naive comma split — does not handle quoted fields containing commas.
 */
public class CsvParser {

    /**
     * Reads all data rows of {@code file}, mapping each cell to the column
     * name at the same index in {@code keys}. The first line is skipped as
     * the header row.
     *
     * @param file CSV file to read
     * @param keys column names, in file order (typically from {@link #getHeadersFromACsv})
     * @return one {@link CsvVo} per data line
     * @throws IOException if the file cannot be read
     */
    public static List<CsvVo> getRecodrsFromACsv(File file, List<String> keys) throws IOException {
        List<CsvVo> records = new ArrayList<>();
        // try-with-resources: the original leaked the reader when readLine threw
        try (BufferedReader br = new BufferedReader(new FileReader(file))) {
            boolean isHeader = true; // first line is the header, not data
            String line;
            while ((line = br.readLine()) != null) {
                if (isHeader) {
                    isHeader = false;
                    continue;
                }
                CsvVo record = new CsvVo(file.getName());
                String[] lineSplit = line.split(",");
                // bound by keys.size() too: a ragged row with extra cells must not
                // throw IndexOutOfBoundsException on keys.get(i)
                for (int i = 0; i < lineSplit.length && i < keys.size(); i++) {
                    record.put(keys.get(i), lineSplit[i]);
                }
                records.add(record);
            }
        }
        return records;
    }

    /**
     * Reads the first line of {@code file} and splits it into column names.
     *
     * @return the header names, or null when the file is empty (matches
     *         the original behavior)
     * @throws IOException if the file cannot be read
     */
    public static List<String> getHeadersFromACsv(File file) throws IOException {
        try (BufferedReader br = new BufferedReader(new FileReader(file))) {
            String line = br.readLine();
            if (line == null) {
                return null;
            }
            return new ArrayList<>(Arrays.asList(line.split(",")));
        }
    }

    /**
     * Writes {@code records} to {@code file} with {@code headers} as the first
     * line; each row's cells are emitted in header order.
     *
     * @throws IOException if the file cannot be written
     */
    public static void writeToCsv(final File file, final Set<String> headers, final List<CsvVo> records)
            throws IOException {
        // try-with-resources: the original leaked the writer when append threw
        try (FileWriter csvWriter = new FileWriter(file)) {
            String[] headersArr = headers.toArray(new String[0]);
            // header line
            String sep = "";
            for (String header : headersArr) {
                csvWriter.append(sep).append(header);
                sep = ",";
            }
            csvWriter.append("\n");
            // one line per record, cells aligned to the header order
            for (CsvVo record : records) {
                sep = "";
                for (String header : headersArr) {
                    String cell = record.get(header);
                    csvWriter.append(sep);
                    // BUG FIX: Writer.append(null) writes the literal text "null";
                    // emit an empty cell for columns this record does not have
                    csvWriter.append(cell == null ? "" : cell);
                    sep = ",";
                }
                csvWriter.append("\n");
            }
        }
    }
}
主要Class 测试功能:
import java.io.File;
import java.io.IOException;
import java.util.*;
/**
 * Joins two CSV files on a shared column and writes the merged rows to a
 * new CSV file (an inner join keyed on {@code StudentId}).
 */
public class CsvApplication {

    public static void main(String[] args) throws IOException {
        File csv1 = new File("/Users/guru/Desktop/Standard.csv");
        File csv2 = new File("/Users/guru/Desktop/Match.csv");

        List<String> csv1Headers = CsvParser.getHeadersFromACsv(csv1);
        List<String> csv2Headers = CsvParser.getHeadersFromACsv(csv2);

        // LinkedHashSet de-duplicates the shared join column while preserving
        // column order; the original HashSet scrambled the output columns.
        Set<String> uniqueHeaders = new LinkedHashSet<>();
        uniqueHeaders.addAll(csv1Headers);
        uniqueHeaders.addAll(csv2Headers);

        List<CsvVo> csv1Records = CsvParser.getRecodrsFromACsv(csv1, csv1Headers);
        List<CsvVo> csv2Records = CsvParser.getRecodrsFromACsv(csv2, csv2Headers);

        List<CsvVo> allCsvRecords = new ArrayList<>();
        // BUG FIX: the key must match the header spelling exactly. The example
        // files use "StudentId"; the old "StudentID" never matched any column,
        // so the join produced no rows.
        String key = "StudentId";
        getUniqueRecordsForKey(key, csv1Records, csv2Records, allCsvRecords);

        CsvParser.writeToCsv(new File("/Users/guru/Desktop/Output.csv"), uniqueHeaders, allCsvRecords);
    }

    /**
     * Inner-joins {@code csv1Records} with {@code csv2Records} on {@code key},
     * appending one merged record per match to {@code allCsvRecords}. Each
     * right-side record is consumed by at most one match (it is removed from
     * {@code csv2Records} once joined).
     */
    private static void getUniqueRecordsForKey(String key, List<CsvVo> csv1Records,
            List<CsvVo> csv2Records, List<CsvVo> allCsvRecords) {
        for (CsvVo record1 : csv1Records) {
            String leftVal = record1.get(key);
            // NPE FIX: a row missing the key column returned null and the old
            // code crashed on isEmpty(); skip such rows instead.
            if (leftVal == null || leftVal.isEmpty()) {
                continue;
            }
            // Iterator.remove is the safe way to delete while iterating; the old
            // List.remove inside a for-each only avoided CME because of the break.
            Iterator<CsvVo> it = csv2Records.iterator();
            while (it.hasNext()) {
                CsvVo record2 = it.next();
                if (leftVal.equals(record2.get(key))) {
                    CsvVo mergedRecord = new CsvVo(leftVal);
                    mergedRecord.setKeyVal(mergeRecords(record1, record2));
                    it.remove();
                    allCsvRecords.add(mergedRecord);
                    break;
                }
            }
        }
    }

    /**
     * Combines both records' columns into a new insertion-ordered map;
     * record2's values win when the same column name appears in both.
     */
    private static Map<String, String> mergeRecords(CsvVo record1, CsvVo record2) {
        Map<String, String> mergedMap = new LinkedHashMap<>(record1.getKeyVal());
        mergedMap.putAll(record2.getKeyVal());
        return mergedMap;
    }
}
这里我们将文件的全部数据加载到列表中,然后根据我们要加入的列比较数据。在这种情况下,我使用了 StudentID。
不确定这是否适合大文件。
我有 3 个不同的非常大的文件,我的要求是将它们合并到一个 csv 文件中。所有这 3 个文件都有不同的列数,但它们之间包含一些独特的列。
例子 文件 1:
StudentId StudentName ClassId
1 Ajay 6
2 Vinay 8
3 Geeta 6
4 Sameer 7
文件 2:
ClassId ClassColor
6 Blue
7 Grey
8 White
文件 3:
HouseId HouseName StudentId
1 Knights 4
2 Spartans 1
3 Samurai 2
4 Trojans 3
如您所见,file1 和 file2 具有共同的 ClassId,并且 file1 和 file3 具有共同的 StudentId。 现在,基于此信息,我希望通过基于公共列(共同键)连接这些文件来获得这样的结果。
StudentId StudentName ClassId ClassColor HouseId HouseName
1 Ajay 6 Blue 2 Spartans
2 Vinay 8 White 3 Samurai
3 Geeta 6 Blue 4 Trojans
4 Sameer 7 Grey 1 Knights
这种情况对我来说是全新的,我做了一些研究但没有在 java 中找到解决方案。
在这个阶段,我可以通过以下实现加入两个文件:
Class 保存 csv 文件数据:
import java.util.LinkedHashMap;
import java.util.Map;
/**
 * Value object holding one CSV row as a column-name -> cell-value mapping.
 */
public class CsvVo {

    // LinkedHashMap so columns stay in the order they were inserted.
    private Map<String, String> keyVal;

    /**
     * Creates an empty row.
     *
     * @param id identifier of the source file; currently unused but kept for
     *           compatibility with existing callers — TODO confirm it can be dropped
     */
    public CsvVo(String id) {
        this.keyVal = new LinkedHashMap<>();
    }

    /** Returns the live column-to-value map backing this row. */
    public Map<String, String> getKeyVal() {
        return this.keyVal;
    }

    /** Replaces the entire column-to-value map of this row. */
    public void setKeyVal(Map<String, String> keyVal) {
        this.keyVal = keyVal;
    }

    /** Stores one cell value under the given column name. */
    public void put(String key, String val) {
        this.keyVal.put(key, val);
    }

    /** Returns the cell value for the given column name, or null if absent. */
    public String get(String key) {
        return this.keyVal.get(key);
    }
}
读取文件的 CSV 解析器:
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Set;
/**
 * Minimal CSV reader/writer helpers operating on {@link CsvVo} rows.
 * Note: uses a naive comma split — does not handle quoted fields containing commas.
 */
public class CsvParser {

    /**
     * Reads all data rows of {@code file}, mapping each cell to the column
     * name at the same index in {@code keys}. The first line is skipped as
     * the header row.
     *
     * @param file CSV file to read
     * @param keys column names, in file order (typically from {@link #getHeadersFromACsv})
     * @return one {@link CsvVo} per data line
     * @throws IOException if the file cannot be read
     */
    public static List<CsvVo> getRecodrsFromACsv(File file, List<String> keys) throws IOException {
        List<CsvVo> records = new ArrayList<>();
        // try-with-resources: the original leaked the reader when readLine threw
        try (BufferedReader br = new BufferedReader(new FileReader(file))) {
            boolean isHeader = true; // first line is the header, not data
            String line;
            while ((line = br.readLine()) != null) {
                if (isHeader) {
                    isHeader = false;
                    continue;
                }
                CsvVo record = new CsvVo(file.getName());
                String[] lineSplit = line.split(",");
                // bound by keys.size() too: a ragged row with extra cells must not
                // throw IndexOutOfBoundsException on keys.get(i)
                for (int i = 0; i < lineSplit.length && i < keys.size(); i++) {
                    record.put(keys.get(i), lineSplit[i]);
                }
                records.add(record);
            }
        }
        return records;
    }

    /**
     * Reads the first line of {@code file} and splits it into column names.
     *
     * @return the header names, or null when the file is empty (matches
     *         the original behavior)
     * @throws IOException if the file cannot be read
     */
    public static List<String> getHeadersFromACsv(File file) throws IOException {
        try (BufferedReader br = new BufferedReader(new FileReader(file))) {
            String line = br.readLine();
            if (line == null) {
                return null;
            }
            return new ArrayList<>(Arrays.asList(line.split(",")));
        }
    }

    /**
     * Writes {@code records} to {@code file} with {@code headers} as the first
     * line; each row's cells are emitted in header order.
     *
     * @throws IOException if the file cannot be written
     */
    public static void writeToCsv(final File file, final Set<String> headers, final List<CsvVo> records)
            throws IOException {
        // try-with-resources: the original leaked the writer when append threw
        try (FileWriter csvWriter = new FileWriter(file)) {
            String[] headersArr = headers.toArray(new String[0]);
            // header line
            String sep = "";
            for (String header : headersArr) {
                csvWriter.append(sep).append(header);
                sep = ",";
            }
            csvWriter.append("\n");
            // one line per record, cells aligned to the header order
            for (CsvVo record : records) {
                sep = "";
                for (String header : headersArr) {
                    String cell = record.get(header);
                    csvWriter.append(sep);
                    // BUG FIX: Writer.append(null) writes the literal text "null";
                    // emit an empty cell for columns this record does not have
                    csvWriter.append(cell == null ? "" : cell);
                    sep = ",";
                }
                csvWriter.append("\n");
            }
        }
    }
}
主要Class 测试功能:
import java.io.File;
import java.io.IOException;
import java.util.*;
/**
 * Joins two CSV files on a shared column and writes the merged rows to a
 * new CSV file (an inner join keyed on {@code StudentId}).
 */
public class CsvApplication {

    public static void main(String[] args) throws IOException {
        File csv1 = new File("/Users/guru/Desktop/Standard.csv");
        File csv2 = new File("/Users/guru/Desktop/Match.csv");

        List<String> csv1Headers = CsvParser.getHeadersFromACsv(csv1);
        List<String> csv2Headers = CsvParser.getHeadersFromACsv(csv2);

        // LinkedHashSet de-duplicates the shared join column while preserving
        // column order; the original HashSet scrambled the output columns.
        Set<String> uniqueHeaders = new LinkedHashSet<>();
        uniqueHeaders.addAll(csv1Headers);
        uniqueHeaders.addAll(csv2Headers);

        List<CsvVo> csv1Records = CsvParser.getRecodrsFromACsv(csv1, csv1Headers);
        List<CsvVo> csv2Records = CsvParser.getRecodrsFromACsv(csv2, csv2Headers);

        List<CsvVo> allCsvRecords = new ArrayList<>();
        // BUG FIX: the key must match the header spelling exactly. The example
        // files use "StudentId"; the old "StudentID" never matched any column,
        // so the join produced no rows.
        String key = "StudentId";
        getUniqueRecordsForKey(key, csv1Records, csv2Records, allCsvRecords);

        CsvParser.writeToCsv(new File("/Users/guru/Desktop/Output.csv"), uniqueHeaders, allCsvRecords);
    }

    /**
     * Inner-joins {@code csv1Records} with {@code csv2Records} on {@code key},
     * appending one merged record per match to {@code allCsvRecords}. Each
     * right-side record is consumed by at most one match (it is removed from
     * {@code csv2Records} once joined).
     */
    private static void getUniqueRecordsForKey(String key, List<CsvVo> csv1Records,
            List<CsvVo> csv2Records, List<CsvVo> allCsvRecords) {
        for (CsvVo record1 : csv1Records) {
            String leftVal = record1.get(key);
            // NPE FIX: a row missing the key column returned null and the old
            // code crashed on isEmpty(); skip such rows instead.
            if (leftVal == null || leftVal.isEmpty()) {
                continue;
            }
            // Iterator.remove is the safe way to delete while iterating; the old
            // List.remove inside a for-each only avoided CME because of the break.
            Iterator<CsvVo> it = csv2Records.iterator();
            while (it.hasNext()) {
                CsvVo record2 = it.next();
                if (leftVal.equals(record2.get(key))) {
                    CsvVo mergedRecord = new CsvVo(leftVal);
                    mergedRecord.setKeyVal(mergeRecords(record1, record2));
                    it.remove();
                    allCsvRecords.add(mergedRecord);
                    break;
                }
            }
        }
    }

    /**
     * Combines both records' columns into a new insertion-ordered map;
     * record2's values win when the same column name appears in both.
     */
    private static Map<String, String> mergeRecords(CsvVo record1, CsvVo record2) {
        Map<String, String> mergedMap = new LinkedHashMap<>(record1.getKeyVal());
        mergedMap.putAll(record2.getKeyVal());
        return mergedMap;
    }
}
这里我们将文件的全部数据加载到列表中,然后根据我们要加入的列比较数据。在这种情况下,我使用了 StudentID。
不确定这是否适合大文件。