如何在 Java 中使用预编译语句(PreparedStatement)和批处理将大型 CSV 文件导入 MySQL
How to import a large csv into a mysql with java using prepared statements and batch
下面这段代码向名为 Earnings 的表中插入数据:对 CSV 的每一行,使用带 4 个整数参数的预编译语句并将其加入批处理,在读完所有行之后再执行该批处理。为什么它会从 CSV 文件的第 2009 行才开始插入?换句话说,它似乎不稳定,只会把 CSV 的一部分导入 MySQL 表中。我该如何让这个 6530 行 CSV 的批处理执行得更高效?另外,如何处理预编译语句中可能缺失的参数值,例如参数索引 4 处可能是一个 int,也可能为空?
// The Statement has to be declared before the CREATE TABLE call that uses it;
// the original snippet declared it only after its first use.
Statement statement = conn.createStatement();
// Creates the Earnings table. (tournament, player) is the composite primary key,
// so inserting the same pair twice is rejected by the database.
statement.executeUpdate("CREATE TABLE Earnings("
        + "tournament INT references Tournaments( tournament_id ), "
        + "player INT references Players( player_id ), "
        + "prize_money INT, "
        + "position INT, "
        + "PRIMARY KEY(tournament, player))");
try{
String insertTableSQL2 = "INSERT INTO Earnings"
+ "(tournament, player, prize_money, position) VALUES"
+ "(?,?,?,?)";
PreparedStatement preparedStatement2 = conn.prepareStatement(insertTableSQL2);
BufferedReader reader2 = new BufferedReader(new FileReader("earnings.csv"));
String line = null; //line read from csv
Scanner scanner = null; //scanned line
while ((line = reader2.readLine()) != null) {
scanner = new Scanner(line);
scanner.useDelimiter(",");
while (scanner.hasNext()) {
String str = scanner.next();
preparedStatement2.setInt(1, Integer.parseInt(str));
String str2 = scanner.next();
preparedStatement2.setInt(2, Integer.parseInt(str2));
String str3 = scanner.next();
preparedStatement2.setInt(3, Integer.parseInt(str3));
String str4 = scanner.next();
preparedStatement2.setInt(4, Integer.parseInt(str4));
preparedStatement2.addBatch();
}
}
preparedStatement2.executeBatch();
preparedStatement2.close();
scanner.close();
reader2.close(); //closing CSV reader
}
catch(Exception e){
e.printStackTrace();
}
System.out.println("earnings.csv imported");
csv 的格式如下:
19367,26,677,1
19367,112,271,2
19372,51,134,1
19382,213,150,1
19382,243,50,2
18446,34,2976,1
18446,111,1082,2
19452,28,135,1
请使用 LOAD DATA INFILE 语法。这样导入速度会快得多,也不需要编写上面那些代码。
详情请参阅此处(here)。
您可以使用 opencsv 库中的 com.opencsv.CSVReader 类来读取 CSV 文件。
然后编写简单的代码逐行读取 CSV 文件,为每一行设置预编译语句的参数,最后执行批处理。
/**
 * Loads {@code earnings.csv} into the {@code Earnings} table using one batched
 * prepared statement.
 *
 * <p>Each CSV row supplies up to four integer columns in the order tournament,
 * player, prize_money, position. A blank or missing prize_money/position value is
 * stored as SQL NULL. Rows are queued with {@code addBatch()} once per row and sent
 * with a single {@code executeBatch()} after the file has been read.
 *
 * <p>Any failure is printed to standard error; nothing is rethrown.
 */
public void loadCSVfile() {
    // All four columns are bound as parameters; tournament is part of the primary
    // key, so it cannot be a literal NULL.
    String insertQuery = "Insert into Earnings (tournament, player, prize_money, position) values (?,?,?,?)";
    try (CSVReader reader = new CSVReader(new FileReader("earnings.csv"), ',');
            Connection connection = DBConnection.getConnection();
            PreparedStatement pstmt = connection.prepareStatement(insertQuery)) {
        String[] rowData;
        while ((rowData = reader.readNext()) != null) {
            if (rowData.length == 1 && rowData[0].trim().isEmpty()) {
                continue; // blank line in the file
            }
            // JDBC parameter indexes are 1-based.
            for (int column = 1; column <= 4; column++) {
                String data = column <= rowData.length ? rowData[column - 1].trim() : "";
                if (data.isEmpty() && column > 2) {
                    pstmt.setNull(column, java.sql.Types.INTEGER);
                } else {
                    pstmt.setInt(column, Integer.parseInt(data));
                }
            }
            pstmt.addBatch(); // one batch entry per CSV row
        }
        pstmt.executeBatch(); // send all queued rows in one go
    } catch (Exception e) {
        e.printStackTrace();
    }
}
或者如果可能的话用MySQL命令加载:
/**
 * Bulk-loads {@code earnings.csv} into the {@code Earnings} table with MySQL's
 * {@code LOAD DATA LOCAL INFILE} statement.
 *
 * <p>Fields are comma-separated, lines are newline-terminated, and the columns are
 * read in the order tournament, player, prize_money, position. Any failure is
 * printed to standard error; nothing is rethrown.
 *
 * <p>NOTE(review): LOAD DATA LOCAL normally has to be enabled on both the server
 * ({@code local_infile}) and the client (Connector/J {@code allowLoadLocalInfile});
 * confirm for the target environment.
 */
public void loadCSV()
{
    String sqlQuery = "LOAD DATA LOCAL INFILE '" + "earnings.csv" + "' INTO TABLE Earnings FIELDS TERMINATED BY ','" + " LINES TERMINATED BY '\n' (tournament, player, prize_money, position) ";
    // Both the connection and the statement are closed automatically, in reverse order.
    try (Connection connection = DBConnection.getConnection();
            Statement stmt = connection.createStatement()) {
        stmt.execute(sqlQuery);
    }
    catch (Exception e) {
        e.printStackTrace();
    }
}
下面这段代码向名为 Earnings 的表中插入数据:对 CSV 的每一行,使用带 4 个整数参数的预编译语句并将其加入批处理,在读完所有行之后再执行该批处理。为什么它会从 CSV 文件的第 2009 行才开始插入?换句话说,它似乎不稳定,只会把 CSV 的一部分导入 MySQL 表中。我该如何让这个 6530 行 CSV 的批处理执行得更高效?另外,如何处理预编译语句中可能缺失的参数值,例如参数索引 4 处可能是一个 int,也可能为空?
// The Statement has to be declared before the CREATE TABLE call that uses it;
// the original snippet declared it only after its first use.
Statement statement = conn.createStatement();
// Creates the Earnings table. (tournament, player) is the composite primary key,
// so inserting the same pair twice is rejected by the database.
statement.executeUpdate("CREATE TABLE Earnings("
        + "tournament INT references Tournaments( tournament_id ), "
        + "player INT references Players( player_id ), "
        + "prize_money INT, "
        + "position INT, "
        + "PRIMARY KEY(tournament, player))");
try{
String insertTableSQL2 = "INSERT INTO Earnings"
+ "(tournament, player, prize_money, position) VALUES"
+ "(?,?,?,?)";
PreparedStatement preparedStatement2 = conn.prepareStatement(insertTableSQL2);
BufferedReader reader2 = new BufferedReader(new FileReader("earnings.csv"));
String line = null; //line read from csv
Scanner scanner = null; //scanned line
while ((line = reader2.readLine()) != null) {
scanner = new Scanner(line);
scanner.useDelimiter(",");
while (scanner.hasNext()) {
String str = scanner.next();
preparedStatement2.setInt(1, Integer.parseInt(str));
String str2 = scanner.next();
preparedStatement2.setInt(2, Integer.parseInt(str2));
String str3 = scanner.next();
preparedStatement2.setInt(3, Integer.parseInt(str3));
String str4 = scanner.next();
preparedStatement2.setInt(4, Integer.parseInt(str4));
preparedStatement2.addBatch();
}
}
preparedStatement2.executeBatch();
preparedStatement2.close();
scanner.close();
reader2.close(); //closing CSV reader
}
catch(Exception e){
e.printStackTrace();
}
System.out.println("earnings.csv imported");
csv 的格式如下:
19367,26,677,1
19367,112,271,2
19372,51,134,1
19382,213,150,1
19382,243,50,2
18446,34,2976,1
18446,111,1082,2
19452,28,135,1
请使用 LOAD DATA INFILE 语法。这样导入速度会快得多,也不需要编写上面那些代码。
详情请参阅此处(here)。
您可以使用 opencsv 库中的 com.opencsv.CSVReader 类来读取 CSV 文件。然后编写简单的代码逐行读取 CSV 文件,为每一行设置预编译语句的参数,最后执行批处理。
/**
 * Loads {@code earnings.csv} into the {@code Earnings} table using one batched
 * prepared statement.
 *
 * <p>Each CSV row supplies up to four integer columns in the order tournament,
 * player, prize_money, position. A blank or missing prize_money/position value is
 * stored as SQL NULL. Rows are queued with {@code addBatch()} once per row and sent
 * with a single {@code executeBatch()} after the file has been read.
 *
 * <p>Any failure is printed to standard error; nothing is rethrown.
 */
public void loadCSVfile() {
    // All four columns are bound as parameters; tournament is part of the primary
    // key, so it cannot be a literal NULL.
    String insertQuery = "Insert into Earnings (tournament, player, prize_money, position) values (?,?,?,?)";
    try (CSVReader reader = new CSVReader(new FileReader("earnings.csv"), ',');
            Connection connection = DBConnection.getConnection();
            PreparedStatement pstmt = connection.prepareStatement(insertQuery)) {
        String[] rowData;
        while ((rowData = reader.readNext()) != null) {
            if (rowData.length == 1 && rowData[0].trim().isEmpty()) {
                continue; // blank line in the file
            }
            // JDBC parameter indexes are 1-based.
            for (int column = 1; column <= 4; column++) {
                String data = column <= rowData.length ? rowData[column - 1].trim() : "";
                if (data.isEmpty() && column > 2) {
                    pstmt.setNull(column, java.sql.Types.INTEGER);
                } else {
                    pstmt.setInt(column, Integer.parseInt(data));
                }
            }
            pstmt.addBatch(); // one batch entry per CSV row
        }
        pstmt.executeBatch(); // send all queued rows in one go
    } catch (Exception e) {
        e.printStackTrace();
    }
}
或者如果可能的话用MySQL命令加载:
/**
 * Bulk-loads {@code earnings.csv} into the {@code Earnings} table with MySQL's
 * {@code LOAD DATA LOCAL INFILE} statement.
 *
 * <p>Fields are comma-separated, lines are newline-terminated, and the columns are
 * read in the order tournament, player, prize_money, position. Any failure is
 * printed to standard error; nothing is rethrown.
 *
 * <p>NOTE(review): LOAD DATA LOCAL normally has to be enabled on both the server
 * ({@code local_infile}) and the client (Connector/J {@code allowLoadLocalInfile});
 * confirm for the target environment.
 */
public void loadCSV()
{
    String sqlQuery = "LOAD DATA LOCAL INFILE '" + "earnings.csv" + "' INTO TABLE Earnings FIELDS TERMINATED BY ','" + " LINES TERMINATED BY '\n' (tournament, player, prize_money, position) ";
    // Both the connection and the statement are closed automatically, in reverse order.
    try (Connection connection = DBConnection.getConnection();
            Statement stmt = connection.createStatement()) {
        stmt.execute(sqlQuery);
    }
    catch (Exception e) {
        e.printStackTrace();
    }
}