java實(shí)現(xiàn)大文件導(dǎo)出的實(shí)現(xiàn)與優(yōu)化
關(guān)于大文件導(dǎo)出的優(yōu)化迭代情況如下:
計(jì)算機(jī)配置:四核16G內(nèi)存
初始版本為單線程單文件導(dǎo)出文件,mybatis讀 opencsv寫,耗時(shí)將近三小時(shí);
第一輪優(yōu)化改為多線程單文件,提高讀數(shù)據(jù)效率,時(shí)間僅縮減十分鐘;
第二輪改為多線程多文件,提高寫文件效率,時(shí)間縮減一個(gè)半小時(shí);
第三輪使用 Mybatis 流式查詢,并改用 Map 封裝數(shù)據(jù),提高內(nèi)存利用率,時(shí)間縮減十分鐘;
第四輪棄用 Mybatis ,改用原生 JDBC 獲取數(shù)據(jù)并直接拼接,時(shí)間縮減十分鐘;
第五輪棄用 opencsv ,改用 BufferWriter 直接寫數(shù)據(jù),時(shí)間縮減十分鐘;
輸出:
2023-04-23 22:01:30 [main] INFO WriteData - 單線程單文件 total time in 258s
2023-04-23 22:02:44 [main] INFO WriteData - 固定線程單文件 total time in 74s
2023-04-23 22:03:40 [main] INFO WriteData - 固定線程多文件 total time in 55s
2023-04-23 22:04:18 [main] INFO WriteData - concurrentWrite total time in 37s
2023-04-23 22:26:28 [Thread-1] INFO WriteData - query in 42s
2023-04-23 22:26:28 [Thread-3] INFO WriteData - query in 42s
2023-04-23 22:26:28 [Thread-4] INFO WriteData - query in 42s
2023-04-23 22:26:28 [Thread-6] INFO WriteData - query in 42s
2023-04-23 22:26:28 [Thread-7] INFO WriteData - query in 42s
2023-04-23 22:26:28 [Thread-2] INFO WriteData - query in 42s
2023-04-23 22:26:28 [Thread-5] INFO WriteData - query in 42s
2023-04-23 22:26:30 [Thread-0] INFO WriteData - query in 44s
2023-04-23 22:27:00 [Thread-5] INFO WriteData - write in 31s
2023-04-23 22:27:00 [Thread-1] INFO WriteData - write in 31s
2023-04-23 22:27:00 [Thread-7] INFO WriteData - write in 31s
2023-04-23 22:27:00 [Thread-2] INFO WriteData - write in 31s
2023-04-23 22:27:00 [Thread-3] INFO WriteData - write in 32s
2023-04-23 22:27:00 [Thread-6] INFO WriteData - write in 32s
2023-04-23 22:27:00 [Thread-4] INFO WriteData - write in 32s
2023-04-23 22:27:01 [Thread-0] INFO WriteData - write in 31s
2023-04-23 22:27:01 [main] INFO WriteData - 固定線程單文件 total time in 75s
2023-04-23 22:27:24 [Thread-14] INFO WriteData - query in 22s
2023-04-23 22:27:24 [Thread-13] INFO WriteData - query in 22s
2023-04-23 22:27:24 [Thread-12] INFO WriteData - query in 22s
2023-04-23 22:27:24 [Thread-9] INFO WriteData - query in 22s
2023-04-23 22:27:24 [Thread-11] INFO WriteData - query in 22s
2023-04-23 22:27:24 [Thread-10] INFO WriteData - query in 22s
2023-04-23 22:27:24 [Thread-15] INFO WriteData - query in 22s
2023-04-23 22:27:25 [Thread-8] INFO WriteData - query in 23s
2023-04-23 22:27:55 [Thread-12] INFO WriteData - write in 31s
2023-04-23 22:27:55 [Thread-14] INFO WriteData - write in 31s
2023-04-23 22:27:55 [Thread-9] INFO WriteData - write in 31s
2023-04-23 22:27:55 [Thread-11] INFO WriteData - write in 31s
2023-04-23 22:27:55 [Thread-13] INFO WriteData - write in 31s
2023-04-23 22:27:56 [Thread-15] INFO WriteData - write in 31s
2023-04-23 22:27:56 [Thread-10] INFO WriteData - write in 31s
2023-04-23 22:27:56 [Thread-8] INFO WriteData - write in 31s
2023-04-23 22:27:56 [main] INFO WriteData - 固定線程多文件 total time in 54s
示例代碼
import com.zaxxer.hikari.HikariConfig;
import com.zaxxer.hikari.HikariDataSource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.*;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.sql.*;
import java.time.Duration;
import java.time.LocalDate;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.LinkedBlockingDeque;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
public class WriteData {
static final Logger log = LoggerFactory.getLogger(WriteData.class);
public static final String PARENT_PATH = "C:\\Users\\qiu01\\Desktop\\server\\docker\\mysql\\master\\data\\stu_data\\";
public static final String URL = "jdbc:mysql://localhost:3307/stu?allowPublicKeyRetrieval=TRUE&useCursorFetch=true";
public static final String USERNAME = "root";
public static final String PASSWORD = "123456";
public static final String SQL = "SELECT * FROM student WHERE id > ? AND id <= ?";
public static final int TOTAL = 10000000;
public static final ThreadPoolExecutor POOL = new ThreadPoolExecutor(8, 9, 3, TimeUnit.SECONDS, new LinkedBlockingDeque<>());
public static final HikariDataSource DS;
static {
HikariConfig config = new HikariConfig();
config.setJdbcUrl(URL);
config.setUsername(USERNAME);
config.setPassword(PASSWORD);
DS = new HikariDataSource(config);
}
public static void main(String[] args) {
// 單線程寫文件
singleThreadWrite();
// 固定線程寫同
concurrentWriteWithFixedThread(true);
concurrentWriteWithFixedThread(false);
concurrentWrite();
}
public static void singleThreadWrite() {
String file = PARENT_PATH + "file.csv";
long start = System.currentTimeMillis();
try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(Files.newOutputStream(Paths.get(file))));
Connection connection = DS.getConnection();
PreparedStatement stmt = connection.prepareStatement(SQL,ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);) {
stmt.setFetchSize(10000);
stmt.setFetchDirection(ResultSet.FETCH_REVERSE);
stmt.setInt(1, 0);
stmt.setInt(2, 10000000);
ResultSet rs = stmt.executeQuery();
writeToFile(writer, rs);
} catch (SQLException | IOException e) {
throw new RuntimeException(e);
}
log.info("單線程單文件 total time in {}s", getSeconds(start));
emptyFolder();
}
private static void concurrentWriteWithFixedThread(boolean writeInOneFile) {
int batch_size = 1250000;
Thread[] threads = new Thread[TOTAL/batch_size];
long start = System.currentTimeMillis();
for (int i = 0; i < TOTAL; i = i + batch_size) {
final int j = i;
int no = i / batch_size;
Thread t = new Thread(() -> {
String file;
if (writeInOneFile) {
file = PARENT_PATH + "file.csv";
} else {
file = PARENT_PATH + "file_" + no + ".csv";
}
try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file, true)));
Connection connection = DS.getConnection();
PreparedStatement stmt = connection.prepareStatement(SQL, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
) {
stmt.setFetchSize(10000);
stmt.setFetchDirection(ResultSet.FETCH_REVERSE);
stmt.setInt(1, j);
stmt.setInt(2, j + batch_size);
long queryStart = System.currentTimeMillis();
try (ResultSet rs = stmt.executeQuery()) {
log.info("query in {}s", getSeconds(queryStart));
long writeStart = System.currentTimeMillis();
writeToFile(writer, rs);
log.info("write in {}s", getSeconds(writeStart));
}
} catch (SQLException | IOException e) {
throw new RuntimeException(e);
}
});
t.start();
threads[no] = t;
}
for (Thread t : threads) {
try {
t.join();
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
}
if (writeInOneFile) {
log.info("固定線程單文件 total time in {}s", getSeconds(start));
} else {
log.info("固定線程多文件 total time in {}s", getSeconds(start));
}
// emptyFolder();
}
private static void concurrentWrite() {
int batch_size = 10000;
CompletableFuture<Void>[] futures = new CompletableFuture[TOTAL/batch_size];
long start = System.currentTimeMillis();
for (int i = 0; i < TOTAL; i = i + batch_size) {
final int j = i;
int no = i / batch_size;
CompletableFuture<Void> t = CompletableFuture.runAsync(() -> {
String file = PARENT_PATH + "file_" + no + ".csv";
try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(Files.newOutputStream(Paths.get(file))));
Connection connection = DS.getConnection();
PreparedStatement stmt = connection.prepareStatement(SQL,ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
) {
stmt.setInt(1, j);
stmt.setInt(2, j + batch_size);
try (ResultSet rs = stmt.executeQuery()){
writeToFile(writer, rs);
}
} catch (SQLException | IOException e) {
throw new RuntimeException(e);
}
},POOL);
futures[no] = t;
}
CompletableFuture.allOf(futures).join();
log.info("多線程多文件 total time in {}s", getSeconds(start));
POOL.shutdown();
emptyFolder();
}
private static void emptyFolder() {
File file = new File(PARENT_PATH);
File[] files = file.listFiles();
for (File f : files) {
f.delete();
}
}
private static void writeToFile(BufferedWriter writer, ResultSet rs) throws SQLException, IOException {
StringBuilder builder = new StringBuilder();
while (rs.next()) {
String firstName = rs.getString("first_name");
String lastName = rs.getString("last_name");
LocalDate dob = rs.getDate("date_of_birth").toLocalDate();
String gender = rs.getString("gender");
String email = rs.getString("email");
String phone = rs.getString("phone_number");
String address = rs.getString("address");
String city = rs.getString("city");
String state = rs.getString("state");
String zip = rs.getString("zip_code");
String country = rs.getString("country");
String nationality = rs.getString("nationality");
String religion = rs.getString("religion");
String emergencyContactName = rs.getString("emergency_contact_name");
String emergencyContactPhone = rs.getString("emergency_contact_phone_number");
String guardianName = rs.getString("guardian_name");
String guardianPhone = rs.getString("guardian_phone_number");
String highSchoolName = rs.getString("high_school_name");
double highSchoolGpa = rs.getDouble("high_school_gpa");
int highSchoolGradYear = rs.getInt("high_school_graduation_year");
String major = rs.getString("major");
String degreeLevel = rs.getString("degree_level");
String enrollmentStatus = rs.getString("enrollment_status");
builder.append(firstName).append("|");
builder.append(lastName).append("|");
builder.append(dob).append("|");
builder.append(gender).append("|");
builder.append(email).append("|");
builder.append(phone).append("|");
builder.append(address).append("|");
builder.append(city).append("|");
builder.append(state).append("|");
builder.append(zip).append("|");
builder.append(country).append("|");
builder.append(nationality).append("|");
builder.append(religion).append("|");
builder.append(emergencyContactName).append("|");
builder.append(emergencyContactPhone).append("|");
builder.append(guardianName).append("|");
builder.append(guardianPhone).append("|");
builder.append(highSchoolName).append("|");
builder.append(highSchoolGpa).append("|");
builder.append(highSchoolGradYear).append("|");
builder.append(major).append("|");
builder.append(degreeLevel).append("|");
builder.append(enrollmentStatus).append("\n");
writer.write(builder.toString());
builder.delete(0, builder.length());
}
}
private static long getSeconds(long start) {
return Duration.ofMillis(System.currentTimeMillis() - start).getSeconds();
}
}
以上就是java實(shí)現(xiàn)大文件導(dǎo)出的實(shí)現(xiàn)與優(yōu)化的詳細(xì)內(nèi)容,更多關(guān)于java文件導(dǎo)出的資料請(qǐng)關(guān)注腳本之家其它相關(guān)文章!
相關(guān)文章
Spring boot 應(yīng)用實(shí)現(xiàn)動(dòng)態(tài)刷新配置詳解
這篇文章主要介紹了spring boot 配置動(dòng)態(tài)刷新實(shí)現(xiàn)詳解,文中通過示例代碼介紹的非常詳細(xì),對(duì)大家的學(xué)習(xí)或者工作具有一定的參考學(xué)習(xí)價(jià)值,需要的朋友可以參考下2021-09-09
SpringBoot結(jié)合prometheus自定義埋點(diǎn)方式
文章介紹了如何使用Spring Actuator、Prometheus和Grafana進(jìn)行度量統(tǒng)計(jì)收集、數(shù)據(jù)收集和數(shù)據(jù)展示,以監(jiān)控生成環(huán)境機(jī)器的性能指標(biāo)和業(yè)務(wù)數(shù)據(jù)指標(biāo)2024-12-12
圖解Java中歸并排序算法的原理與實(shí)現(xiàn)
歸并排序是建立在歸并操作上的一種有效的排序算法。該算法是采用分治法(Divide and Conquer)的一個(gè)非常典型的應(yīng)用。本文將通過圖片詳解插入排序的原理及實(shí)現(xiàn),需要的可以參考一下2022-08-08

