从Excel导入100万数据到MySQL,首先是分批读取Excel中的100w数据,这一点EasyExcel有自己的解决方案,这里我用的是每批20w,其次就是往DB里插入,怎么去插入,当然不能一条一条的循环,应该批量插入这20w条数据,同样也不能使用MyBatis的批量插入,因为效率也低。
我们应该使用分批读取 + JDBC批处理 + 手动事务控制
去插入,这样效率比较快。
前言
这里我使用的是单线程,多线程当然也可以实现,但是多线程容易造成OOM,需要合理的设置线程大小和JVM参数,这里就使用单线程测试下批量操作即可。多线程的话可以自己单独去实现一下,但要注意使用的正确性。
实战
创建SpringBoot项目
略
POM文件
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>2.7.8</version>
<relativePath/> <!-- lookup parent from repository -->
</parent>
<groupId>com.itjing</groupId>
<artifactId>excel-batch-deal</artifactId>
<version>0.0.1-SNAPSHOT</version>
<name>excel-batch-deal</name>
<description>excel-batch-deal</description>
<properties>
<java.version>1.8</java.version>
</properties>
<dependencies>
<!-- easyexcel -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>easyexcel</artifactId>
<version>3.0.5</version>
</dependency>
<!-- mybatis和mysql驱动 -->
<dependency>
<groupId>org.mybatis.spring.boot</groupId>
<artifactId>mybatis-spring-boot-starter</artifactId>
<version>2.2.2</version>
</dependency>
<dependency>
<groupId>com.mysql</groupId>
<artifactId>mysql-connector-j</artifactId>
<scope>runtime</scope>
</dependency>
<!-- hutool -->
<dependency>
<groupId>cn.hutool</groupId>
<artifactId>hutool-all</artifactId>
<version>5.8.9</version>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<optional>true</optional>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<configuration>
<excludes>
<exclude>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
</exclude>
</excludes>
</configuration>
</plugin>
</plugins>
</build>
</project>
YAML配置文件
注意jdbc批处理需要在url加上
rewriteBatchedStatements=true
server:
port: 22222
spring:
application:
name: excel-batch-deal
datasource:
driver-class-name: com.mysql.cj.jdbc.Driver
url: jdbc:mysql://127.0.0.1:3306/stream_query?useUnicode=true&characterEncoding=UTF-8&autoReconnect=true&useSSL=false&serverTimezone=GMT%2B8&allowPublicKeyRetrieval=true&nullCatalogMeansCurrent=true&allowMultiQueries=true&rewriteBatchedStatements=true
username: root
password: root
mybatis:
mapper-locations: classpath:mapper/**/*.xml
Excel文件对应的实体
package com.itjing.excel.vo;
import com.alibaba.excel.annotation.ExcelProperty;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
* @Description: 用户导出VO
* @Author: lijing
* @CreateTime: 2023-01-23 15:31
*/
@Data
@AllArgsConstructor
@NoArgsConstructor
public class UserVO {
@ExcelProperty("用户名")
private String name;
@ExcelProperty("年龄")
private Integer age;
@ExcelProperty("邮箱")
private String email;
@ExcelProperty("电话")
private String phone;
}
用户导入监听器
package com.itjing.excel.listener;
import com.alibaba.excel.context.AnalysisContext;
import com.alibaba.excel.event.AnalysisEventListener;
import com.itjing.excel.service.UserService;
import com.itjing.excel.vo.UserVO;
import lombok.extern.slf4j.Slf4j;
import java.util.ArrayList;
import java.util.List;
/**
* @Description: 用户导入监听器
* @Author: lijing
* @CreateTime: 2023-01-24 12:06
*/
@Slf4j
public class UserListener extends AnalysisEventListener<UserVO> {
/**
* 分批导入,批量值
*/
private static final int BATCH_COUNT = 200000;
private List<UserVO> cachedDataList = new ArrayList<>();
private UserService userService;
public List<UserVO> getCachedDataList() {
return cachedDataList;
}
public UserListener(UserService userService) {
this.userService = userService;
}
@Override
public void invoke(UserVO data, AnalysisContext context) {
cachedDataList.add(data);
// size是否达到批量值,这里其实就是分批,当数据达到批量值的时候执行一次插入
if (cachedDataList.size() >= BATCH_COUNT) {
try {
saveData();
} catch (Exception e) {
log.error("数据解析异常!" + e.getMessage());
}
cachedDataList.clear();
}
}
/**
* Excel中所有数据解析完毕会调用此方法
*
* @param context
*/
@Override
public void doAfterAllAnalysed(AnalysisContext context) {
try {
saveData();
cachedDataList.clear();
log.info("所有数据解析完成!");
} catch (Exception e) {
log.error("数据解析异常!" + e.getMessage());
e.printStackTrace();
}
}
/**
* 保存数据到DB
*/
private void saveData() {
userService.importDBFromExcel(cachedDataList);
}
}
用户业务层
package com.itjing.excel.service;
import com.itjing.excel.dto.UserDTO;
import com.itjing.excel.vo.UserVO;
import java.util.List;
/**
* @Description: 用户业务接口
* @Author: lijing
* @CreateTime: 2023-01-23 15:39
*/
public interface UserService {
/**
* 保存数据到DB
* @param cachedDataList
*/
void importDBFromExcel(List<UserVO> cachedDataList);
}
package com.itjing.excel.service.impl;
import cn.hutool.db.DbUtil;
import com.itjing.excel.dao.UserMapper;
import com.itjing.excel.dto.UserDTO;
import com.itjing.excel.service.UserService;
import com.itjing.excel.vo.UserVO;
import lombok.extern.slf4j.Slf4j;
import org.apache.ibatis.cursor.Cursor;
import org.apache.ibatis.session.ResultContext;
import org.apache.ibatis.session.ResultHandler;
import org.apache.ibatis.session.SqlSession;
import org.mybatis.spring.SqlSessionTemplate;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import org.springframework.util.CollectionUtils;
import org.springframework.util.StopWatch;
import javax.sql.DataSource;
import java.io.IOException;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
/**
* @Description:
* @Author: lijing
* @CreateTime: 2023-01-23 15:40
*/
@Service
@Slf4j
public class UserServiceImpl implements UserService {
@Autowired
private DataSource dataSource;
/**
* 保存数据到DB
*
* @param cachedDataList
*/
@Override
public void importDBFromExcel(List<UserVO> cachedDataList) {
if (!CollectionUtils.isEmpty(cachedDataList)) {
// JDBC批处理+事务操作完成对数据的插入
Connection conn = null;
PreparedStatement ps = null;
StopWatch sw = new StopWatch();
sw.start();
try {
conn = dataSource.getConnection();
// 控制事务不自动提交
conn.setAutoCommit(false);
String sql = "insert into user (name,age,email,phone) values (?,?,?,?)";
ps = conn.prepareStatement(sql);
// 循环结果集:这里循环不支持"烂布袋"表达式
for (int i = 0; i < cachedDataList.size(); i++) {
UserVO item = cachedDataList.get(i);
ps.setString(1, item.getName());
ps.setInt(2, item.getAge());
ps.setString(3, item.getEmail());
ps.setString(4, item.getPhone());
// 将一组参数添加到此 PreparedStatement 对象的批处理命令中
ps.addBatch();
//if (i % 50000 == 0) {
// ps.executeBatch();
// ps.clearBatch();
//}
}
// 执行批处理
ps.executeBatch();
ps.clearBatch();
// 手动提交事务
conn.commit();
} catch (Exception e) {
e.printStackTrace();
} finally {
// 关连接
DbUtil.close(conn, ps);
}
sw.stop();
long totalTimeMillis = sw.getTotalTimeMillis();
log.info("数据插入共耗时:" + totalTimeMillis + "毫秒");
}
}
}
用户控制器
package com.itjing.excel.controller;
import com.alibaba.excel.EasyExcel;
import com.itjing.excel.dto.UserDTO;
import com.itjing.excel.listener.UserListener;
import com.itjing.excel.service.UserService;
import com.itjing.excel.util.ExcelUtil;
import com.itjing.excel.vo.UserVO;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.util.StopWatch;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import javax.servlet.http.HttpServletResponse;
import java.io.IOException;
import java.util.List;
/**
* @Description:
* @Author: lijing
* @CreateTime: 2023-01-23 15:35
*/
@RestController
@RequestMapping("/user")
@Slf4j
public class UserController {
/**
* 从excel导入100万数据到mysql
*/
@GetMapping("/importExcel")
public void importExcel() {
String fileName = "C:\\Users\\Administrator\\Desktop\\用户表.xlsx";
// 记录开始读取Excel时间,也是导入程序开始时间
StopWatch sw = new StopWatch();
sw.start();
// 读取所有Sheet的数据,每次读完一个Sheet就会调用这个方法
EasyExcel.read(fileName, UserVO.class, new UserListener(userService)).doReadAll();
sw.stop();
log.info("------结束读取耗时" + (sw.getTotalTimeMillis()) + "ms------");
}
}
本人电脑测试导入100w条数据大概1分钟,这和电脑配置也有一定的关系。
评论区