目 录CONTENT

文章目录

单线程从Excel导入100w数据到MySQL

筱晶哥哥
2023-01-24 / 0 评论 / 0 点赞 / 40 阅读 / 27433 字 / 正在检测是否收录...
温馨提示:
本文最后更新于 2024-03-23,若内容或图片失效,请留言反馈。部分素材来自网络,若不小心影响到您的利益,请联系我们删除。

从Excel导入100万数据到MySQL,首先是分批读取Excel中的100w数据,这一点EasyExcel有自己的解决方案,这里我用的是每批20w,其次就是往DB里插入,怎么去插入,当然不能一条一条的循环,应该批量插入这20w条数据,同样也不能使用MyBatis的批量插入,因为效率也低。

我们应该使用分批读取 + JDBC批处理 + 手动事务控制去插入,这样效率比较快。

前言

这里我使用的是单线程,多线程当然也可以实现,但是多线程容易造成OOM,需要合理的设置线程大小和JVM参数,这里就使用单线程测试下批量操作即可。多线程的话可以自己单独去实现一下,但要注意使用的正确性。

实战

创建SpringBoot项目

POM文件

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>2.7.8</version>
        <relativePath/> <!-- lookup parent from repository -->
    </parent>
    <groupId>com.itjing</groupId>
    <artifactId>excel-batch-deal</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <name>excel-batch-deal</name>
    <description>excel-batch-deal</description>
    <properties>
        <java.version>1.8</java.version>
    </properties>
    <dependencies>

        <!-- easyexcel -->
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>easyexcel</artifactId>
            <version>3.0.5</version>
        </dependency>

        <!-- mybatis和mysql驱动 -->
        <dependency>
            <groupId>org.mybatis.spring.boot</groupId>
            <artifactId>mybatis-spring-boot-starter</artifactId>
            <version>2.2.2</version>
        </dependency>
        <dependency>
            <groupId>com.mysql</groupId>
            <artifactId>mysql-connector-j</artifactId>
            <scope>runtime</scope>
        </dependency>

        <!-- hutool -->
        <dependency>
            <groupId>cn.hutool</groupId>
            <artifactId>hutool-all</artifactId>
            <version>5.8.9</version>
        </dependency>

        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>

        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <optional>true</optional>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
                <configuration>
                    <excludes>
                        <exclude>
                            <groupId>org.projectlombok</groupId>
                            <artifactId>lombok</artifactId>
                        </exclude>
                    </excludes>
                </configuration>
            </plugin>
        </plugins>
    </build>

</project>

YAML配置文件

注意jdbc批处理需要在url加上rewriteBatchedStatements=true

server:
  port: 22222
spring:
  application:
    name: excel-batch-deal
  datasource:
    driver-class-name: com.mysql.cj.jdbc.Driver
    url: jdbc:mysql://127.0.0.1:3306/stream_query?useUnicode=true&characterEncoding=UTF-8&autoReconnect=true&useSSL=false&serverTimezone=GMT%2B8&allowPublicKeyRetrieval=true&nullCatalogMeansCurrent=true&allowMultiQueries=true&rewriteBatchedStatements=true
    username: root
    password: root

mybatis:
  mapper-locations: classpath:mapper/**/*.xml

Excel文件对应的实体

package com.itjing.excel.vo;

import com.alibaba.excel.annotation.ExcelProperty;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

/**
 * @Description: 用户导出VO
 * @Author: lijing
 * @CreateTime: 2023-01-23 15:31
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
public class UserVO {

    @ExcelProperty("用户名")
    private String name;

    @ExcelProperty("年龄")
    private Integer age;

    @ExcelProperty("邮箱")
    private String email;

    @ExcelProperty("电话")
    private String phone;

}

用户导入监听器

package com.itjing.excel.listener;

import com.alibaba.excel.context.AnalysisContext;
import com.alibaba.excel.event.AnalysisEventListener;
import com.itjing.excel.service.UserService;
import com.itjing.excel.vo.UserVO;
import lombok.extern.slf4j.Slf4j;

import java.util.ArrayList;
import java.util.List;

/**
 * @Description: 用户导入监听器
 * @Author: lijing
 * @CreateTime: 2023-01-24 12:06
 */
@Slf4j
public class UserListener extends AnalysisEventListener<UserVO> {

    /**
     * 分批导入,批量值
     */
    private static final int BATCH_COUNT = 200000;

    private List<UserVO> cachedDataList = new ArrayList<>();

    private UserService userService;

    public List<UserVO> getCachedDataList() {
        return cachedDataList;
    }

    public UserListener(UserService userService) {
        this.userService = userService;
    }

    @Override
    public void invoke(UserVO data, AnalysisContext context) {
        cachedDataList.add(data);
        // size是否达到批量值,这里其实就是分批,当数据达到批量值的时候执行一次插入
        if (cachedDataList.size() >= BATCH_COUNT) {
            try {
                saveData();
            } catch (Exception e) {
                log.error("数据解析异常!" + e.getMessage());
            }
            cachedDataList.clear();
        }
    }

    /**
     * Excel中所有数据解析完毕会调用此方法
     *
     * @param context
     */
    @Override
    public void doAfterAllAnalysed(AnalysisContext context) {
        try {
            saveData();
            cachedDataList.clear();
            log.info("所有数据解析完成!");
        } catch (Exception e) {
            log.error("数据解析异常!" + e.getMessage());
            e.printStackTrace();
        }
    }


    /**
     * 保存数据到DB
     */
    private void saveData() {
        userService.importDBFromExcel(cachedDataList);
    }

}

用户业务层

package com.itjing.excel.service;

import com.itjing.excel.dto.UserDTO;
import com.itjing.excel.vo.UserVO;

import java.util.List;

/**
 * @Description: 用户业务接口
 * @Author: lijing
 * @CreateTime: 2023-01-23 15:39
 */
public interface UserService {

    /**
     * 保存数据到DB
     * @param cachedDataList
     */
    void importDBFromExcel(List<UserVO> cachedDataList);
}

package com.itjing.excel.service.impl;

import cn.hutool.db.DbUtil;
import com.itjing.excel.dao.UserMapper;
import com.itjing.excel.dto.UserDTO;
import com.itjing.excel.service.UserService;
import com.itjing.excel.vo.UserVO;
import lombok.extern.slf4j.Slf4j;
import org.apache.ibatis.cursor.Cursor;
import org.apache.ibatis.session.ResultContext;
import org.apache.ibatis.session.ResultHandler;
import org.apache.ibatis.session.SqlSession;
import org.mybatis.spring.SqlSessionTemplate;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import org.springframework.util.CollectionUtils;
import org.springframework.util.StopWatch;

import javax.sql.DataSource;
import java.io.IOException;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

/**
 * @Description:
 * @Author: lijing
 * @CreateTime: 2023-01-23 15:40
 */
@Service
@Slf4j
public class UserServiceImpl implements UserService {

    @Autowired
    private DataSource dataSource;

    /**
     * 保存数据到DB
     *
     * @param cachedDataList
     */
    @Override
    public void importDBFromExcel(List<UserVO> cachedDataList) {
        if (!CollectionUtils.isEmpty(cachedDataList)) {
            // JDBC批处理+事务操作完成对数据的插入
            Connection conn = null;
            PreparedStatement ps = null;
            StopWatch sw = new StopWatch();
            sw.start();
            try {
                conn = dataSource.getConnection();
                // 控制事务不自动提交
                conn.setAutoCommit(false);
                String sql = "insert into user (name,age,email,phone) values (?,?,?,?)";
                ps = conn.prepareStatement(sql);
                // 循环结果集:这里循环不支持"烂布袋"表达式
                for (int i = 0; i < cachedDataList.size(); i++) {
                    UserVO item = cachedDataList.get(i);
                    ps.setString(1, item.getName());
                    ps.setInt(2, item.getAge());
                    ps.setString(3, item.getEmail());
                    ps.setString(4, item.getPhone());
                    // 将一组参数添加到此 PreparedStatement 对象的批处理命令中
                    ps.addBatch();
                    //if (i % 50000 == 0) {
                    //    ps.executeBatch();
                    //    ps.clearBatch();
                    //}
                }
                // 执行批处理
                ps.executeBatch();
                ps.clearBatch();
                // 手动提交事务
                conn.commit();
            } catch (Exception e) {
                e.printStackTrace();
            } finally {
                // 关连接
                DbUtil.close(conn, ps);
            }
            sw.stop();
            long totalTimeMillis = sw.getTotalTimeMillis();
            log.info("数据插入共耗时:" + totalTimeMillis + "毫秒");
        }
    }
}

用户控制器

package com.itjing.excel.controller;

import com.alibaba.excel.EasyExcel;
import com.itjing.excel.dto.UserDTO;
import com.itjing.excel.listener.UserListener;
import com.itjing.excel.service.UserService;
import com.itjing.excel.util.ExcelUtil;
import com.itjing.excel.vo.UserVO;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.util.StopWatch;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;

import javax.servlet.http.HttpServletResponse;
import java.io.IOException;
import java.util.List;

/**
 * @Description:
 * @Author: lijing
 * @CreateTime: 2023-01-23 15:35
 */
@RestController
@RequestMapping("/user")
@Slf4j
public class UserController {

    /**
     * 从excel导入100万数据到mysql
     */
    @GetMapping("/importExcel")
    public void importExcel() {
        String fileName = "C:\\Users\\Administrator\\Desktop\\用户表.xlsx";
        // 记录开始读取Excel时间,也是导入程序开始时间
        StopWatch sw = new StopWatch();
        sw.start();
        // 读取所有Sheet的数据,每次读完一个Sheet就会调用这个方法
        EasyExcel.read(fileName, UserVO.class, new UserListener(userService)).doReadAll();
        sw.stop();
        log.info("------结束读取耗时" + (sw.getTotalTimeMillis()) + "ms------");
    }
}

本人电脑测试导入100w条数据大概1分钟,这和电脑配置也有一定的关系。

0

评论区