likes
comments
collection
share

揭秘秒级导出导出百万千万级Excel数据的终极秘籍!让你的数据操作瞬间飙升!

作者站长头像
站长
· 阅读数 4

如何高效导出百万千万级Excel数据?超越传统极限,助你事半功倍!

关于作者

  • 作者介绍


揭秘秒级导出导出百万千万级Excel数据的终极秘籍!让你的数据操作瞬间飙升!

技术选型

当需要导出大量数据到Excel时,我们需要考虑选择哪个技术方案。下面我将列举常见的三种选型:POI、EasyExcel和Jxls,并对它们的特点进行横向比较。

方案优点缺点
POI- Apache开源项目,稳定性高- 支持多种格式(XLS、XLSX等)- 可以读写复杂表格(如带有合并单元格或图表的表格)- API使用较为繁琐 - 对于大数据量可能会存在性能问题
EasyExcel- 简单易用,API设计友好 - 高效处理大量数据 - 支持自定义样式和格式化器等功能- 不支持老版本 Excel 文件 (如 xls 格式)
Jxls 2.x- 具备良好的模板引擎机制,支持通过模板文件生成 Excel 表格。- 提供了可视化设计器来快速创建报告模板。- 性能相对其他两个方案稍弱一些 - 模板与代码耦合度较高

在实际应用中我们可以根据具体需求灵活选择不同的技术方案。如果需要处理非常复杂、规则不固定且包含各种嵌套的 Excel 文件,可以选择 Apache POI。如果需要处理大量数据且性能较高,则推荐使用 EasyExcel。而对于需要频繁生成带有模板的报表或者做可视化定制开发时,Jxls可能是更好的选择。

最后提醒一点,在进行技术选型时除了关注技术本身的优缺点外,还应该考虑到自身团队成员熟悉度、项目需求和规模等方面因素来综合决策。本篇文章设计百万千万数据的Easyexcel的动态表头数据的导出!

引入依赖

<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>easyexcel</artifactId>
    <version>3.0.2</version>
</dependency>

Service层

	/**
     *  导出数据到excel
     * @param datasetId
     * @param user
     */
public void exportExcelDataById(Long Id, String dataName, User user, HttpServletRequest request, HttpServletResponse respons);

ServiceImpl层

import com.alibaba.excel.EasyExcel;
import com.alibaba.excel.write.metadata.WriteSheet;
import com.alibaba.excel.write.style.column.LongestMatchColumnWidthStyleStrategy;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.hbisdt.common.core.common.Constants;
import com.hbisdt.common.model.User;
import com.hbisdt.server.datasource.constant.Const;
import com.hbisdt.server.datasource.service.DataSetTableService;
import com.hbisdt.server.datasource.service.DatasetTableFieldColumnService;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
import org.springframework.stereotype.Service;

import javax.annotation.Resource;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.stream.Collectors;

/**
 * @ClassName: TableFieldColumnServiceImpl
 * @Author: zhangsr
 * @Date: 2023/6/2 14:17
 * @Version: 
 */
@Slf4j
@Service("TableFieldService")
public class TableFieldServiceImpl implements TableFieldService {

    @Autowired
    private TableService tableService;

    @Resource(name = "threadPoolTaskExecutor")
    private ThreadPoolTaskExecutor threadPoolTaskExecutor;

    /**
     * 导出数据集数据到excel
     *
     * @param datasetId
     * @param datasetName
     * @param user
     * @param request
     * @param respons
     */
    @Override
    public void exportExcelDataById(Long datasetId, String datasetName, User user, HttpServletRequest request, HttpServletResponse respons) {
        long startTime = System.currentTimeMillis();
        //查询表头字段并处理excel表头
        Map<String, Object> tableBar = tableService.getDatasetDataById(datasetId, 1, 1, user);
        List<List<String>> excelHeadList = new ArrayList();
        List<String> mappingExcelFieldList = new ArrayList<>();
        JSONObject tableBarJson = new JSONObject(tableBar);
        JSONArray columnToPageJsonArray = tableBarJson.getJSONArray("columnToPage");

        for (int i = 0; i < columnToPageJsonArray.size(); ++i) {
            mappingExcelFieldList.add(columnToPageJsonArray.getJSONObject(i).getString("pageDataColumnName"));
            excelHeadList.add(new ArrayList<>(Collections.singleton(columnToPageJsonArray.getJSONObject(i).getString("columnName"))));
        }

/*
        //处理数据
        Map<String, Object> tableData = tableService.getDatasetDataById(datasetId, 1, 10000, user);

        JSONObject tableDataJson = new JSONObject(tableData);
        JSONArray tableDataJsonArray = tableDataJson.getJSONObject("pageData").getJSONArray("resultList");

//        System.out.println("tableData - > " + tableData);
//        System.out.println("excelHeadList - > " + excelHeadList);
//        System.out.println("tableDataJsonArray - > " + tableDataJsonArray);

        List<List<String>> reslist = new ArrayList<>();
        for (int i = 0; i < tableDataJsonArray.size(); i++) {
            List<String> templist = new ArrayList<>();
            for (String mappingExcelField :
                    mappingExcelFieldList) {
                templist.add(tableDataJsonArray.getJSONObject(i).getString(mappingExcelField));
            }
            reslist.add(new ArrayList<>(templist));
        }
*/

        // 提交异步任务
        List<Future<?>> futures = new ArrayList<>();
        int pageIndex = 0;
        int pageSize = 5000;

        while (true) {
            //异步查询数据
            int finalPageIndex = pageIndex;
            Future<List<List<String>>> future = threadPoolTaskExecutor.submit(() -> fetchData(datasetId, finalPageIndex, pageSize, mappingExcelFieldList, user));
            futures.add(future);
            // 增加页面索引
            pageIndex++;

            List<List<String>> lastDataList;
            try {
                lastDataList = future.get();
            } catch (InterruptedException e) {
                throw new RuntimeException(e);
            } catch (ExecutionException e) {
                throw new RuntimeException(e);
            }
            // 当查询结果小于pageSize时,表示已经查询完毕
            if (lastDataList.size() < pageSize || (future.isDone() && future.isCancelled()) {
                break;
            }
        }

        for (Future<?> future : futures) {
            try {
                List<List<String>> dataList = (List<List<String>>) future.get();
                // 这里注意 使用swagger 会导致各种问题,easyexcel官方文档推荐直接用浏览器或者用postman测试
                respons.setContentType("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
                respons.setCharacterEncoding(Constants.UTF_8);
                // 这里URLEncoder.encode可以防止中文乱码 当然和easyexcel没有关系
                datasetName = URLEncoder.encode(datasetName, Constants.UTF_8);
                respons.setHeader("filename", datasetName + ".xlsx");
                respons.setHeader("Content-disposition", "attachment;filename*=utf-8''" + datasetName + ".xlsx");
                // 创建excel
                EasyExcel.write(respons.getOutputStream())
                        .head(excelHeadList)
                        // 自适应列宽(不需要就忽略)
                        .registerWriteHandler(new LongestMatchColumnWidthStyleStrategy())
                        .sheet("sheet0")
                        .doWrite(dataList);

                log.info("数据生成结束,数据量={},耗时={}ms", dataList.size(), System.currentTimeMillis() - startTime);
            } catch (Exception e) {
                log.error("用户{}下载报表异常:{}", user.getUserName(),e.getMessage());
                throw new RuntimeException("下载报表异常");
            }
        }
    }

    /**
     * 处理数据
     *
     * @param datasetId
     * @param pageIndex
     * @param pageSize
     * @param mappingExcelFieldList
     * @param user
     * @return
     */
    private List<List<String>> fetchData(Long datasetId, int pageIndex, int pageSize, List<String> mappingExcelFieldList, User user) {
        // 处理数据
        // 通过tableService去得到对应的所有的数据信息
        Map<String, Object> tableData = tableService.getDatasetDataById(datasetId, pageIndex, pageSize, user);
        JSONObject tableDataJson = new JSONObject(tableData);
        JSONArray tableDataJsonArray = tableDataJson.getJSONObject("pageData").getJSONArray("resultList");

        List<List<String>> reslist = new ArrayList<>();
        for (int i = 0; i < tableDataJsonArray.size(); i++) {
            List<String> templist = new ArrayList<>();
            for (String mappingExcelField :
                    mappingExcelFieldList) {
                templist.add(tableDataJsonArray.getJSONObject(i).getString(mappingExcelField));
            }
            reslist.add(new ArrayList<>(templist));
        }
        return reslist;
    }
}

controller

@ApiOperation(value = "导出数据集数据到excel")
@GetMapping("/exportExcelDataById")
public void exportExcelDataById(@RequestParam Long datasetId,
                                       @RequestParam String datasetName,
                                       HttpServletRequest request,
                                       HttpServletResponse respons) {
    // 通过请求头的token来解析user
    User user = SecurityUtils.getLoginUser(request);
    if (datasetId == null || datasetId < 0){
        throw new NotFoundException("数据集Id不能为空!");
    }
    tableService.exportExcelDataById(datasetId, datasetName, user, request, respons);
}

异步任务模板

// 定义线程池
ExecutorService executorService = Executors.newFixedThreadPool(10);

// 提交异步任务
List<Future<?>> futures = new ArrayList<>();
int pageSize = 10000;
int pageIndex = 0;

while (true) {
    // 异步查询数据
    Future<List<Data>> future = executorService.submit(() -> {
        // 根据实际情况查询数据,这里假设返回的是Data对象的列表
        return fetchData(pageIndex, pageSize);
    });

    futures.add(future);

    // 增加页面索引
    pageIndex++;‘
        
	List<Data> lastDataList;
    try {
        lastDataList = future.get();
    } catch (InterruptedException e) {
        throw new RuntimeException(e);
    } catch (ExecutionException e) {
        throw new RuntimeException(e);
    }

    // 当查询结果小于pageSize时,表示已经查询完毕
    if (lastDataList.size() < pageSize || (future.isDone() && future.isCancelled()) {
}

通过easyexcel使用异步任务写入excel模板

public class ExcelExporter {
    public static void main(String[] args) {
        // 创建ExcelWriter
        ExcelWriter excelWriter = EasyExcel.write("output.xlsx").build();

        // 定义线程池
        ExecutorService executorService = Executors.newFixedThreadPool(10);

        // 提交异步任务
        List<Future<?>> futures = new ArrayList<>();
        int pageSize = 10000;
        int pageIndex = 0;

        while (true) {
            // 异步查询数据
            Future<List<Data>> future = executorService.submit(() -> {
                // 根据实际情况查询数据,这里假设返回的是Data对象的列表
                return fetchData(pageIndex, pageSize);
            });

            futures.add(future);

            // 增加页面索引
            pageIndex++;

            List<Data> lastDataList;
            try {
                lastDataList = future.get();
            } catch (InterruptedException e) {
                throw new RuntimeException(e);
            } catch (ExecutionException e) {
                throw new RuntimeException(e);
            }
            
            // 当查询结果小于pageSize时,表示已经查询完毕
            if (lastDataList.size() < pageSize || (future.isDone() && future.isCancelled()) {
                break;
            }
        }

        // 写入数据到Excel
        int sheetIndex = 0;
        for (Future<?> future : futures) {
            try {
                List<Data> dataList = (List<Data>) future.get();

                WriteSheet writeSheet = EasyExcel.writerSheet(sheetIndex, "Sheet" + sheetIndex).build();
                excelWriter.write(dataList, writeSheet);

                sheetIndex++;
            } catch (InterruptedException | ExecutionException e) {
                e.printStackTrace();
            }
        }

        // 关闭ExcelWriter和线程池
        excelWriter.finish();
        executorService.shutdown();
    }

    // 模拟查询数据
    private static List<Data> fetchData(int pageIndex, int pageSize) {
        // TODO: 根据实际情况查询数据并返回
        return new ArrayList<>();
    }
}

测试

测试时间 索引页大小 测试数据量   
32s	5000 	 多用户测试320w
11s 	5000 	 多用户测试100w
27.3s 	5000 	 单用户测试320w
16.5s 	5000 	 单用户测试210w
9.3s 	5000 	 单用户测试100w
==============================
24s 	1w 	 单用户测试320w
17.3s	1w 	 单用户测试210w
8.7s	1w  	 单用户测试100w

针对导出GC overhead limit exceeded优化

通过对百万级以及千万级数据的分片思想

/**
 * @ClassName: TableFieldServiceImpl
 * @Author: zhangsr
 * @Date: 2023/6/2 14:17
 * @Version: 
 */
@Slf4j
@Service("TableFieldService")
public class TableFieldServiceImpl implements TableFieldService {

    @Autowired
    private TableService tableService;

    @Resource(name = "threadPoolTaskExecutor")
    private ThreadPoolTaskExecutor threadPoolTaskExecutor;

    /**
     * 导出数据集数据到excel
     *
     * @param datasetId   数据集Id
     * @param datasetName 数据集名称
     * @param user        用户
     * @param request     请求
     * @param response    响应
     */
    @Override
    public void exportExcelDataById(Long datasetId, String datasetName, User user, HttpServletRequest request, HttpServletResponse response) {
        long startTime = System.currentTimeMillis();
        //查询表头字段
        Map<String, Object> tableBar = tableService.getDatasetDataById(datasetId, 1, 1, user);
        List<List<String>> excelHeadList = new ArrayList();
        List<String> mappingExcelFieldList = new ArrayList<>();
        JSONObject tableBarJson = new JSONObject(tableBar);
        JSONArray columnToPageJsonArray = tableBarJson.getJSONArray("columnToPage");

        // 获取页面索引
        Long totalCount = tableBarJson.getJSONObject("pageData").getLong("totalCount");

        // 处理excel表头
        for (int i = 0; i < columnToPageJsonArray.size(); i++) {
            mappingExcelFieldList.add(columnToPageJsonArray.getJSONObject(i).getString("pageDataColumnName"));
            excelHeadList.add(new ArrayList<>(Collections.singleton(columnToPageJsonArray.getJSONObject(i).getString("columnName"))));
        }

        int pageIndex = 0;
        int pageSize = 5000;
        int fragSize = 100000;
        Long fragTotal = totalCount % fragSize == 0 ? totalCount / fragSize : (totalCount / fragSize) + 1;

        // 分片处理
        for (int i = 0; i < fragTotal; i++) {
            Long indexTotalTemp = (totalCount - (i * fragSize)) < fragSize ? (totalCount - (i * fragSize)) : fragSize;
            Long indexTotal = indexTotalTemp % pageSize == 0 ? indexTotalTemp / pageSize : (indexTotalTemp / pageSize) + 1;
            
            // 提交异步任务
            List<Future<?>> futures = new ArrayList<>();
            for (int j = 0; j < indexTotal; j++) {
                Future<List<List<String>>> future;
                try {
                    // 异步查询数据
                    int finalPageIndex = pageIndex;
                    future = threadPoolTaskExecutor.submit(() -> fetchData(datasetId, finalPageIndex, pageSize, mappingExcelFieldList, user));
                    futures.add(future);
                    // 增加页面索引
                    pageIndex++;
                } catch (Exception e) {
                    log.error("导出excel异常:{}", e.getMessage());
                    throw new ServerException("导出excel异常!");
                }
                if ((future.isDone() && future.isCancelled())) {
                    break;
                }
            }

            for (Future<?> future : futures) {
                try {
                    List<List<String>> dataList = (List<List<String>>) future.get();
                    // 这里注意 使用swagger 会导致各种问题,easyexcel官方文档推荐直接用浏览器或者用postman测试
                    response.setContentType("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
                    response.setCharacterEncoding(Constants.UTF_8);
                    // 这里URLEncoder.encode可以防止中文乱码 当然和easyexcel没有关系
                    datasetName = URLEncoder.encode(datasetName, Constants.UTF_8);
                    response.setHeader("filename", datasetName + ".xlsx");
                    response.setHeader("Content-disposition", "attachment;filename*=utf-8''" + datasetName + ".xlsx");
                    // 创建excel
                    // 利用easyexcel的特性,拿到response.getOutputStream()是相同的,去写入同一个excel文件
                    EasyExcel.write(response.getOutputStream())
                            .head(excelHeadList)
                            // 自适应列宽(不需要就忽略)
                            .registerWriteHandler(new LongestMatchColumnWidthStyleStrategy())
                            .sheet("sheet0")
                            .doWrite(dataList);
                } catch (Exception e) {
                    log.error("用户{}下载报表异常:{}", user.getUserName(), e.getMessage());
                    throw new ServerException("下载报表异常!");
                }
            }
            log.info("数据生成结束,当前分片={},当前分片索引量={},数据量={},耗时={}ms", i, indexTotal, indexTotalTemp, System.currentTimeMillis() - startTime);
        }
    }


    /**
     * 处理数据
     *
     * @param datasetId             数据集Id
     * @param pageIndex             页面索引
     * @param pageSize              索引大小
     * @param mappingExcelFieldList excel映射字段集合
     * @param user                  用户
     * @return
     */
    private List<List<String>> fetchData(Long datasetId, int pageIndex, int pageSize, List<String> mappingExcelFieldList, User user) {
        //处理数据
        Map<String, Object> tableData = tableService.getDatasetDataById(datasetId, pageIndex, pageSize, user);
        JSONObject tableDataJson = new JSONObject(tableData);
        JSONArray tableDataJsonArray = tableDataJson.getJSONObject("pageData").getJSONArray("resultList");

        List<List<String>> resList = new ArrayList<>();
        for (int i = 0; i < tableDataJsonArray.size(); i++) {
            List<String> tempList = new ArrayList<>();
            for (String mappingExcelField : mappingExcelFieldList) {
                tempList.add(tableDataJsonArray.getJSONObject(i).getString(mappingExcelField));
            }
            resList.add(new ArrayList<>(tempList));
        }
        return resList;
    }
}