easyexcel
easyexcel copied to clipboard
读取 poi 生成的excel报错 com.alibaba.excel.exception.ExcelAnalysisException: org.xml.sax.SAXParseException; lineNumber: 44476; columnNumber: 81; Invalid byte 2 of 4-byte UTF-8 sequence.
ZipOutputStream zipOutputStream = new ZipOutputStream(new FileOutputStream("E:/company/qiyi/bi2.0/temp1.zip")); zipOutputStream.putNextEntry(new ZipEntry("temp.csv"));
ExcelReader reader = EasyExcel.read("E:/company/qiyi/bi2.0/Job_Seeker_V15.xlsx").build();
List<ReadSheet> readSheets = reader.excelExecutor().sheetList();
List<ReadSheet> results = Lists.newArrayList();
if(!readSheets.isEmpty()){
for(int i = 0; i < readSheets.size(); i++){
ReadSheet init;
if(0 == i){
init = EasyExcel.readSheet(readSheets.get(i).getSheetNo()).registerReadListener(new NoModelDataExcelToCsvListener(zipOutputStream, true)).build();
}else{
init = EasyExcel.readSheet(readSheets.get(i).getSheetNo()).registerReadListener(new NoModelDataExcelToCsvListener(zipOutputStream, false)).build();
}
results.add(init);
}
}
reader.read(results);
reader.finish();
zipOutputStream.closeEntry();
zipOutputStream.close();
public class NoModelDataExcelToCsvListener extends AnalysisEventListener<Map<Integer, String>> {
/**
* 每隔5条存储数据库,实际使用中可以3000条,然后清理list ,方便内存回收
*/
private static final int BATCH_COUNT = 100;
List<Map<Integer, String>> list = new ArrayList<Map<Integer, String>>();
private ZipOutputStream zipOutputStream;
/**
* 是否读取表头开关
*/
private boolean headerOk;
public NoModelDataExcelToCsvListener(){
}
public NoModelDataExcelToCsvListener(ZipOutputStream zipOutputStream, boolean headerOk){
this.zipOutputStream = zipOutputStream;
this.headerOk = headerOk;
}
@SneakyThrows
@Override
public void invoke(Map<Integer, String> data, AnalysisContext context) {
log.info("解析到一条数据:id:{}, userId", data.get(0), data.get(1));
list.add(data);
if(list.size() >= BATCH_COUNT){
saveData();
list.clear();
}
}
@Override
public void doAfterAllAnalysed(AnalysisContext context) {
saveData();
log.info("====== 公共导出 excel 转 csv 完成! =====");
}
@Override
public void onException(Exception exception, AnalysisContext context) {
log.error("解析失败,但是继续解析下一行:{}", exception.getMessage());
// 如果是某一个单元格的转换异常 能获取到具体行号
// 如果要获取头的信息 配合invokeHeadMap使用
if (exception instanceof ExcelAnalysisException) {
ExcelDataConvertException excelDataConvertException = (ExcelDataConvertException) exception;
log.error("第{}行,第{}列解析异常", excelDataConvertException.getRowIndex(),
excelDataConvertException.getColumnIndex());
}
}
/**
* 加上存储数据库
*/
private void saveData() {
// log.info("{}条数据,开始存储数据库!", list.size());
// log.info("存储数据库成功!");
}
@Override
public void invokeHeadMap(Map<Integer, String> headMap, AnalysisContext context) {
log.info("head解析到一条数据:{}", JSON.toJSONString(headMap));
if(headerOk){
list.add(headMap);
}else{
log.info("=== 多个sheet,当前不需要表头信息 ====");
}
}
}
异常提示
com.alibaba.excel.exception.ExcelAnalysisException: org.xml.sax.SAXParseException; lineNumber: 44476; columnNumber: 81; Invalid byte 2 of 4-byte UTF-8 sequence.
at com.alibaba.excel.analysis.v07.XlsxSaxAnalyser.parseXmlSource(XlsxSaxAnalyser.java:183)
at com.alibaba.excel.analysis.v07.XlsxSaxAnalyser.execute(XlsxSaxAnalyser.java:201)
at com.alibaba.excel.analysis.ExcelAnalyserImpl.analysis(ExcelAnalyserImpl.java:115)
at com.alibaba.excel.ExcelReader.read(ExcelReader.java:182)
at com.qiyee.web.controller.common.dfdfdfdfd.dkehnt(dfdfdfdfd.java:89)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:483)
at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:50)
at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:47)
at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:325)
at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:78)
at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:57)
at org.junit.runners.ParentRunner$3.run(ParentRunner.java:290)
at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:71)
at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:288)
at org.junit.runners.ParentRunner.access$000(ParentRunner.java:58)
at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:268)
at org.junit.runners.ParentRunner.run(ParentRunner.java:363)
at org.junit.runner.JUnitCore.run(JUnitCore.java:137)
at com.intellij.junit4.JUnit4IdeaTestRunner.startRunnerWithArgs(JUnit4IdeaTestRunner.java:68)
at com.intellij.rt.execution.junit.IdeaTestRunner$Repeater.startRunnerWithArgs(IdeaTestRunner.java:47)
at com.intellij.rt.execution.junit.JUnitStarter.prepareStreamsAndStart(JUnitStarter.java:242)
at com.intellij.rt.execution.junit.JUnitStarter.main(JUnitStarter.java:70)
Caused by: org.xml.sax.SAXParseException; lineNumber: 44476; columnNumber: 81; Invalid byte 2 of 4-byte UTF-8 sequence. at org.apache.xerces.parsers.AbstractSAXParser.parse(Unknown Source) at org.apache.xerces.jaxp.SAXParserImpl$JAXPSAXParser.parse(Unknown Source) at com.alibaba.excel.analysis.v07.XlsxSaxAnalyser.parseXmlSource(XlsxSaxAnalyser.java:178) ... 26 more Caused by: org.apache.xerces.impl.io.MalformedByteSequenceException: Invalid byte 2 of 4-byte UTF-8 sequence. at org.apache.xerces.impl.io.UTF8Reader.invalidByte(Unknown Source) at org.apache.xerces.impl.io.UTF8Reader.read(Unknown Source) at org.apache.xerces.impl.XMLEntityScanner.load(Unknown Source) at org.apache.xerces.impl.XMLEntityScanner.scanLiteral(Unknown Source) at org.apache.xerces.impl.XMLScanner.scanAttributeValue(Unknown Source) at org.apache.xerces.impl.XMLDocumentFragmentScannerImpl.scanAttribute(Unknown Source) at org.apache.xerces.impl.XMLDocumentFragmentScannerImpl.scanStartElement(Unknown Source) at org.apache.xerces.impl.XMLDocumentFragmentScannerImpl$FragmentContentDispatcher.dispatch(Unknown Source) at org.apache.xerces.impl.XMLDocumentFragmentScannerImpl.scanDocument(Unknown Source) at org.apache.xerces.parsers.XML11Configuration.parse(Unknown Source) at org.apache.xerces.parsers.XML11Configuration.parse(Unknown Source) at org.apache.xerces.parsers.XMLParser.parse(Unknown Source) ... 29 more
使用wps 打开excel 找到报错的那一行数据,查看 并无特殊字符,查看excel zip解压后的文件 并无异常.
将多余的sheet页删除后 单独跑有问题的sheet 就是ok的
提供下excel
我也想看这个问题!!
见信好: 附件是excel,辛苦!
------------------ 原始邮件 ------------------ 发件人: "alibaba/easyexcel" @.>; 发送时间: 2022年8月2日(星期二) 晚上9:16 @.>; @.@.>; 主题: Re: [alibaba/easyexcel] 读取 poi 生成的excel报错 com.alibaba.excel.exception.ExcelAnalysisException: org.xml.sax.SAXParseException; lineNumber: 44476; columnNumber: 81; Invalid byte 2 of 4-byte UTF-8 sequence. (Issue #2622)
我也想看这个问题!!
— Reply to this email directly, view it on GitHub, or unsubscribe. You are receiving this because you authored the thread.Message ID: @.***>
从QQ邮箱发来的超大附件
Job_Seeker_V15.xlsx (234.60M, 2022年09月02日 09:34 到期)进入下载页面:http://mail.qq.com/cgi-bin/ftnExs_download?t=exs_ftn_download&k=79336262c4156f94745c6b6d44640b4a040501535a0100501c57545503490d5700504f035a570b485301515b51500c0408015a046277392f5e513d3107015200436c3453574a4109424b625f&code=13bbbd9e
@zhuangjiaju 附件比较大 我用邮箱回复了 http://mail.qq.com/cgi-bin/ftnExs_download?t=exs_ftn_download&k=79336262c4156f94745c6b6d44640b4a040501535a0100501c57545503490d5700504f035a570b485301515b51500c0408015a046277392f5e513d3107015200436c3453574a4109424b625f&code=13bbbd9e
提供下excel
http://mail.qq.com/cgi-bin/ftnExs_download?t=exs_ftn_download&k=79336262c4156f94745c6b6d44640b4a040501535a0100501c57545503490d5700504f035a570b485301515b51500c0408015a046277392f5e513d3107015200436c3453574a4109424b625f&code=13bbbd9e
提供下excel
http://mail.qq.com/cgi-bin/ftnExs_download?t=exs_ftn_download&k=79336262c4156f94745c6b6d44640b4a040501535a0100501c57545503490d5700504f035a570b485301515b51500c0408015a046277392f5e513d3107015200436c3453574a4109424b625f&code=13bbbd9e
我跑完了没啥异常。。最后报错到底是zip文件还是excel文件?zip又在里面干了啥事?是哪个版本有这个问题呢?可以描述的清楚一点,或者加我私聊一哈~
@chenzhongyu11 我把poi 生成excel 的 cellStyle 设置为文本(之前都是默认常规) 尝试了下发现 easyExcel 3.1.0 读取无异常了。 十分费解
见信好: 附件是excel ,辛苦了!万分感谢
------------------ 原始邮件 ------------------ 发件人: "alibaba/easyexcel" @.>; 发送时间: 2022年8月2日(星期二) 晚上8:21 @.>; @.@.>; 主题: Re: [alibaba/easyexcel] 读取 poi 生成的excel报错 com.alibaba.excel.exception.ExcelAnalysisException: org.xml.sax.SAXParseException; lineNumber: 44476; columnNumber: 81; Invalid byte 2 of 4-byte UTF-8 sequence. (Issue #2622)
提供下excel
— Reply to this email directly, view it on GitHub, or unsubscribe. You are receiving this because you authored the thread.Message ID: @.***>
从QQ邮箱发来的超大附件
Job_Seeker_V15.xlsx (234.60M, 2022年09月02日 09:29 到期)进入下载页面:http://mail.qq.com/cgi-bin/ftnExs_download?t=exs_ftn_download&k=7d393435c71760c370563d3a4766041d0d0a000050545350180f0207074b020b575d190c0056011f075b575157000e0a065c0c07617536785a5b6b6604035d574766620454484e5e46413408&code=5945af62