你的位置:首页 > Java教程

[Java教程]【亟梦】POI基于事件驱动解析大数据量2007版本Excel,空值导致列错位问题


1.目前测试了20M的文件,可以读取。

2.支持单个工作表1万+的数据行数,耗时如图。

3.以下是关键地方处理的代码

 1      //Accepts objects needed while parsing.  2     // @param styles Table of styles  3     // @param strings Table of shared strings  4     // @param cols  Minimum number of columns to show  5     // @param target Sink for output  6     public MyXSSFSheetHandler(  7         StylesTable styles,  8         ReadOnlySharedStringsTable strings,  9         int cols,  10         PrintStream target) {  11       this.stylesTable = styles;  12       this.sharedStringsTable = strings;  13       this.minColumnCount = cols;  14       this.output = target;  15       this.value = new StringBuffer();  16       this.nextDataType = xssfDataType.NUMBER;  17       this.formatter = new DataFormatter();  18       rowlist = new ArrayList<String>(0); 19       rowReader = new RowReader(); 20       rowMap = new HashMap<Integer, String>(0); 21       rowString = new StringBuffer(); 22     } 23     // @see org. 24     public void startElement(String uri, String localName, String name,  25                 Attributes attributes) throws SAXException  26     {  27   28        29       if ("inlineStr".equals(name) || "v".equals(name))  30       {  31         vIsOpen = true;  32         // Clear contents cache  33         value.setLength(0);  34       }  35       // c => cell  36       else if ("c".equals(name))  37       {  38         // Get the cell reference  39         String r = attributes.getValue("r");  40         int firstDigit = -1;  41         for (int c = 0; c < r.length(); ++c)  42         {  43           if (Character.isDigit(r.charAt(c)))  44           {  45             firstDigit = c;  46             break;  47           }  48         }  49         thisColumn = nameToColumn(r.substring(0, firstDigit));  50   51         // Set up defaults.  52         this.nextDataType = xssfDataType.NUMBER;  53         this.formatIndex = -1;  54         this.formatString = null;  55         String cellType = attributes.getValue("t");  56         String cellStyleStr = attributes.getValue("s");  57         if ("b".equals(cellType))  58           nextDataType = xssfDataType.BOOL;  59         else if ("e".equals(cellType))  60           nextDataType = xssfDataType.ERROR;  61         else if ("inlineStr".equals(cellType))  62           nextDataType = xssfDataType.INLINESTR;  63         else if ("s".equals(cellType))  64           nextDataType = xssfDataType.SSTINDEX;  65         else if ("str".equals(cellType))  66           nextDataType = xssfDataType.FORMULA;  67         else if (cellStyleStr != null) {  68           // It's a number, but almost certainly one  69           // with a special style or format  70           int styleIndex = Integer.parseInt(cellStyleStr);  71           XSSFCellStyle style = stylesTable.getStyleAt(styleIndex);  72           this.formatIndex = style.getDataFormat();  73           this.formatString = style.getDataFormatString();  74           if (this.formatString == null)  75             this.formatString = BuiltinFormats.getBuiltinFormat(this.formatIndex);  76         }  77       }  78   79     }  80   81  82     // @see org. 83     public void endElement(String uri, String localName, String name)  84         throws SAXException  85     {  86   87       String thisStr = null;  88        89       // v => contents of a cell  90       if ("v".equals(name))  91       {  92         // Process the value contents as required.  93         // Do now, as characters() may be called more than once  94         switch (nextDataType) {  95   96           case BOOL:  97             char first = value.charAt(0);  98             thisStr = first == '0' ? "FALSE" : "TRUE";  99             break; 100  101           case ERROR: 102             thisStr = "\"ERROR:" + value.toString() + '"'; 103             break; 104  105           case FORMULA: 106             // A formula could result in a string value, 107             // so always add double-quote characters. 108             thisStr = '"' + value.toString() + '"'; 109             break; 110  111           case INLINESTR: 112             // TODO: have seen an example of this, so it's untested. 113             XSSFRichTextString rtsi = new XSSFRichTextString(value.toString()); 114             thisStr = '"' + rtsi.toString() + '"'; 115             break; 116  117           case SSTINDEX: 118             String sstIndex = value.toString(); 119             try { 120               int idx = Integer.parseInt(sstIndex); 121               XSSFRichTextString rtss = new XSSFRichTextString(sharedStringsTable.getEntryAt(idx)); 122               thisStr = '"' + rtss.toString() + '"'; 123             } catch (NumberFormatException ex) { 124               output.println("Failed to parse SST index '" + sstIndex + "': " + ex.toString()); 125             } 126             break; 127  128           case NUMBER: 129             String n = value.toString(); 130             if (this.formatString != null) 131               thisStr = formatter.formatRawCellContents(Double.parseDouble(n), this.formatIndex, this.formatString); 132             else 133               thisStr = n; 134             break; 135  136           default: 137             thisStr = "(TODO: Unexpected type: " + nextDataType + ")"; 138             break; 139         } 140  141         // Output after we've seen the string contents 142         // Emit commas for any fields that were missing on this row 143         if (lastColumnNumber == -1) 144         { 145           lastColumnNumber = 0; 146         } 147         for (int i = lastColumnNumber; i < thisColumn; ++i) 148         { 149           rowString.append(',');//每天加一个单元格的值到字符串中就追加一个逗号(末尾不添加)150           //output.print(','); 可以看到使用output是可以将每一个单元格使用逗号分割
              //但是如果使用rowlist添加到列表中,却始终无法得到空单元格的内容
              //也就是说:空单元格被忽略了。
              //具体请参照标红的地方进行处理:使用字符串拼接的方式获得完整的行数据,再使用逗号拆分组合成rowMap
151 }152 rowString.append(thisStr);// 这条code放在for后面,如果放在前面,会导致0和1两个单元格合为一个单元格。153 // Might be the empty string. 154 //output.print(thisStr);155 rowlist.add(thisStr);156 // Update column 157 if (thisColumn > -1)158 {159 lastColumnNumber = thisColumn;160 }161 rowIndex++;162 } 163 else if ("row".equals(name)) 164 { 165 166 // Print out any missing commas if needed 167 if (minColumns > 0) 168 { 169 // Columns are 0 based 170 if (lastColumnNumber == -1) 171 { 172 lastColumnNumber = 0; 173 } 174 for (int i = lastColumnNumber; i < (this.minColumnCount); i++) 175 { 176 output.print(',');177 } 178 } 179 180 // We're onto a new row 181 182 output.println(); 183 output.println(countrows++); 184 lastColumnNumber = -1; 185 rowIndex = 0;186 //rowMap = rowReader.getRowMap(rowlist);187 rowMap = rowReader.getRowMapByString(rowString.toString());188 // ADD =189 rowLst1000.add(rowMap);190 rowMap = null;191 rowMap = new HashMap<Integer, String>(0);192 if (countrows % 1000 == 0)193 {194 rowLst1000n.add(rowLst1000);195 rowLst1000 = null;196 rowLst1000 = new ArrayList<Map<Integer, String>>(0);197 }198 rowlist.clear();199 System.out.println(rowString.toString());200 rowString = null;201 rowString = new StringBuffer();202 } 203 }

以上是我自己的处理方式,当然还有其他的处理方式,再研究吧。毕竟写到此处的时候,我不过是一个不到1年经验的小菜鸟。

下面附上其余代码的参照地址:

java使用POI通过事件模型解析超过40M的Excel文件,解决空单元格问题

http://www.360sdn.com/java/2014/0524/3392.html

 

【做而会】