在“Adding a row to a large xlsx file (Out of Memory)”中,我提供了一种使用 StAX 将行写入 Excel 工作表、而无需打开整个工作簿的方法。但那个示例没有使用共享字符串表。
所以这里有一个稍微修改过的版本。
开始时,您需要准备一个如下所示的 ReadAndWriteTest.xlsx:
每次运行代码时,将添加 100,000 行,在列 A 中包含一个随机字符串,在列 B 中包含一个随机双精度值。字符串将由共享字符串表管理。因此,这个共享字符串表中的唯一字符串将比工作表中的字符串总和少得多。
我已在生产环境中使用这种方法,当然,那里的代码更复杂、也更有条理,因为本示例只是想用尽量简单的代码来展示这种方法。它运行良好,性能比 SXSSF 更高,并且同时支持读取和写入。
import java.io.File;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.ThreadLocalRandom;
import java.util.regex.Pattern;
import javax.xml.namespace.QName;
import javax.xml.stream.XMLEventFactory;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLEventWriter;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.Attribute;
import javax.xml.stream.events.Characters;
import javax.xml.stream.events.EndElement;
import javax.xml.stream.events.StartElement;
import javax.xml.stream.events.XMLEvent;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRst;
/**
 * Appends rows to the first worksheet of {@code ReadAndWriteTest.xlsx} by streaming the
 * sheet XML through StAX instead of loading the whole workbook.
 *
 * <p>Each run copies {@code /xl/worksheets/sheet1.xml} event by event and inserts
 * {@link #ROWS_TO_APPEND} new rows immediately before the closing {@code sheetData} tag.
 * Column A of every new row holds a random fragment of {@link #LOREM_IPSUM}, stored through
 * the workbook's shared strings table; column B holds a random double.
 */
class StaxReadAndWriteTest {

    /** Number of rows appended to the sheet on every run. */
    private static final int ROWS_TO_APPEND = 100000;

    /** Source text from which the random strings for column A are cut. */
    private static final String LOREM_IPSUM =
            "Lorem ipsum dolor sit amet ne mei euismod interpretaris est te iusto causae doctus.";

    /** Local name of the element that encloses all rows of a worksheet. */
    private static final String SHEET_DATA = "sheetData";

    private static final QName ROW = new QName("row");
    private static final QName CELL = new QName("c");
    private static final QName VALUE = new QName("v");

    /**
     * Program entry point. Any failure is reported by printing the stack trace.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            File file = new File("ReadAndWriteTest.xlsx");
            OPCPackage opcpackage = OPCPackage.open(file);
            try {
                //if there are strings in the sheet data, we need the SharedStringsTable
                PackagePart sharedstringstablepart = requirePart(opcpackage, "/xl/sharedStrings.xml");
                SharedStringsTable sharedstringstable = new SharedStringsTable();
                try (InputStream in = sharedstringstablepart.getInputStream()) {
                    sharedstringstable.readFrom(in);
                }

                PackagePart sheetpart = requirePart(opcpackage, "/xl/worksheets/sheet1.xml");
                // The input stream is obtained before the output stream, as in the original flow:
                // the sheet is read from the existing part while its replacement is being written.
                try (InputStream sheetIn = sheetpart.getInputStream();
                     OutputStream sheetOut = sheetpart.getOutputStream()) {
                    copySheetAppendingRows(sheetIn, sheetOut, sharedstringstable);
                }

                //write the SharedStringsTable
                try (OutputStream out = sharedstringstablepart.getOutputStream()) {
                    sharedstringstable.writeTo(out);
                }
            } catch (Exception ex) {
                // close() would save the package; after a failure the sheet may be only partly
                // rewritten, so discard all changes instead of persisting them.
                opcpackage.revert();
                throw ex;
            }
            opcpackage.close();
        } catch (Exception ex) {
            ex.printStackTrace();
        }
    }

    /**
     * Returns the first package part whose name matches {@code partName}.
     *
     * @param opcpackage the opened package to search
     * @param partName regular expression for the part name, e.g. {@code /xl/sharedStrings.xml}
     * @return the first matching part
     * @throws IllegalStateException if the package contains no matching part
     */
    private static PackagePart requirePart(OPCPackage opcpackage, String partName) {
        List<PackagePart> parts = opcpackage.getPartsByName(Pattern.compile(partName));
        if (parts.isEmpty()) {
            throw new IllegalStateException("Package part not found: " + partName);
        }
        return parts.get(0);
    }

    /**
     * Copies the sheet XML from {@code in} to {@code out} and inserts the new rows just before
     * the end tag of {@code sheetData}.
     *
     * <p>Anchoring on that end tag (rather than peeking at whatever follows a {@code </row>})
     * keeps the copy working when whitespace or comments separate the elements and when the
     * sheet has no rows yet.
     *
     * @param in stream positioned at the start of the sheet XML
     * @param out stream receiving the rewritten sheet XML
     * @param sharedStrings table that receives the strings of the new rows
     * @throws XMLStreamException if the sheet XML cannot be read or written
     */
    private static void copySheetAppendingRows(InputStream in, OutputStream out,
            SharedStringsTable sharedStrings) throws XMLStreamException {
        XMLInputFactory inputFactory = XMLInputFactory.newInstance();
        // Worksheet XML needs neither a DTD nor external entities; refuse both.
        inputFactory.setProperty(XMLInputFactory.SUPPORT_DTD, Boolean.FALSE);
        inputFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);

        XMLEventReader reader = inputFactory.createXMLEventReader(in);
        try {
            XMLEventWriter writer = XMLOutputFactory.newInstance().createXMLEventWriter(out);
            try {
                XMLEventFactory eventFactory = XMLEventFactory.newInstance();
                while (reader.hasNext()) { //loop over all XML in sheet1.xml
                    XMLEvent event = reader.nextEvent();
                    if (event.isEndElement()
                            && SHEET_DATA.equals(event.asEndElement().getName().getLocalPart())) {
                        // all existing rows have been copied; add the new ones before </sheetData>
                        appendRandomRows(writer, eventFactory, sharedStrings, ROWS_TO_APPEND);
                    }
                    writer.add(event); //every event that was read is written unchanged
                }
                writer.flush();
            } finally {
                writer.close();
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Writes {@code count} rows, each with a shared-string cell followed by a numeric cell.
     *
     * @param writer destination of the generated events
     * @param eventFactory factory used to create the events
     * @param sharedStrings table in which each random string is registered
     * @param count number of rows to write
     * @throws XMLStreamException if an event cannot be written
     */
    private static void appendRandomRows(XMLEventWriter writer, XMLEventFactory eventFactory,
            SharedStringsTable sharedStrings, int count) throws XMLStreamException {
        // t="s" marks a cell whose value is an index into the shared strings table
        List<Attribute> sharedStringType = Arrays.asList(eventFactory.createAttribute("t", "s"));
        ThreadLocalRandom random = ThreadLocalRandom.current();

        for (int i = 0; i < count; i++) {
            writer.add(eventFactory.createStartElement(ROW, null, null));

            //create a random string from the lorem ipsum text
            int length = random.nextInt(5, 20);
            int index = random.nextInt(0, LOREM_IPSUM.length() - length);
            CTRst ctstr = CTRst.Factory.newInstance();
            ctstr.setT(LOREM_IPSUM.substring(index, index + length).trim());
            //register the string and get sRef, the ID under which the table stores it
            int sRef = sharedStrings.addEntry(ctstr);

            //cell A: reference to the shared string
            writeCell(writer, eventFactory, sharedStringType, Integer.toString(sRef));
            //cell B: random double value below the string length chosen above
            writeCell(writer, eventFactory, null, Double.toString(random.nextDouble((double) length)));

            writer.add(eventFactory.createEndElement(ROW, null));
        }
    }

    /**
     * Writes one {@code <c><v>value</v></c>} cell.
     *
     * @param writer destination of the generated events
     * @param eventFactory factory used to create the events
     * @param attributes attributes for the {@code c} element, or {@code null} for none
     * @param value text content of the {@code v} element
     * @throws XMLStreamException if an event cannot be written
     */
    private static void writeCell(XMLEventWriter writer, XMLEventFactory eventFactory,
            List<Attribute> attributes, String value) throws XMLStreamException {
        StartElement cellStart = eventFactory.createStartElement(
                CELL, attributes == null ? null : attributes.iterator(), null);
        writer.add(cellStart);
        writer.add(eventFactory.createStartElement(VALUE, null, null));
        Characters content = eventFactory.createCharacters(value);
        writer.add(content);
        EndElement valueEnd = eventFactory.createEndElement(VALUE, null);
        writer.add(valueEnd);
        writer.add(eventFactory.createEndElement(CELL, null));
    }
}