aademeng

Jakarta POI 是 Apache 的子项目,目标是处理 OLE2 对象,它提供了一组操作 Windows 文档的 Java API。网上有很多通过 POI 读取 Excel 的文章,读写都很方便,可以和 jxl 相媲美。本文主要介绍如何使用 POI 读取 Word 文档中的表格数据。

具体见代码

  1.  
    import java.io.File;
  2.  
    import java.io.FileInputStream;
  3.  
    import java.io.FileNotFoundException;
  4.  
     
  5.  
    import org.apache.poi.hwpf.HWPFDocument;
  6.  
    import org.apache.poi.hwpf.usermodel.Paragraph;
  7.  
    import org.apache.poi.hwpf.usermodel.Range;
  8.  
    import org.apache.poi.hwpf.usermodel.Table;
  9.  
    import org.apache.poi.hwpf.usermodel.TableCell;
  10.  
    import org.apache.poi.hwpf.usermodel.TableIterator;
  11.  
    import org.apache.poi.hwpf.usermodel.TableRow;
  12.  
     
  13.  
    import java.io.File;
  14.  
    import java.io.FileInputStream;
  15.  
    import java.io.InputStream;
  16.  
     
  17.  
    import org.apache.poi.POIXMLDocument;
  18.  
    import org.apache.poi.POIXMLTextExtractor;
  19.  
    import org.apache.poi.hwpf.extractor.WordExtractor;
  20.  
    import org.apache.poi.openxml4j.opc.OPCPackage;
  21.  
    import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
  22.  
     
  23.  
     
  24.  
    import org.apache.poi.poifs.filesystem.POIFSFileSystem;
  25.  
     
  26.  
    public class ExportDocImpl
  27.  
    {
  28.  
    public void testWord(){
  29.  
    try{
  30.  
    FileInputStream in = new FileInputStream("D:\\sinye.doc");//载入文档
  31.  
    POIFSFileSystem pfs = new POIFSFileSystem(in);
  32.  
    HWPFDocument hwpf = new HWPFDocument(pfs);
  33.  
    Range range = hwpf.getRange();//得到文档的读取范围
  34.  
    TableIterator it = new TableIterator(range);
  35.  
    //迭代文档中的表格
  36.  
    while (it.hasNext()) {
  37.  
    Table tb = (Table) it.next();
  38.  
    //迭代行,默认从0开始
  39.  
    for (int i = 0; i < tb.numRows(); i++) {
  40.  
    TableRow tr = tb.getRow(i);
  41.  
    //迭代列,默认从0开始
  42.  
    for (int j = 0; j < tr.numCells(); j++) {
  43.  
    TableCell td = tr.getCell(j);//取得单元格
  44.  
    //取得单元格的内容
  45.  
    for(int k=0;k<td.numParagraphs();k++){
  46.  
    Paragraph para =td.getParagraph(k);
  47.  
    String s = para.text();
  48.  
    System.out.println(s);
  49.  
    } //end for
  50.  
    } //end for
  51.  
    } //end for
  52.  
    } //end while
  53.  
    }catch(Exception e){
  54.  
    e.printStackTrace();
  55.  
    }
  56.  
    }//end method
  57.  
     
  58.  
     
  59.  
    public void testWord1(){
  60.  
    try {
  61.  
    //word 2003: 图片不会被读取
  62.  
    InputStream is = new FileInputStream(new File("D:\\sinye.doc"));
  63.  
    WordExtractor ex = new WordExtractor(is);
  64.  
    String text2003 = ex.getText();
  65.  
    System.out.println(text2003);
  66.  
    //word 2007 图片不会被读取, 表格中的数据会被放在字符串的最后
  67.  
    OPCPackage opcPackage = POIXMLDocument.openPackage("D:\\sinye.doc");
  68.  
    POIXMLTextExtractor extractor = new XWPFWordExtractor(opcPackage);
  69.  
    String text2007 = extractor.getText();
  70.  
    System.out.println(text2007);
  71.  
     
  72.  
    } catch (Exception e) {
  73.  
    e.printStackTrace();
  74.  
    }
  75.  
    }
  76.  
    }

 

分类:

技术点:

相关文章:

  • 2022-12-23
  • 2022-12-23
  • 2022-12-23
  • 2021-11-18
  • 2022-03-03
  • 2021-07-24
猜你喜欢
  • 2021-06-20
  • 2022-12-23
  • 2022-12-23
  • 2022-12-23
  • 2021-12-19
  • 2021-12-19
相关资源
相似解决方案