怎样在Android中解析doc、docx、xls、xlsx格式文件
原文链接：http://www.open-open.com/home/space-37924-do-blog-id-5872.html
解析doc，需要tm-extractors-0.4.jar这个包
解析xls，需要jxl.jar这个包
解析xls
解析xlsx
01 |
public static String readDOC(String path) {
|
02 |
// 创建输入流读取doc文件
|
03 |
FileInputStream in;
|
04 |
String text = null;
|
05 |
// Environment.getExternalStorageDirectory().getAbsolutePath()+ "/aa.doc") |
06 |
try {
|
07 |
in = new FileInputStream(new File(path));
|
08 |
int a= in.available();
|
09 |
WordExtractor extractor = null;
|
10 |
// 创建WordExtractor
|
11 |
extractor = new WordExtractor();
|
12 |
// 对doc文件进行提取
|
13 |
text = extractor.extractText(in);
|
14 |
System.out.println("解析得到的东西"+text);
|
15 |
} catch (FileNotFoundException e) {
|
16 |
e.printStackTrace();
|
17 |
} catch (Exception e) {
|
18 |
e.printStackTrace();
|
19 |
}
|
20 |
if (text == null) {
|
21 |
text = "解析文件出现问题";
|
22 |
}
|
23 |
return text;
|
24 |
}
|
01 |
public static String readXLS(String path) {
|
02 |
String str = "";
|
03 |
try {
|
04 |
Workbook workbook = null;
|
05 |
workbook = Workbook.getWorkbook(new File(path));
|
06 |
Sheet sheet = workbook.getSheet(0);
|
07 |
Cell cell = null;
|
08 |
int columnCount = sheet.getColumns();
|
09 |
int rowCount = sheet.getRows();
|
10 |
for (int i = 0; i < rowCount; i++) {
|
11 |
for (int j = 0; j < columnCount; j++) {
|
12 |
cell = sheet.getCell(j, i);
|
13 |
String temp2 = "";
|
14 |
if (cell.getType() == CellType.NUMBER) {
|
15 |
temp2 = ((NumberCell) cell).getValue() + "";
|
16 |
} else if (cell.getType() == CellType.DATE) {
|
17 |
temp2 = "" + ((DateCell) cell).getDate();
|
18 |
} else {
|
19 |
temp2 = "" + cell.getContents();
|
20 |
}
|
21 |
str = str + " " + temp2;
|
22 |
}
|
23 |
str += "\n";
|
24 |
}
|
25 |
workbook.close();
|
26 |
} catch (Exception e) {
|
27 |
}
|
28 |
if (str == null) {
|
29 |
str = "解析文件出现问题";
|
30 |
}
|
31 |
return str;
|
32 |
33 |
}
|
解析docx
01 |
public static String readDOCX(String path) {
|
02 |
String river = "";
|
03 |
try {
|
04 |
ZipFile xlsxFile = new ZipFile(new File(path));
|
05 |
ZipEntry sharedStringXML = xlsxFile.getEntry("word/document.xml");
|
06 |
InputStream inputStream = xlsxFile.getInputStream(sharedStringXML);
|
07 |
XmlPullParser xmlParser = Xml.newPullParser();
|
08 |
xmlParser.setInput(inputStream, "utf-8");
|
09 |
int evtType = xmlParser.getEventType();
|
10 |
while (evtType != XmlPullParser.END_DOCUMENT) {
|
11 |
switch (evtType) {
|
12 |
case XmlPullParser.START_TAG:
|
13 |
String tag = xmlParser.getName();
|
14 |
System.out.println(tag);
|
15 |
if (tag.equalsIgnoreCase("t")) {
|
16 |
river += xmlParser.nextText() + "\n";
|
17 |
}
|
18 |
break;
|
19 |
case XmlPullParser.END_TAG:
|
20 |
break;
|
21 |
default:
|
22 |
break;
|
23 |
}
|
24 |
evtType = xmlParser.next();
|
25 |
}
|
26 |
} catch (ZipException e) {
|
27 |
e.printStackTrace();
|
28 |
} catch (IOException e) {
|
29 |
e.printStackTrace();
|
30 |
} catch (XmlPullParserException e) {
|
31 |
e.printStackTrace();
|
32 |
}
|
33 |
if (river == null) {
|
34 |
river = "解析文件出现问题";
|
35 |
}
|
36 |
37 |
return river;
|
38 |
}
|
01 |
public static String readXLSX(String path) {
|
02 |
String str = "";
|
03 |
String v = null;
|
04 |
boolean flat = false;
|
05 |
List<String> ls = new ArrayList<String>();
|
06 |
try {
|
07 |
ZipFile xlsxFile = new ZipFile(new File(path));
|
08 |
ZipEntry sharedStringXML = xlsxFile
|
09 |
.getEntry("xl/sharedStrings.xml");
|
10 |
InputStream inputStream = xlsxFile.getInputStream(sharedStringXML);
|
11 |
XmlPullParser xmlParser = Xml.newPullParser();
|
12 |
xmlParser.setInput(inputStream, "utf-8");
|
13 |
int evtType = xmlParser.getEventType();
|
14 |
while (evtType != XmlPullParser.END_DOCUMENT) {
|
15 |
switch (evtType) {
|
16 |
case XmlPullParser.START_TAG:
|
17 |
String tag = xmlParser.getName();
|
18 |
if (tag.equalsIgnoreCase("t")) {
|
19 |
ls.add(xmlParser.nextText());
|
20 |
}
|
21 |
break;
|
22 |
case XmlPullParser.END_TAG:
|
23 |
break;
|
24 |
default:
|
25 |
break;
|
26 |
}
|
27 |
evtType = xmlParser.next();
|
28 |
}
|
29 |
ZipEntry sheetXML = xlsxFile.getEntry("xl/worksheets/sheet1.xml");
|
30 |
InputStream inputStreamsheet = xlsxFile.getInputStream(sheetXML);
|
31 |
XmlPullParser xmlParsersheet = Xml.newPullParser();
|
32 |
xmlParsersheet.setInput(inputStreamsheet, "utf-8");
|
33 |
int evtTypesheet = xmlParsersheet.getEventType();
|
34 |
while (evtTypesheet != XmlPullParser.END_DOCUMENT) {
|
35 |
switch (evtTypesheet) {
|
36 |
case XmlPullParser.START_TAG:
|
37 |
String tag = xmlParsersheet.getName();
|
38 |
if (tag.equalsIgnoreCase("row")) {
|
39 |
} else if (tag.equalsIgnoreCase("c")) {
|
40 |
String t = xmlParsersheet.getAttributeValue(null, "t");
|
41 |
if (t != null) {
|
42 |
flat = true;
|
43 |
System.out.println(flat + "有");
|
44 |
} else {
|
45 |
System.out.println(flat + "没有");
|
46 |
flat = false;
|
47 |
}
|
48 |
} else if (tag.equalsIgnoreCase("v")) {
|
49 |
v = xmlParsersheet.nextText();
|
50 |
if (v != null) {
|
51 |
if (flat) {
|
52 |
str += ls.get(Integer.parseInt(v)) + " ";
|
53 |
} else {
|
54 |
str += v + " ";
|
55 |
}
|
56 |
}
|
57 |
}
|
58 |
break;
|
59 |
case XmlPullParser.END_TAG:
|
60 |
if (xmlParsersheet.getName().equalsIgnoreCase("row")
|
61 |
&& v != null) {
|
62 |
str += "\n";
|
63 |
}
|
64 |
break;
|
65 |
}
|
66 |
evtTypesheet = xmlParsersheet.next();
|
67 |
}
|
68 |
System.out.println(str);
|
69 |
} catch (ZipException e) {
|
70 |
e.printStackTrace();
|
71 |
} catch (IOException e) {
|
72 |
e.printStackTrace();
|
73 |
} catch (XmlPullParserException e) {
|
74 |
e.printStackTrace();
|
75 |
}
|
76 |
if (str == null) {
|
77 |
str = "解析文件出现问题";
|
78 |
}
|
79 |
80 |
return str;
|