需要使用的maven依赖
<!--注意版本保持一致 poi poi-ooxml poi-scratchpad-->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>4.1.2</version>
</dependency>
<!-- 操作doc ppt xls -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>4.1.2</version>
</dependency>
<!-- 操作docx pptx xlsx -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>4.1.2</version>
</dependency>
<!-- docx转html -->
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>fr.opensagres.poi.xwpf.converter.xhtml</artifactId>
    <version>2.0.2</version>
</dependency>
ample;import org.apache.poi.hwpf.HWPFDocument;
import org.apache.verter.WordToHtmlConverter;
import org.w3c.dom.Document;
l.parsers.DocumentBuilderFactory;
l.transform.OutputKeys;
l.transform.Transformer;
l.transform.TransformerFactory;
l.transform.dom.DOMSource;
l.transform.stream.StreamResult;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;public class DocToHtml {public static void main(String[] args) {//doc文件使用HWPFDocument读取,docx文件使用XWPFDocument读取String filePath="C:\Users\Administrator\Desktop\doc测试.doc";File file = new File(filePath);try {FileInputStream inputStream = new FileInputStream(file);HWPFDocument hwpfDocument = new HWPFDocument(inputStream);//构造函数放入文件流得到HWPFDocument对象WordToHtmlConverter wordToHtmlConverter = new wInstance().newDocumentBuilder().newDocument());//构造WordToHtmlConverter对象//开始解析doc文档---------------------------------------------wordToHtmlConverter.processDocument(hwpfDocument);Document document = Document();//通过TransformerFactory创造出Transformer ,并设置Transformer的属性TransformerFactory transformerFactory = wInstance();Transformer transformer = wTransformer();transformer.setOutputProperty(OutputKeys.ENCODING,"UTF-8");transformer.setOutputProperty(OutputKeys.INDENT,"yes");transformer.setOutputProperty(OutputKeys.METHOD,"html");//ansform()需要参数1 Source 参数2 ResultDOMSource domSource = new DOMSource(document);ByteArrayOutputStream outputtarget = new ByteArrayOutputStream();StreamResult streamResult = new StreamResult(outputtarget);//开始转换,结果数据在ByteArrayOutputStream里ansform(domSource,streamResult);//参数1 Source 参数2 Result//转成字符串看看String string = String("utf-8");System.out.println(string);} catch (Exception e) {throw new RuntimeException(e);}}
}
输出:
<html>
<head>
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
<style type="text/css">.b1{white-space-collapsing:preserve;}
.b2{margin: 1.0in 1.25in 1.0in 1.25in;}
.s1{vertical-align:super;font-size:smaller;}
.s2{font-weight:bold;}
.s3{font-size:16pt;}
.s4{font-size:22pt;font-weight:bold;}
.p1{text-align:justify;hyphenate:auto;font-family:Calibri;font-size:10pt;}
</style>
<meta content="Administrator" name="author">
</head>
<body class="b1 b2">
<p class="p1">
<span>X</span><span class="s1">2</span>
</p>
<p class="p1"></p>
<p class="p1">
<span>hhhhhhhhhh</span><span class="s2">hhhhhh</span><span>hhhh</span><span class="s3">hhhh</span><span>hhh</span><span class="s4">h</span><span>hh</span>
</p>
</body>
</html>
ample;import fr.opensagres.verter.xhtml.Base64EmbedImgManager;
import fr.opensagres.verter.xhtml.XHTMLConverter;
import fr.opensagres.verter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;public class DocxToHtml {public static void main(String[] args) {String filePath="C:\Users\Administrator\Desktop\docx测试.docx";File file = new File(filePath);try {FileInputStream inputStream = new FileInputStream(file);//创建操作docx word的对象XWPFDocument xwpfDocument = new XWPFDocument(inputStream);//解析XHTML配置XHTMLOptions xhtmlOptions = ate();// 将样式都写为内联样式,而不是写到style标签中 默认falsexhtmlOptions.setFragment(true);xhtmlOptions.setIgnoreStylesIfUnused(false);xhtmlOptions.setImageManager(new Base64EmbedImgManager());//图片用base64转化//将XWPFDocument转化成HTMLByteArrayOutputStream outputtarget = new ByteArrayOutputStream();Instance().convert(xwpfDocument,outputtarget,xhtmlOptions);//转成字符串看看String string = String("utf-8");System.out.println(string);} catch (Exception e) {throw new RuntimeException(e);}}
}
输出:
<div style="width:595.3pt;margin-bottom:72.0pt;margin-top:72.0pt;margin-left:90.0pt;margin-right:90.0pt;"><p style="white-space:pre-wrap;"><span style="white-space:pre-wrap;">X</span><span style="font-family:'Calibri';font-size:10.0pt;vertical-align:super;">2</span></p><p style="white-space:pre-wrap;"><br/></p><p style="white-space:pre-wrap;"><span style="white-space:pre-wrap;">sjhhfios</span><span style="font-weight:bold;white-space:pre-wrap;">afjoajdp</span><span style="white-space:pre-wrap;">asj</span><span id="_GoBack"/></p></div>
格式化一下好看:
<div style="width:595.3pt;margin-bottom:72.0pt;margin-top:72.0pt;margin-left:90.0pt;margin-right:90.0pt;">
    <p style="white-space:pre-wrap;">
        <span style="white-space:pre-wrap;">X</span>
        <span style="font-family:'Calibri';font-size:10.0pt;vertical-align:super;">2</span>
    </p>
    <p style="white-space:pre-wrap;"><br /></p>
    <p style="white-space:pre-wrap;">
        <span style="white-space:pre-wrap;">sjhhfios</span>
        <span style="font-weight:bold;white-space:pre-wrap;">afjoajdp</span>
        <span style="white-space:pre-wrap;">asj</span>
        <span id="_GoBack" />
    </p>
</div>
本文发布于:2024-02-01 14:19:49,感谢您对本站的认可!
本文链接:https://www.4u4v.net/it/170676838937206.html
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。
留言与评论(共有 0 条评论) |