【发布时间】:2021-01-20 04:24:46
【问题描述】:
我正在使用 OpenXMLPowerTools 提供的功能将 Word 文档 (docx) 转换为 html 文件,但在处理 .emf 图像时遇到了问题。
文档中的所有图片都是 .emf 图片,无法转换为 html。
我查阅了一些资料,OpenXMLPowerTools 似乎不支持这种图像格式(扩展名),我的理解对吗?
是否有人在使用此工具时遇到此问题?
这是我当前的代码(VB.NET):
''' <summary>
''' Converts a .docx file into a single self-contained HTML file.
''' Images are embedded in the HTML as base64 data URIs, so no side-car
''' image folder is written.
''' </summary>
''' <exception cref="OpenXmlPowerToolsException">
''' Thrown when an output directory is configured but does not exist.
''' </exception>
Sub Main()
    Dim strFilePath As String = "FilePath"
    Dim strOutputDirectory As String = "OutputPath"

    Dim fi As New FileInfo(strFilePath)

    ' Path.ChangeExtension handles any casing of ".docx" and never touches
    ' other occurrences of ".docx" inside the file name.
    Dim destFileName As New FileInfo(Path.ChangeExtension(fi.Name, ".html"))

    If Not String.IsNullOrEmpty(strOutputDirectory) Then
        Dim di As New DirectoryInfo(strOutputDirectory)
        If Not di.Exists Then
            Throw New OpenXmlPowerToolsException("Output directory does not exist")
        End If
        destFileName = New FileInfo(Path.Combine(di.FullName, destFileName.Name))
    End If

    ' Work on an in-memory copy so the source file on disk is never modified
    ' and no file handle stays open. The stream must be created empty and then
    ' written to: New MemoryStream(bytes) is not resizable.
    Dim arrDocument As Byte() = File.ReadAllBytes(fi.FullName)

    Using msDocument As New MemoryStream()
        msDocument.Write(arrDocument, 0, arrDocument.Length)

        Using wdDoc As WordprocessingDocument = WordprocessingDocument.Open(msDocument, True)
            ' Prefer the document's own title; fall back to the file path.
            Dim strPageTitle As String = fi.FullName
            Dim part As CoreFilePropertiesPart = wdDoc.CoreFilePropertiesPart
            If part IsNot Nothing Then
                strPageTitle = If(CStr(part.GetXDocument().Descendants(DC.title).FirstOrDefault()), fi.FullName)
            End If

            Dim HtmlSettings As New WmlToHtmlConverterSettings
            With HtmlSettings
                .PageTitle = strPageTitle
                .FabricateCssClasses = True
                .CssClassPrefix = "pt-"
                .RestrictToSupportedLanguages = False
                .RestrictToSupportedNumberingFormats = False
                .ImageHandler = Function(info)
                                    If String.IsNullOrEmpty(info.ContentType) Then Return Nothing

                                    ' All per-image state is local to the handler so that one
                                    ' image can never inherit the format chosen for a previous one.
                                    Dim strSubtype As String = info.ContentType.Split("/"c).Last().ToLowerInvariant()
                                    Dim targetFormat As ImageFormat

                                    Select Case strSubtype
                                        Case "png"
                                            targetFormat = ImageFormat.Png
                                        Case "gif"
                                            targetFormat = ImageFormat.Gif
                                        Case "bmp"
                                            targetFormat = ImageFormat.Bmp
                                        Case "jpeg"
                                            targetFormat = ImageFormat.Jpeg
                                        Case "tiff"
                                            ' Browsers generally cannot display TIFF; re-encode as GIF.
                                            targetFormat = ImageFormat.Gif
                                        Case "x-wmf", "x-emf"
                                            ' GDI+ has no WMF/EMF encoder, so saving in those formats
                                            ' fails; rasterise metafiles to PNG instead.
                                            ' NOTE(review): some OpenXmlPowerTools versions appear to skip
                                            ' metafile image parts before the handler is invoked - confirm
                                            ' against the library version in use.
                                            targetFormat = ImageFormat.Png
                                        Case Else
                                            Return Nothing
                                    End Select

                                    ' Take the MIME type from the encoder that is actually used, so the
                                    ' data URI always describes the bytes it carries.
                                    Dim encoder As ImageCodecInfo =
                                        ImageCodecInfo.GetImageEncoders().FirstOrDefault(Function(c) c.FormatID = targetFormat.Guid)
                                    If encoder Is Nothing Then Return Nothing

                                    Dim strBase64 As String
                                    Using ms As New MemoryStream()
                                        info.Bitmap.Save(ms, targetFormat)
                                        strBase64 = Convert.ToBase64String(ms.ToArray())
                                    End Using

                                    Dim strImageSource As String = String.Format("data:{0};base64,{1}", encoder.MimeType, strBase64)

                                    Return New XElement(Xhtml.img,
                                        New XAttribute(NoNamespace.src, strImageSource),
                                        info.ImgStyleAttribute,
                                        If(info.AltText IsNot Nothing, New XAttribute(NoNamespace.alt, info.AltText), Nothing))
                                End Function
            End With

            Dim htmlElement As XElement = WmlToHtmlConverter.ConvertToHtml(wdDoc, HtmlSettings)

            ' Emit an HTML5 doctype followed by the converted markup, unformatted so
            ' that significant whitespace in the document is preserved.
            Dim html As New XDocument(New XDocumentType("html", Nothing, Nothing, Nothing), htmlElement)
            File.WriteAllText(destFileName.FullName, html.ToString(SaveOptions.DisableFormatting), Encoding.UTF8)
        End Using
    End Using
End Sub
【问题讨论】: