【问题标题】:Convert docx to html using OpenXMLPowerTools failure with x-emf image(使用 OpenXMLPowerTools 将 docx 转换为 html 时,x-emf 图像转换失败)
【发布时间】:2021-01-20 04:24:46
【问题描述】:

我正在使用 OpenXMLPowerTools 将文档 (docx) 转换为 html 文件的功能,但在处理 .emf 图像时遇到了问题。

文档中的所有图片都是 .emf 图片,无法转换为 html。

我进行了研究,似乎 OpenXMLPowerTools 不支持此扩展?我说的对吗?

是否有人在使用此工具时遇到此问题?

这是我当前的代码(VB.NET):

''' <summary>
''' Converts the .docx file at the configured input path to a single HTML file
''' in the configured output directory, embedding every supported image as a
''' base64 data URI.
''' </summary>
''' <exception cref="OpenXmlPowerToolsException">
''' Thrown when an output directory is configured but does not exist.
''' </exception>
Sub Main()

    Dim strFilePath As String = "FilePath"
    Dim strOutputDirectory As String = "OutputPath"

    Dim fi As New FileInfo(strFilePath)

    ' Path.ChangeExtension handles ".DOCX"/".Docx" and names that merely contain
    ' ".docx", which a case-sensitive String.Replace does not.
    Dim destFileName As New FileInfo(Path.ChangeExtension(fi.Name, ".html"))

    If Not String.IsNullOrEmpty(strOutputDirectory) Then
        Dim di As New DirectoryInfo(strOutputDirectory)
        If Not di.Exists Then
            Throw New OpenXmlPowerToolsException("Output directory does not exist")
        End If
        destFileName = New FileInfo(Path.Combine(di.FullName, destFileName.Name))
    End If

    ' Work on an in-memory copy so the source file is neither locked nor modified
    ' on disk, and so everything is released deterministically by the Using blocks.
    ' NOTE(review): the converter is believed to rewrite the document while
    ' converting (e.g. accepting revisions) - confirm against the library version in use.
    Dim arrDocx As Byte() = File.ReadAllBytes(fi.FullName)
    Dim htmlElement As XElement

    Using docStream As New MemoryStream()
        docStream.Write(arrDocx, 0, arrDocx.Length)

        Using wdDoc As WordprocessingDocument = WordprocessingDocument.Open(docStream, True)

            ' Fall back to the file path when the document has no title.
            Dim strPageTitle As String = fi.FullName
            Dim part As CoreFilePropertiesPart = wdDoc.CoreFilePropertiesPart
            If part IsNot Nothing Then
                strPageTitle = If(CStr(part.GetXDocument().Descendants(DC.title).FirstOrDefault()), fi.FullName)
            End If

            Dim HtmlSettings As New WmlToHtmlConverterSettings

            With HtmlSettings

                .PageTitle = strPageTitle
                .FabricateCssClasses = True
                .CssClassPrefix = "pt-"
                .RestrictToSupportedLanguages = False
                .RestrictToSupportedNumberingFormats = False

                ' Images are embedded inline as data URIs, so no "_files" directory
                ' is created next to the output file.
                ' NOTE(review): some OpenXmlPowerTools versions appear to filter out
                ' content types such as image/x-emf before this handler is invoked -
                ' confirm against the library version in use.
                .ImageHandler = Function(imageinfo)

                                    ' Local to the handler so an unsupported image can never
                                    ' inherit the format chosen for a previous image.
                                    Dim outputFormat As ImageFormat = Nothing

                                    Dim arrContentType As String() = If(imageinfo.ContentType, String.Empty).Split("/"c)
                                    If arrContentType.Length < 2 Then Return Nothing

                                    Select Case arrContentType(1).ToLowerInvariant()
                                        Case "png"
                                            outputFormat = ImageFormat.Png
                                        Case "gif"
                                            outputFormat = ImageFormat.Gif
                                        Case "bmp"
                                            outputFormat = ImageFormat.Bmp
                                        Case "jpeg"
                                            outputFormat = ImageFormat.Jpeg
                                        Case "tiff"
                                            ' Browsers generally cannot display TIFF; re-encode as GIF.
                                            outputFormat = ImageFormat.Gif
                                        Case "x-wmf", "x-emf"
                                            ' GDI+ has no WMF/EMF encoder (Image.Save falls back to PNG
                                            ' for those formats), so rasterise to PNG explicitly and
                                            ' label the payload accordingly.
                                            outputFormat = ImageFormat.Png
                                    End Select

                                    If outputFormat Is Nothing Then Return Nothing

                                    Dim strBase64 As String
                                    Using ms As New MemoryStream()
                                        imageinfo.Bitmap.Save(ms, outputFormat)
                                        strBase64 = Convert.ToBase64String(ms.ToArray())
                                    End Using

                                    ' The MIME type must describe the bytes actually written above,
                                    ' not the bitmap's original raw format.
                                    Dim codec As ImageCodecInfo = ImageCodecInfo.GetImageEncoders().
                                        FirstOrDefault(Function(c) c.FormatID = outputFormat.Guid)
                                    Dim strMimeType As String = If(codec IsNot Nothing, codec.MimeType, "image/png")

                                    Dim strImageSource As String = String.Format("data:{0};base64,{1}", strMimeType, strBase64)

                                    Return New XElement(Xhtml.img,
                                                        New XAttribute(NoNamespace.src, strImageSource), imageinfo.ImgStyleAttribute,
                                                        If(imageinfo.AltText IsNot Nothing, New XAttribute(NoNamespace.alt, imageinfo.AltText), Nothing))

                                End Function

            End With

            ' The handler above runs during this call, while the document is still open.
            htmlElement = WmlToHtmlConverter.ConvertToHtml(wdDoc, HtmlSettings)

        End Using
    End Using

    Dim html As New XDocument(New XDocumentType("html", Nothing, Nothing, Nothing), htmlElement)
    Dim strHtml As String = html.ToString(SaveOptions.DisableFormatting)
    File.WriteAllText(destFileName.FullName, strHtml, Encoding.UTF8)

End Sub

【问题讨论】:

    标签: .net vb.net


    【解决方案1】:

    现在我已经成功地将 docx 转换为带有 emf 图像的 html

    【讨论】:

      猜你喜欢
      • 2019-05-07
      • 1970-01-01
      • 2014-05-25
      • 2016-03-27
      • 1970-01-01
      • 1970-01-01
      • 2016-01-09
      • 2015-05-05
      • 1970-01-01
      相关资源
      最近更新 更多