【问题标题】:How to remove Last Author and Revision Number of Word Document如何删除Word文档的最后作者和修订号
【发布时间】:2017-08-08 05:28:55
【问题描述】:

我有以下代码来删除 Word 文档的 Last Author 和 Revision Number

using Microsoft.Office.Core;
using Word = Microsoft.Office.Interop.Word;
using System.Reflection;
using System.IO;
...


// Clears the configured built-in document properties (for example "Last Author"
// and "Revision Number") of every listed Word document through Word automation.
// Observed behaviour: saving through Word rewrites "Last Author" with the current
// user and increments "Revision Number", so this approach cannot leave them empty.
Word.Application oWord;
Word._Document oDoc;

oWord = new Word.Application();
oWord.Visible = false;

List<string> lstDocFile = new List<string>();
//Add doc files here
List<string> g_lstCheck = new List<string>();
//Add list check here "Last Author" and "Revision Number"

try
{
    foreach (string path in lstDocFile)
    {
        oDoc = oWord.Documents.Open(path, ReadOnly: false);
        foreach (string chkItem in g_lstCheck)
        {
            // Only overwrite properties that currently hold a value.
            string strValue = oDoc.BuiltInDocumentProperties[chkItem].Value;
            if (!string.IsNullOrEmpty(strValue))
            {
                oDoc.BuiltInDocumentProperties[chkItem].Value = string.Empty;
            }
        }
        oDoc.Close(Word.WdSaveOptions.wdSaveChanges);
    }
}
finally
{
    // Always shut the hidden Word instance down, even if a document failed to
    // open or save; otherwise it keeps running in the background.
    oWord.Quit(Word.WdSaveOptions.wdDoNotSaveChanges);
}

代码运行后,我希望 Last Author 和 Revision Number 为空字符串。但结果是 Last Author 变成了我,Revision Number 增加了 1。我理解这是因为我使用以下代码保存 word 文档

oDoc.Close(Word.WdSaveOptions.wdSaveChanges);

请帮我用 C# 删除 Last Author 和 Revision Number。

【问题讨论】:

    标签: c# ms-word office-interop com-interop


    【解决方案1】:

    对于.docx(Open Xml)文件,最简单的方法是使用官方的Open XML SDK nuget package。有了这个,操作文档属性就很容易了:

    // Open the .docx package in editable (read/write) mode.
    using (var wordDoc = WordprocessingDocument.Open("myfile.docx", true))
    {
        // Blank out the author-related core properties of the package.
        var coreProps = wordDoc.PackageProperties;
        coreProps.Creator = null;
        coreProps.LastModifiedBy = null;
        coreProps.Revision = null;
    }
    

    对于 .doc(Word 97–2003)文件,这里有一个小的 C# 方法可以删除这些属性(在这种格式中,属性在技术上的存储方式完全不同):

    // Example call: delete the author, revision-number and last-author properties
    // from the Summary Information property set of "myfile.doc".
    RemoveProperties("myfile.doc", SummaryInformationFormatId, PIDSI_AUTHOR, PIDSI_REVNUMBER, PIDSI_LASTAUTHOR);
    
    ...
    
    /// <summary>
    /// Deletes the given property ids from one property set of a structured-storage
    /// (compound) file such as a legacy .doc, then commits the change.
    /// </summary>
    /// <param name="filePath">Path of the file to modify.</param>
    /// <param name="propertySet">Format id (FMTID) of the property set to edit.</param>
    /// <param name="ids">Property ids to delete; nothing is done when null or empty.</param>
    /// <exception cref="ArgumentNullException"><paramref name="filePath"/> is null.</exception>
    /// <exception cref="Win32Exception">
    /// The file or the property set could not be opened (the HRESULT is used as the error code).
    /// </exception>
    /// <remarks>
    /// If the file does not contain the requested property set (STG_E_FILENOTFOUND),
    /// the method returns without changing anything.
    /// </remarks>
    public static void RemoveProperties(string filePath, Guid propertySet, params int[] ids)
    {
        if (filePath == null)
            throw new ArgumentNullException(nameof(filePath));

        if (ids == null || ids.Length == 0)
            return;

        int hr = StgOpenStorageEx(filePath, STGM.STGM_DIRECT_SWMR | STGM.STGM_READWRITE | STGM.STGM_SHARE_DENY_WRITE, STGFMT.STGFMT_ANY, 0, IntPtr.Zero, IntPtr.Zero, typeof(IPropertySetStorage).GUID, out IPropertySetStorage setStorage);
        if (hr != 0)
            throw new Win32Exception(hr);

        IPropertyStorage storage = null;
        try
        {
            hr = setStorage.Open(propertySet, STGM.STGM_READWRITE | STGM.STGM_SHARE_EXCLUSIVE, out storage);
            if (hr != 0)
            {
                // A missing property set simply means there is nothing to remove.
                const int STG_E_FILENOTFOUND = unchecked((int)0x80030002);
                if (hr == STG_E_FILENOTFOUND)
                    return;

                throw new Win32Exception(hr);
            }

            // Build one PROPSPEC per id, each addressing the property by numeric id.
            var props = new PROPSPEC[ids.Length];
            for (int i = 0; i < ids.Length; i++)
            {
                props[i].ulKind = PRSPEC.PRSPEC_PROPID;
                props[i].union.propid = ids[i];
            }

            storage.DeleteMultiple(props.Length, props);
            storage.Commit(0);
        }
        finally
        {
            // Release the property storage first: it was opened with exclusive
            // sharing, so keeping the COM reference alive until the garbage
            // collector runs would keep the property set locked.
            if (storage != null)
                Marshal.ReleaseComObject(storage);

            Marshal.ReleaseComObject(setStorage);
        }
    }
    
    // "The Summary Information Property Set"
    // https://msdn.microsoft.com/en-us/library/windows/desktop/aa380376.aspx
    // Format id of the Summary Information property set; pass it as the
    // propertySet argument of RemoveProperties.
    public static readonly Guid SummaryInformationFormatId = new Guid("F29F85E0-4FF9-1068-AB91-08002B27B3D9");
    // Property ids inside that property set: author, last author (last saved by)
    // and revision number.
    public const int PIDSI_AUTHOR = 4;
    public const int PIDSI_LASTAUTHOR = 8;
    public const int PIDSI_REVNUMBER = 9;
    
    // Access and sharing mode flags combined and passed as grfMode to
    // StgOpenStorageEx and IPropertySetStorage.Open. Only a subset of the native
    // STGM constants is declared here.
    [Flags]
    private enum STGM
    {
        STGM_READ = 0x00000000,
        STGM_READWRITE = 0x00000002,
        STGM_SHARE_DENY_NONE = 0x00000040,
        STGM_SHARE_DENY_WRITE = 0x00000020,
        STGM_SHARE_EXCLUSIVE = 0x00000010,
        STGM_DIRECT_SWMR = 0x00400000
    }
    
    // Storage format selector passed as stgfmt to StgOpenStorageEx;
    // RemoveProperties uses STGFMT_ANY.
    private enum STGFMT
    {
        STGFMT_STORAGE = 0,
        STGFMT_FILE = 3,
        STGFMT_ANY = 4,
        STGFMT_DOCFILE = 5
    }
    
    // Identifies one property for IPropertyStorage.DeleteMultiple: ulKind selects
    // which member of the union is meaningful. Field order is part of the
    // marshalled layout and must not be changed.
    [StructLayout(LayoutKind.Sequential)]
    private struct PROPSPEC
    {
        public PRSPEC ulKind;
        public PROPSPECunion union;
    }
    
    // Union part of PROPSPEC: both fields start at offset 0, so only one of them
    // is valid at a time (a numeric property id, or a pointer to a name string).
    [StructLayout(LayoutKind.Explicit)]
    private struct PROPSPECunion
    {
        [FieldOffset(0)]
        public int propid;
        [FieldOffset(0)]
        public IntPtr lpwstr;
    }
    
    // Discriminator stored in PROPSPEC.ulKind: tells whether the property is
    // addressed by name string (lpwstr) or by numeric id (propid).
    private enum PRSPEC
    {
        PRSPEC_LPWSTR = 0,
        PRSPEC_PROPID = 1
    }
    
    // P/Invoke declaration for ole32!StgOpenStorageEx. Returns an HRESULT as int
    // (RemoveProperties treats 0 as success) and hands back the opened object as an
    // IPropertySetStorage, the interface whose GUID the caller passes as riid.
    [DllImport("ole32.dll")]
    private static extern int StgOpenStorageEx([MarshalAs(UnmanagedType.LPWStr)] string pwcsName, STGM grfMode, STGFMT stgfmt, int grfAttrs, IntPtr pStgOptions, IntPtr reserved2, [MarshalAs(UnmanagedType.LPStruct)] Guid riid, out IPropertySetStorage ppObjectOpen);
    
    // Partial managed declaration of the COM interface IPropertySetStorage.
    // Only Open is actually used; the declaration order must be preserved because
    // it determines which native slot each method maps to.
    [Guid("0000013A-0000-0000-C000-000000000046"), InterfaceTypeAttribute(ComInterfaceType.InterfaceIsIUnknown)]
    private interface IPropertySetStorage
    {
        // Placeholder for the native method that precedes Open (presumably Create —
        // verify against the native interface definition). Never called.
        void Unused1();
        // Opens the property set identified by rfmtid. PreserveSig makes the
        // HRESULT available as the return value instead of raising an exception.
        [PreserveSig]
        int Open([MarshalAs(UnmanagedType.LPStruct)] Guid rfmtid, STGM grfMode, out IPropertyStorage storage);
    }
    
    // Partial managed declaration of the COM interface IPropertyStorage.
    // Only DeleteMultiple and Commit are used; the UnusedN members are never
    // called and exist only to keep those two methods in their native slots, so
    // the declaration order must be preserved.
    [Guid("00000138-0000-0000-C000-000000000046"), InterfaceTypeAttribute(ComInterfaceType.InterfaceIsIUnknown)]
    private interface IPropertyStorage
    {
        void Unused1();
        void Unused2();
        // Deletes the cpspec properties described by rgpspec. Not marked
        // PreserveSig, so a failure HRESULT surfaces as an exception.
        void DeleteMultiple(int cpspec, [MarshalAs(UnmanagedType.LPArray, SizeParamIndex = 0)] PROPSPEC[] rgpspec);
        void Unused4();
        void Unused5();
        void Unused6();
        // Persists pending changes; RemoveProperties passes 0 for the flags.
        void Commit(uint grfCommitFlags);
        // rest omitted
    }
    

    【讨论】:

    • 非常感谢,我看到Dsofile.dll eula 说:“用户承担全部风险”,“使用...自担风险”,...。所以我在使用 Dsofile.dll 时有点担心。所以我想问,Open XML SDK nuget 包是否比 Dsofile.dll 更安全。
    • @123iamking - 是的,它是微软官方开源软件包:github.com/OfficeDev/Open-XML-SDK
    • 需要注意的是,必须添加 WindowsBase.dll 来修复构建错误:*.com/a/7814593/4608491
    • 这个方法可以用来编辑 .doc 文件吗?根据这个答案:*.com/a/4220382/4608491 ,它似乎仅适用于 .docx。
    • 不,open xml 仅适用于 open xml 文件格式。旧 Office 文件(.doc、.xls 等)是“复合文件”。我提供了一个示例 C# 代码来删除这些文件的属性。
    【解决方案2】:

    *根据this article,作者Mr.Vivek Singh给了我们一些有用的代码。

    **我们还有来自微软的this library -Dsofile.dll

    像这样去。

    第 1 步:下载 Dsofile.dll 库 (**),解压并获取文件 Interop.Dsofile.dll(检索日期 8/8/2017)

    第 2 步:为您的 C# 项目添加引用文件 Interop.Dsofile.dll。

    第 3 步:使用以下代码(改编自文章 *,感谢 Vivek Singh;我只是把 OleDocumentPropertiesClass 中的 “Class” 去掉以避免构建错误,并针对本问题做了少量修改)

            string fileName = "";//Add the full path of the Word file
    
            OleDocumentProperties myDSOOleDocument = new OleDocumentProperties();
            myDSOOleDocument.Open(fileName, false,
     DSOFile.dsoFileOpenOptions.dsoOptionOpenReadOnlyIfNoWriteAccess);
    
            myDSOOleDocument.SummaryProperties.LastSavedBy = string.Empty;
           //myDSOOleDocument.SummaryProperties.RevisionNumber = string.Empty; //This can't be edit -readonly
    
            myDSOOleDocument.Save();
            myDSOOleDocument.Close();
    

    无论如何,我无法编辑 RevisionNumber,因为它是只读的。好吧,我只能满足于我能得到的。

    【讨论】:

    • 与 OpenXML 相比,我更推荐这个解决方案,因为它可以更方便地处理新旧两种格式的 Word 和 Excel 文件
    最近更新 更多