【问题标题】:SevenZipSharp 无法解压某些 tar 档案(SevenZipSharp fails to unpack certain tar archives)
【发布时间】:2019-04-18 18:43:28
【问题描述】:

我使用 SevenZipSharp 打包到 7z 档案并从各种档案中解压。多年来一直运作良好。

今天我有一个 .tgz 存档,在第二阶段解包失败:
从 .tgz 中提取 .tar 有效,但解压 .tar 失败。只是这个单一的档案受到影响。所有其他 .tgz 都运行良好。 .tar 本身没有问题,因为使用 7-zip 软件解压也可以。

【问题讨论】:

    标签: c# archive 7zip


    【解决方案1】:

    经过我和一位同事的大量测试,找到了原因:
    我们必须调试 SevenZipSharp DLL 才能找到其中的故障。 DLL 通过读取前 16 个字节并将其与签名列表进行比较来检测存档的类型。这对于大多数类型的存档是正确的,但对于 .tar 存档是错误的,因为 .tar 文件头以存档的文件名开头:TAR @ Wikipedia。签名“ustar”(如果存在)位于地址 257 (0x0101)。

    SevenZipSharp 知道这一点并在此地址检查“ustar”,但前提是之前的检测失败。不幸的是,我们的 TAR 档案的名称是“x42202.tar”。 .dmg 文件的标头(Apple Disk Image)由一个“x”组成(这有多愚蠢,只使用一个字节作为签名??)。所以实际上已经成功检测到文件类型,只是检测结果错误。
    (我知道,链接的维基百科条目说 .dmg 的标头签名是“koly”,但我下载了在互联网上能找到的若干 .dmg 文件,确认它们确实以“x”开头。)

    EDIT 07.12.2021:签名实际上是“koly”,但所谓的标头长度为 512 字节,位于文件的 END 处。然而,SevenZipSharp 在开始时会检查签名。我测试的大多数文件(但不是全部!)的开头确实有一个“x”,但我不知道为什么。也许它是一种非官方的标头(“x”似乎来自 MIME 类型“x-apple-diskimage”)。 - 编辑结束。

    因此我们修改了FileSignatureChecker.cs 中的代码,以避免对 .tar 档案的错误档案类型检测。
    您可以在下面找到原始代码和修改后的代码。
    代码库是最新的 SevenZipSharp 版本,可在 CodePlex archive 中找到。显然它已不再处于积极开发中:版本号多年来没有变化,而且如果项目仍然活跃,它在 CodePlex 停止服务后应该早已迁移到别处。

    2018 年 11 月 16 日更新
    修复了修改后代码中的一个错误:此前即使检测到了 enSpecialFormat,也没有将其返回。

    2021 年 12 月 16 日更新
    该错误仍然存在于 GitHub 存储库 https://github.com/squid-box/SevenZipSharp 中(这是 SevenZipSharp 项目目前的所在地)。我已提交了一个对出错代码进行大幅重写的拉取请求,正在等待合并。

    原代码

    /// <summary>
    /// Tries to determine the archive format of <paramref name="stream"/> by comparing bytes at
    /// several locations in the stream against the signatures registered in <c>Formats</c>.
    /// </summary>
    /// <param name="stream">The stream to inspect. It is sought to several positions and read from.</param>
    /// <param name="offset">
    /// Set to 0, except when an embedded archive is found by the SFX scan; then it is the byte
    /// position of the embedded signature within the scanned prefix.
    /// </param>
    /// <param name="isExecutable">Set to true when the leading bytes match the PE signature.</param>
    /// <returns>The detected archive format.</returns>
    /// <exception cref="ArgumentException">
    /// The stream is not readable, is shorter than SIGNATURE_SIZE, or no signature matched.
    /// </exception>
    public static InArchiveFormat CheckSignature (Stream stream, out int offset, out bool isExecutable)
    {
      offset = 0;
      if (!stream.CanRead)
      {
        throw new ArgumentException ("The stream must be readable.");
      }
      if (stream.Length < SIGNATURE_SIZE)
      {
        throw new ArgumentException ("The stream is invalid.");
      }
    
      #region Get file signature
    
      // Read the first SIGNATURE_SIZE bytes and render them as a hyphen-separated hex string
      // ("XX-XX-..."), which is the form the registered signatures are compared in.
      var signature = new byte[SIGNATURE_SIZE];
      int bytesRequired = SIGNATURE_SIZE;
      int index = 0;
      stream.Seek (0, SeekOrigin.Begin);
      // NOTE(review): if Read ever returns 0 (end of stream), bytesRequired never decreases and
      // this loop does not terminate.
      while (bytesRequired > 0)
      {
        int bytesRead = stream.Read (signature, index, bytesRequired);
        bytesRequired -= bytesRead;
        index += bytesRead;
      }
      string actualSignature = BitConverter.ToString (signature);
    
      #endregion
    
      // XZ is used here as a "nothing suspected" marker; only PE and Cab replace it below.
      InArchiveFormat suspectedFormat = InArchiveFormat.XZ; // any except PE and Cab
      isExecutable = false;
    
      foreach (string expectedSignature in Formats.InSignatureFormats.Keys)
      {
        // A signature matches either at the very start, or (for Lzh only) starting at character 6
        // of the hex string, i.e. at byte 2, since every byte occupies three characters.
        // '&&' binds tighter than '||', so the Lzh condition applies to the second test only.
        if (actualSignature.StartsWith (expectedSignature, StringComparison.OrdinalIgnoreCase) ||
            actualSignature.Substring (6).StartsWith (expectedSignature, StringComparison.OrdinalIgnoreCase) &&
            Formats.InSignatureFormats[expectedSignature] == InArchiveFormat.Lzh)
        {
          if (Formats.InSignatureFormats[expectedSignature] == InArchiveFormat.PE)
          {
            // A PE match is only remembered; the SFX scan further down decides the final result.
            suspectedFormat = InArchiveFormat.PE;
            isExecutable = true;
          }
          else
          {
            // Any other match returns immediately, so none of the offset-based checks below
            // (including the tar check at offset 257) run for such a stream.
            return Formats.InSignatureFormats[expectedSignature];
          }
        }
      }
    
      // Many Microsoft formats
      if (actualSignature.StartsWith ("D0-CF-11-E0-A1-B1-1A-E1", StringComparison.OrdinalIgnoreCase))
      {
        suspectedFormat = InArchiveFormat.Cab; // != InArchiveFormat.XZ
      }
    
      #region SpecialDetect
      try
      {
        // NOTE(review): the return value is discarded, so unless SpecialDetect has side effects
        // not visible here, this tar probe at offset 257 does not influence the result.
        SpecialDetect (stream, 257, InArchiveFormat.Tar);
      }
      catch (ArgumentException) { }
      if (SpecialDetect (stream, 0x8001, InArchiveFormat.Iso))
      {
        return InArchiveFormat.Iso;
      }
      if (SpecialDetect (stream, 0x8801, InArchiveFormat.Iso))
      {
        return InArchiveFormat.Iso;
      }
      if (SpecialDetect (stream, 0x9001, InArchiveFormat.Iso))
      {
        return InArchiveFormat.Iso;
      }
      // NOTE(review): same offset (0x9001) as the check directly above.
      if (SpecialDetect (stream, 0x9001, InArchiveFormat.Iso))
      {
        return InArchiveFormat.Iso;
      }
      if (SpecialDetect (stream, 0x400, InArchiveFormat.Hfs))
      {
        return InArchiveFormat.Hfs;
      }
      #region Last resort for tar - can mistake
      // Treat the stream as tar when its last 1024 bytes are all zero.
      if (stream.Length >= 1024)
      {
        stream.Seek (-1024, SeekOrigin.End);
        byte[] buf = new byte[1024];
        // NOTE(review): the number of bytes actually read is not checked.
        stream.Read (buf, 0, 1024);
        bool istar = true;
        for (int i = 0; i < 1024; i++)
        {
          istar = istar && buf[i] == 0;
        }
        if (istar)
        {
          return InArchiveFormat.Tar;
        }
      }
      #endregion
      #endregion
    
      #region Check if it is an SFX archive or a file with an embedded archive.
      // Only entered when the leading bytes matched PE or the Microsoft compound-file signature.
      if (suspectedFormat != InArchiveFormat.XZ)
      {
        #region Get first Min(stream.Length, SFX_SCAN_LENGTH) bytes
        var scanLength = Math.Min (stream.Length, SFX_SCAN_LENGTH);
        signature = new byte[scanLength];
        bytesRequired = (int)scanLength;
        index = 0;
        stream.Seek (0, SeekOrigin.Begin);
        while (bytesRequired > 0)
        {
          int bytesRead = stream.Read (signature, index, bytesRequired);
          bytesRequired -= bytesRead;
          index += bytesRead;
        }
        actualSignature = BitConverter.ToString (signature);
        #endregion
    
        // The formats are tried in this fixed order; the first one whose signature occurs anywhere
        // in the scanned prefix wins.
        foreach (var format in new InArchiveFormat[]
        {
                        InArchiveFormat.Zip,
                        InArchiveFormat.SevenZip,
                        InArchiveFormat.Rar,
                        InArchiveFormat.Cab,
                        InArchiveFormat.Arj
        })
        {
          int pos = actualSignature.IndexOf (Formats.InSignatureFormatsReversed[format]);
          if (pos > -1)
          {
            // Three hex-string characters per byte: convert the character index to a byte offset.
            offset = pos / 3;
            return format;
          }
        }
        // Nothing
        if (suspectedFormat == InArchiveFormat.PE)
        {
          return InArchiveFormat.PE;
        }
      }
      #endregion
    
      throw new ArgumentException ("The stream is invalid or no corresponding signature was found.");
    }
    

    修改后的代码

    /// <summary>
    /// Determines the archive format of <paramref name="stream"/> by comparing bytes at several
    /// locations in the stream against the signatures registered in <c>Formats</c>.
    /// </summary>
    /// <remarks>
    /// A tar header begins with a file name, so its leading bytes can coincide with the signature of
    /// another format. When the stream looks like a tar (magic at offset 257, or an all-zero
    /// 1024-byte trailer) and it is a <see cref="FileStream"/> whose name ends in ".tar", Tar takes
    /// precedence over whatever the leading bytes matched.
    /// The stream position is changed by this method.
    /// </remarks>
    /// <param name="stream">A readable, seekable stream positioned anywhere.</param>
    /// <param name="offset">
    /// Set to 0, except when an embedded archive is found by the SFX scan; then it is the byte
    /// position of the embedded signature within the scanned prefix.
    /// </param>
    /// <param name="isExecutable">Set to true when the leading bytes match the PE signature.</param>
    /// <returns>The detected archive format.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// The stream is not readable, is shorter than SIGNATURE_SIZE, or no signature matched.
    /// </exception>
    public static InArchiveFormat CheckSignature (Stream stream, out int offset, out bool isExecutable)
    {
      offset = 0;
      isExecutable = false;
      if (stream == null)
      {
        // ArgumentNullException derives from ArgumentException, so existing handlers still apply.
        throw new ArgumentNullException ("stream");
      }
      if (!stream.CanRead)
      {
        throw new ArgumentException ("The stream must be readable.");
      }
      if (stream.Length < SIGNATURE_SIZE)
      {
        throw new ArgumentException ("The stream is invalid.");
      }

      #region Get file signature

      // Render the leading bytes as a hyphen-separated hex string ("XX-XX-..."), the form in
      // which the registered signatures are compared.
      var signature = new byte[SIGNATURE_SIZE];
      stream.Seek (0, SeekOrigin.Begin);
      if (ReadFully (stream, signature) < signature.Length)
      {
        // The stream ended earlier than its reported length.
        throw new ArgumentException ("The stream is invalid.");
      }
      string actualSignature = BitConverter.ToString (signature);

      #endregion Get file signature

      // XZ serves as the "nothing suspected" marker; only PE and Cab replace it below.
      InArchiveFormat suspectedFormat = InArchiveFormat.XZ; // any except PE and Cab

      InArchiveFormat? enDetectedFormat = null; // match on the leading bytes
      InArchiveFormat? enSpecialFormat = null;  // match at a format-specific offset

      // Lzh signatures sit at byte 2, i.e. at character 6 of the hex string (3 characters per byte).
      string signatureFromByte2 = actualSignature.Substring (6);

      foreach (string expectedSignature in Formats.InSignatureFormats.Keys)
      {
        InArchiveFormat candidate = Formats.InSignatureFormats[expectedSignature];
        bool matchesAtStart =
          actualSignature.StartsWith (expectedSignature, StringComparison.OrdinalIgnoreCase);
        bool matchesAsLzh =
          candidate == InArchiveFormat.Lzh &&
          signatureFromByte2.StartsWith (expectedSignature, StringComparison.OrdinalIgnoreCase);
        if (!matchesAtStart && !matchesAsLzh)
        {
          continue;
        }

        if (candidate == InArchiveFormat.PE)
        {
          // Only remembered; the SFX scan further down decides what an executable really is.
          suspectedFormat = InArchiveFormat.PE;
          isExecutable = true;
        }
        else
        {
          enDetectedFormat = candidate;
          break;
        }
      }

      // Many Microsoft formats
      if (actualSignature.StartsWith ("D0-CF-11-E0-A1-B1-1A-E1", StringComparison.OrdinalIgnoreCase))
      {
        suspectedFormat = InArchiveFormat.Cab; // != InArchiveFormat.XZ
      }

      #region SpecialDetect

      if (SpecialDetect (stream, 257, InArchiveFormat.Tar))
      {
        enSpecialFormat = InArchiveFormat.Tar;
      }
      else if (SpecialDetect (stream, 0x8001, InArchiveFormat.Iso) ||
               SpecialDetect (stream, 0x8801, InArchiveFormat.Iso) ||
               SpecialDetect (stream, 0x9001, InArchiveFormat.Iso))
      {
        enSpecialFormat = InArchiveFormat.Iso;
      }
      else if (SpecialDetect (stream, 0x400, InArchiveFormat.Hfs))
      {
        enSpecialFormat = InArchiveFormat.Hfs;
      }

      #region Last resort for tar - can mistake

      // A stream whose last 1024 bytes are all zero is treated as a possible tar.
      const int tarTrailerSize = 1024;
      bool bPossiblyTAR = false;
      if (stream.Length >= tarTrailerSize)
      {
        stream.Seek (-tarTrailerSize, SeekOrigin.End);
        var trailer = new byte[tarTrailerSize];
        // A short read must not be mistaken for a zero trailer (the buffer starts out zeroed).
        bPossiblyTAR = ReadFully (stream, trailer) == trailer.Length
                       && Array.TrueForAll (trailer, b => b == 0);
      }

      // TAR header starts with a file name.
      // The file name can be anything, including the identifiers of the various archive formats.
      // This means that a TAR can be misinterpreted as any type of archive.
      // NOTE(review): the override is limited to FileStreams named "*.tar"; a tar read from any
      // other kind of stream can still be mis-detected by its leading bytes.
      if (enSpecialFormat == InArchiveFormat.Tar || bPossiblyTAR)
      {
        var fs = stream as FileStream;
        if (fs != null && fs.Name.EndsWith (".tar", StringComparison.OrdinalIgnoreCase))
        {
          enDetectedFormat = InArchiveFormat.Tar;
        }
      }

      #endregion Last resort for tar - can mistake

      if (enDetectedFormat.HasValue)
      {
        return enDetectedFormat.Value;
      }
      if (enSpecialFormat.HasValue)
      {
        return enSpecialFormat.Value;
      }

      #endregion SpecialDetect

      #region Check if it is an SFX archive or a file with an embedded archive.

      // Only entered when the leading bytes matched PE or the Microsoft compound-file signature.
      if (suspectedFormat != InArchiveFormat.XZ)
      {
        var scanLength = (int)Math.Min (stream.Length, SFX_SCAN_LENGTH);
        signature = new byte[scanLength];
        stream.Seek (0, SeekOrigin.Begin);
        int scanned = ReadFully (stream, signature);
        actualSignature = BitConverter.ToString (signature, 0, scanned);

        // The formats are tried in this fixed order; the first one whose signature occurs anywhere
        // in the scanned prefix wins.
        foreach (var format in new InArchiveFormat[]
        {
                    InArchiveFormat.Zip,
                    InArchiveFormat.SevenZip,
                    InArchiveFormat.Rar,
                    InArchiveFormat.Cab,
                    InArchiveFormat.Arj
        })
        {
          // Byte signatures are machine data: compare ordinally, not by current culture.
          int pos = actualSignature.IndexOf (Formats.InSignatureFormatsReversed[format], StringComparison.Ordinal);
          if (pos > -1)
          {
            // Three hex-string characters per byte: convert the character index to a byte offset.
            offset = pos / 3;
            return format;
          }
        }
        // Nothing
        if (suspectedFormat == InArchiveFormat.PE)
        {
          return InArchiveFormat.PE;
        }
      }

      #endregion Check if it is an SFX archive or a file with an embedded archive.

      // Genuine last resort: nothing else matched, but the trailer is all zero. This keeps
      // tars without a magic value detectable when the stream is not a FileStream named "*.tar".
      if (bPossiblyTAR)
      {
        return InArchiveFormat.Tar;
      }

      throw new ArgumentException ("The stream is invalid or no corresponding signature was found.");
    }

    /// <summary>
    /// Reads from the current position of <paramref name="stream"/> until
    /// <paramref name="buffer"/> is full or the stream ends.
    /// </summary>
    /// <returns>The number of bytes actually read; less than the buffer length only at end of stream.</returns>
    private static int ReadFully (Stream stream, byte[] buffer)
    {
      int total = 0;
      while (total < buffer.Length)
      {
        int bytesRead = stream.Read (buffer, total, buffer.Length - total);
        if (bytesRead <= 0)
        {
          // Read returns 0 at end of stream; stop instead of looping forever.
          break;
        }
        total += bytesRead;
      }
      return total;
    }
    

    【讨论】:

      猜你喜欢
      • 2015-08-23
      • 1970-01-01
      • 2016-10-08
      • 1970-01-01
      • 2016-06-25
      • 1970-01-01
      • 1970-01-01
      • 2017-10-27
      • 1970-01-01
      相关资源
      最近更新 更多