IT源码网

c#之SevenZipSharp 无法解压某些 tar 文件

cloudgamer 2023年09月06日 程序员 101 0

我使用 SevenZipSharp 打包到 7z 文件并从各种文件中解包。它多年来一直运作良好。

今天我有一个 .tgz 存档在第二阶段解包失败:
从 .tgz 中提取 .tar 有效,但解压 .tar 失败。受影响的只是这个单一文件。所有其他 .tgz 都可以正常工作。 .tar 本身没有问题,因为使用 7-zip 软件解压缩也可以。

请您参考如下方法:

经过大量测试我和同事找到了原因:
我们不得不调试 SevenZipSharp DLL 才找到其中的问题。该 DLL 通过读取文件的前 16 个字节并将其与签名列表进行比较来检测存档类型。这种做法对大多数存档类型都有效,但对 .tar 存档行不通,因为 .tar 文件头以存档的文件名开头(参见 Wikipedia 上的 TAR 条目)。签名“ustar”(如果存在)位于偏移地址 257 (0x0101) 处。

SevenZipSharp 知道这一点并在该地址检查“ustar”,但前提是之前的检测失败。不幸的是,我们的 TAR 文件的名称是“x42202.tar”。而 .dmg 文件的 header ( Apple Disk Image ) 由一个“x”组成(这有多愚蠢,只使用一个字节作为签名??)。所以其实是成功检测到了文件类型,只是检测结果有误。
(我知道,链接的维基百科说,.dmg header 签名是“koly”,但我用我在互联网上找到的下载的 .dmg 文件进行了确认。)

编辑 07.12.2021:签名实际上是“koly”,但这个所谓的 header 长 512 字节,位于文件的末尾。然而,SevenZipSharp 是在文件开头检查签名的。我测试过的大多数文件(但不是全部!)确实在开头有一个“x”,但我不知道为什么。也许它是一种非官方的 header(“x”似乎来自 MIME 类型“x-apple-diskimage”)。 - 编辑结束。

因此我们修改了 FileSignatureChecker.cs 中的代码,以避免对 .tar 文件进行错误的文件类型检测。
您可以在下面找到原始代码和修改后的代码。
代码基于最新的 SevenZipSharp 版本,可以在 CodePlex archive 中找到。该项目显然已不再处于活跃开发状态:版本号多年来没有改变,而且如果它仍在维护,在 CodePlex 停用后应该早已迁移到别处。

2018-11-16 更新
修复了修改后代码中的一个错误:此前即使检测到了 enSpecialFormat,也不会将其返回。

2021-12-16 更新
该错误仍然存在于 GitHub 存储库 https://github.com/squid-box/SevenZipSharp 中,这是 SevenZipSharp 项目的当前位置。我已提交一个对出错代码进行大幅重写的拉取请求,目前正在等待合并。

原始代码

/// <summary>
/// Determines the archive format of <paramref name="stream"/> by inspecting its content
/// (original, unmodified listing).
/// </summary>
/// <remarks>
/// Detection runs in this order: (1) the first SIGNATURE_SIZE bytes are compared against the keys of
/// Formats.InSignatureFormats, (2) SpecialDetect probes fixed offsets, (3) the last 1024 bytes are
/// tested for being all zero, (4) for PE / compound-file suspects the beginning of the stream is
/// scanned for an embedded archive.
/// NOTE(review): step (1) returns on the first non-PE match, so steps (2)-(4) never run for a stream
/// whose leading bytes happen to match some signature.
/// </remarks>
/// <param name="stream">Readable, seekable stream; it is repositioned by this method.</param>
/// <param name="offset">Set to 0, or to the byte position of an embedded archive found by the scan in step (4).</param>
/// <param name="isExecutable">Set to true when the leading bytes match the PE signature.</param>
/// <returns>The detected format.</returns>
/// <exception cref="ArgumentException">
/// The stream is not readable, is shorter than SIGNATURE_SIZE, or matches no known signature.
/// </exception>
public static InArchiveFormat CheckSignature (Stream stream, out int offset, out bool isExecutable) 
{ 
  offset = 0; 
  if (!stream.CanRead) 
  { 
    throw new ArgumentException ("The stream must be readable."); 
  } 
  if (stream.Length < SIGNATURE_SIZE) 
  { 
    throw new ArgumentException ("The stream is invalid."); 
  } 
 
  #region Get file signature 
 
  var signature = new byte[SIGNATURE_SIZE]; 
  int bytesRequired = SIGNATURE_SIZE; 
  int index = 0; 
  stream.Seek (0, SeekOrigin.Begin); 
  // Read may deliver fewer bytes than requested, hence the loop.
  // NOTE(review): there is no exit if Read returns 0, so a stream that ends early would spin here.
  while (bytesRequired > 0) 
  { 
    int bytesRead = stream.Read (signature, index, bytesRequired); 
    bytesRequired -= bytesRead; 
    index += bytesRead; 
  } 
  // BitConverter.ToString yields hyphen-separated hex pairs ("XX-XX-..."), i.e. three characters per byte.
  string actualSignature = BitConverter.ToString (signature); 
 
  #endregion 
 
  // XZ serves as the "nothing suspected" marker; only PE and Cab are ever assigned below.
  InArchiveFormat suspectedFormat = InArchiveFormat.XZ; // any except PE and Cab 
  isExecutable = false; 
 
  foreach (string expectedSignature in Formats.InSignatureFormats.Keys) 
  { 
    // && binds tighter than ||: a signature matches either at the very start, or - for Lzh only -
    // after skipping 6 characters of the hex string (= 2 bytes).
    if (actualSignature.StartsWith (expectedSignature, StringComparison.OrdinalIgnoreCase) || 
        actualSignature.Substring (6).StartsWith (expectedSignature, StringComparison.OrdinalIgnoreCase) && 
        Formats.InSignatureFormats[expectedSignature] == InArchiveFormat.Lzh) 
    { 
      if (Formats.InSignatureFormats[expectedSignature] == InArchiveFormat.PE) 
      { 
        // A PE file may be a self-extracting archive: remember it and keep looking.
        suspectedFormat = InArchiveFormat.PE; 
        isExecutable = true; 
      } 
      else 
      { 
        // Immediate return: the tar probe further down is never reached for such a stream.
        return Formats.InSignatureFormats[expectedSignature]; 
      } 
    } 
  } 
 
  // Many Microsoft formats 
  if (actualSignature.StartsWith ("D0-CF-11-E0-A1-B1-1A-E1", StringComparison.OrdinalIgnoreCase)) 
  { 
    suspectedFormat = InArchiveFormat.Cab; // != InArchiveFormat.XZ 
  } 
 
  #region SpecialDetect 
  // NOTE(review): the result of this tar probe at offset 257 is discarded, so a positive match
  // does not return Tar here; only an ArgumentException from the call is swallowed.
  try 
  { 
    SpecialDetect (stream, 257, InArchiveFormat.Tar); 
  } 
  catch (ArgumentException) { } 
  if (SpecialDetect (stream, 0x8001, InArchiveFormat.Iso)) 
  { 
    return InArchiveFormat.Iso; 
  } 
  if (SpecialDetect (stream, 0x8801, InArchiveFormat.Iso)) 
  { 
    return InArchiveFormat.Iso; 
  } 
  if (SpecialDetect (stream, 0x9001, InArchiveFormat.Iso)) 
  { 
    return InArchiveFormat.Iso; 
  } 
  // NOTE(review): same offset (0x9001) as the probe directly above - looks like an accidental duplicate.
  if (SpecialDetect (stream, 0x9001, InArchiveFormat.Iso)) 
  { 
    return InArchiveFormat.Iso; 
  } 
  if (SpecialDetect (stream, 0x400, InArchiveFormat.Hfs)) 
  { 
    return InArchiveFormat.Hfs; 
  } 
  #region Last resort for tar - can mistake 
  // Heuristic: a stream whose last 1024 bytes are all zero is reported as Tar.
  if (stream.Length >= 1024) 
  { 
    stream.Seek (-1024, SeekOrigin.End); 
    byte[] buf = new byte[1024]; 
    // NOTE(review): the return value of Read is ignored; bytes not delivered stay 0 in buf
    // and would count towards the all-zero test.
    stream.Read (buf, 0, 1024); 
    bool istar = true; 
    for (int i = 0; i < 1024; i++) 
    { 
      istar = istar && buf[i] == 0; 
    } 
    if (istar) 
    { 
      return InArchiveFormat.Tar; 
    } 
  } 
  #endregion 
  #endregion 
 
  #region Check if it is an SFX archive or a file with an embedded archive. 
  // Only reached with a PE or Cab suspicion; otherwise control falls through to the final throw.
  if (suspectedFormat != InArchiveFormat.XZ) 
  { 
    #region Get first Min(stream.Length, SFX_SCAN_LENGTH) bytes 
    var scanLength = Math.Min (stream.Length, SFX_SCAN_LENGTH); 
    signature = new byte[scanLength]; 
    bytesRequired = (int)scanLength; 
    index = 0; 
    stream.Seek (0, SeekOrigin.Begin); 
    while (bytesRequired > 0) 
    { 
      int bytesRead = stream.Read (signature, index, bytesRequired); 
      bytesRequired -= bytesRead; 
      index += bytesRead; 
    } 
    actualSignature = BitConverter.ToString (signature); 
    #endregion 
 
    // The array order is the search priority: the first format whose signature occurs anywhere wins.
    foreach (var format in new InArchiveFormat[] 
    { 
                    InArchiveFormat.Zip, 
                    InArchiveFormat.SevenZip, 
                    InArchiveFormat.Rar, 
                    InArchiveFormat.Cab, 
                    InArchiveFormat.Arj 
    }) 
    { 
      int pos = actualSignature.IndexOf (Formats.InSignatureFormatsReversed[format]); 
      if (pos > -1) 
      { 
        // Three characters of the hex string ("XX-") correspond to one byte of the stream.
        offset = pos / 3; 
        return format; 
      } 
    } 
    // Nothing 
    if (suspectedFormat == InArchiveFormat.PE) 
    { 
      return InArchiveFormat.PE; 
    } 
  } 
  #endregion 
 
  throw new ArgumentException ("The stream is invalid or no corresponding signature was found."); 
} 

修改代码

/// <summary>
/// Determines the archive format of <paramref name="stream"/> by inspecting its content,
/// without letting a tar archive be mistaken for another format because of its leading bytes.
/// </summary>
/// <remarks>
/// Detection order:
/// 1. the first SIGNATURE_SIZE bytes are compared against the keys of Formats.InSignatureFormats;
/// 2. SpecialDetect probes fixed offsets for Tar, Iso and Hfs;
/// 3. the last 1024 bytes are tested for being all zero (weak tar hint);
/// 4. if step 2 or 3 hints at tar and the stream is a FileStream named "*.tar", Tar overrides step 1;
/// 5. for PE / compound-file suspects the beginning of the stream is scanned for an embedded archive.
/// </remarks>
/// <param name="stream">Readable, seekable stream; it is repositioned by this method.</param>
/// <param name="offset">Set to 0, or to the byte position of an embedded archive found in step 5.</param>
/// <param name="isExecutable">Set to true when the leading bytes match the PE signature.</param>
/// <returns>The detected format.</returns>
/// <exception cref="ArgumentException">
/// The stream is not readable, is shorter than SIGNATURE_SIZE, ends before the expected number of
/// bytes could be read, or matches no known signature.
/// </exception>
public static InArchiveFormat CheckSignature (Stream stream, out int offset, out bool isExecutable)
{
  // Size of the trailing all-zero area that is taken as a hint for a tar archive.
  const int TarTrailerSize = 1024;

  offset = 0;
  isExecutable = false;
  if (!stream.CanRead)
  {
    throw new ArgumentException ("The stream must be readable.");
  }
  if (stream.Length < SIGNATURE_SIZE)
  {
    throw new ArgumentException ("The stream is invalid.");
  }

  #region Get file signature

  var signature = new byte[SIGNATURE_SIZE];
  ReadFully (stream, 0, SeekOrigin.Begin, signature);
  // BitConverter.ToString yields hyphen-separated hex pairs ("XX-XX-..."), i.e. three characters per byte.
  string actualSignature = BitConverter.ToString (signature);

  #endregion Get file signature

  // XZ serves as the "nothing suspected" marker; only PE and Cab are ever assigned below.
  InArchiveFormat suspectedFormat = InArchiveFormat.XZ; // any except PE and Cab

  // null = "nothing found"; replaces the former (InArchiveFormat)(-1) sentinel.
  InArchiveFormat? detectedFormat = null;
  InArchiveFormat? specialFormat = null;

  foreach (string expectedSignature in Formats.InSignatureFormats.Keys)
  {
    InArchiveFormat candidate = Formats.InSignatureFormats[expectedSignature];

    bool matchesAtStart = actualSignature.StartsWith (expectedSignature, StringComparison.OrdinalIgnoreCase);
    // For Lzh only, the signature may also start 6 characters into the hex string (= 2 bytes).
    bool matchesLzh = !matchesAtStart
                      && candidate == InArchiveFormat.Lzh
                      && actualSignature.Substring (6).StartsWith (expectedSignature, StringComparison.OrdinalIgnoreCase);
    if (!matchesAtStart && !matchesLzh)
    {
      continue;
    }

    if (candidate == InArchiveFormat.PE)
    {
      // A PE file may be a self-extracting archive: remember it and keep looking.
      suspectedFormat = InArchiveFormat.PE;
      isExecutable = true;
    }
    else
    {
      // Do not return yet: a tar archive can produce a false match here (see below).
      detectedFormat = candidate;
      break;
    }
  }

  // Many Microsoft formats
  if (actualSignature.StartsWith ("D0-CF-11-E0-A1-B1-1A-E1", StringComparison.OrdinalIgnoreCase))
  {
    suspectedFormat = InArchiveFormat.Cab; // != InArchiveFormat.XZ
  }

  #region SpecialDetect

  // First matching probe wins. The former second probe at 0x9001 repeated the one before it
  // with identical arguments and has been dropped.
  if (SpecialDetect (stream, 257, InArchiveFormat.Tar))
  {
    specialFormat = InArchiveFormat.Tar;
  }
  else if (SpecialDetect (stream, 0x8001, InArchiveFormat.Iso)
           || SpecialDetect (stream, 0x8801, InArchiveFormat.Iso)
           || SpecialDetect (stream, 0x9001, InArchiveFormat.Iso))
  {
    specialFormat = InArchiveFormat.Iso;
  }
  else if (SpecialDetect (stream, 0x400, InArchiveFormat.Hfs))
  {
    specialFormat = InArchiveFormat.Hfs;
  }

  #region Last resort for tar - can mistake

  // Weak hint: the stream ends with TarTrailerSize zero bytes.
  bool possiblyTar = false;
  if (stream.Length >= TarTrailerSize)
  {
    var tail = new byte[TarTrailerSize];
    // Read the whole tail; a short read must not leave zeros in the buffer that look like a trailer.
    ReadFully (stream, -TarTrailerSize, SeekOrigin.End, tail);
    possiblyTar = true;
    foreach (byte b in tail)
    {
      if (b != 0)
      {
        possiblyTar = false;
        break;
      }
    }
  }

  // A tar header starts with a file name. The name can be anything, including the identifiers
  // of other archive formats, so a tar can be misinterpreted as any type of archive by the
  // signature loop above. When the content hints at tar and the stream is a file named "*.tar",
  // trust that over the leading-bytes match.
  if (specialFormat == InArchiveFormat.Tar || possiblyTar)
  {
    var fileStream = stream as FileStream;
    if (fileStream != null
        && fileStream.Name.EndsWith (".tar", StringComparison.OrdinalIgnoreCase))
    {
      detectedFormat = InArchiveFormat.Tar;
    }
  }

  #endregion Last resort for tar - can mistake

  if (detectedFormat.HasValue)
  {
    return detectedFormat.Value;
  }
  if (specialFormat.HasValue)
  {
    return specialFormat.Value;
  }

  #endregion SpecialDetect

  #region Check if it is an SFX archive or a file with an embedded archive.

  // Only reached with a PE or Cab suspicion; otherwise control falls through to the final throw.
  if (suspectedFormat != InArchiveFormat.XZ)
  {
    #region Get first Min(stream.Length, SFX_SCAN_LENGTH) bytes

    var scanLength = Math.Min (stream.Length, SFX_SCAN_LENGTH);
    signature = new byte[scanLength];
    ReadFully (stream, 0, SeekOrigin.Begin, signature);
    actualSignature = BitConverter.ToString (signature);

    #endregion Get first Min(stream.Length, SFX_SCAN_LENGTH) bytes

    // The array order is the search priority: the first format whose signature occurs anywhere wins.
    foreach (var format in new InArchiveFormat[]
    {
      InArchiveFormat.Zip,
      InArchiveFormat.SevenZip,
      InArchiveFormat.Rar,
      InArchiveFormat.Cab,
      InArchiveFormat.Arj
    })
    {
      // Ordinal search: this is a hex dump, not linguistic text, and culture-sensitive
      // searching may give hyphens special treatment. Case is ignored like in the loop above.
      int pos = actualSignature.IndexOf (Formats.InSignatureFormatsReversed[format], StringComparison.OrdinalIgnoreCase);
      if (pos > -1)
      {
        // Three characters of the hex string ("XX-") correspond to one byte of the stream.
        offset = pos / 3;
        return format;
      }
    }
    // Nothing
    if (suspectedFormat == InArchiveFormat.PE)
    {
      return InArchiveFormat.PE;
    }
  }

  #endregion Check if it is an SFX archive or a file with an embedded archive.

  throw new ArgumentException ("The stream is invalid or no corresponding signature was found.");
}

/// <summary>
/// Seeks to <paramref name="position"/> and fills <paramref name="buffer"/> completely.
/// </summary>
/// <exception cref="ArgumentException">The stream ended before the buffer was full.</exception>
private static void ReadFully (Stream stream, long position, SeekOrigin origin, byte[] buffer)
{
  stream.Seek (position, origin);
  int filled = 0;
  while (filled < buffer.Length)
  {
    int bytesRead = stream.Read (buffer, filled, buffer.Length - filled);
    if (bytesRead <= 0)
    {
      // Read returns 0 at end of stream; without this check the loop would never terminate.
      throw new ArgumentException ("The stream is invalid.");
    }
    filled += bytesRead;
  }
}


评论关闭
IT源码网

微信公众号:IT虾米 (左侧二维码扫一扫)欢迎添加!