我使用 SevenZipSharp 将文件打包为 7z 存档,并解包各种格式的存档文件。它多年来一直运作良好。
今天我有一个 .tgz 存档在第二阶段解包失败:
从 .tgz 中提取 .tar 有效,但解压 .tar 失败。受影响的只是这个单一文件。所有其他 .tgz 都可以正常工作。 .tar 本身没有问题,因为使用 7-zip 软件解压缩也可以。
请您参考如下方法:
经过大量测试我和同事找到了原因:
我们必须调试 SevenZipSharp DLL 才能找到其中的故障。DLL 通过读取前 16 个字节并将其与签名列表进行比较来检测存档的类型。这对于大多数类型的存档都是正确的,但对于 .tar 存档是错误的,因为 .tar 文件头以存档的文件名开头(参见 Wikipedia 的 TAR 条目)。签名“ustar”(如果存在)位于偏移量 257 (0x0101) 处。
SevenZipSharp 知道这一点并在该地址检查“ustar”,但前提是之前的检测失败。不幸的是,我们的 TAR 文件的名称是“x42202.tar”。而 .dmg 文件的 header ( Apple Disk Image ) 由一个“x”组成(这有多愚蠢,只使用一个字节作为签名??)。所以其实是成功检测到了文件类型,只是检测结果有误。
(我知道,链接的维基百科说,.dmg header 签名是“koly”,但我用我在互联网上找到的下载的 .dmg 文件进行了确认。)
编辑 07.12.2021:签名实际上是“koly”,但所谓的 header 长 512 字节,位于文件的末尾。然而,SevenZipSharp 是在文件开头检查签名的。我测试过的大多数文件(但不是全部!)确实在开头有一个“x”,但我不知道为什么。也许它是一种非官方的 header (“x”似乎来自 MIME 类型“x-apple-diskimage”)。 - 编辑结束。
因此我们修改了 FileSignatureChecker.cs 中的代码,以避免对 .tar 文件进行错误的文件类型检测。
您可以在下面找到原始代码和修改后的代码。
代码库是最新的 SevenZipSharp 版本,可以在 CodePlex archive 中找到。它显然已不再处于活跃开发状态:版本号多年来没有改变,而且如果项目仍然活跃,它应该早已在 CodePlex 停用后迁移到别处。
2018-11-16 更新
修复了修改后代码中的一个错误:此前即使检测到了 enSpecialFormat,也不会将其返回。
2021-12-16 更新
该错误仍然存在于 github 存储库中 https://github.com/squid-box/SevenZipSharp ,这是 SevenZipSharp 项目的当前位置。已上传对错误代码进行重大返工的拉取请求,正在等待合并。
原始代码
/// <summary>
/// Determines the archive format of <paramref name="stream"/> by comparing bytes read from it
/// against the known signatures in Formats.InSignatureFormats.
/// </summary>
/// <param name="stream">Stream to inspect. It must be readable; the method also uses Length and Seek on it.</param>
/// <param name="offset">Set to 0 on entry; set to the byte position of the matched signature when the embedded-archive scan near the end finds one.</param>
/// <param name="isExecutable">Set to true when the leading bytes match a signature mapped to InArchiveFormat.PE, otherwise false.</param>
/// <returns>The format whose signature matched.</returns>
/// <exception cref="ArgumentException">The stream is not readable, is shorter than SIGNATURE_SIZE, or no check matched.</exception>
public static InArchiveFormat CheckSignature (Stream stream, out int offset, out bool isExecutable)
{
offset = 0;
if (!stream.CanRead)
{
throw new ArgumentException ("The stream must be readable.");
}
if (stream.Length < SIGNATURE_SIZE)
{
throw new ArgumentException ("The stream is invalid.");
}
#region Get file signature
var signature = new byte[SIGNATURE_SIZE];
int bytesRequired = SIGNATURE_SIZE;
int index = 0;
stream.Seek (0, SeekOrigin.Begin);
// Read may deliver fewer bytes than requested, so keep reading until the buffer is full.
// NOTE(review): a Read that returns 0 (premature end of stream) never terminates this loop.
while (bytesRequired > 0)
{
int bytesRead = stream.Read (signature, index, bytesRequired);
bytesRequired -= bytesRead;
index += bytesRead;
}
// Hyphen-separated hex text ("XX-XX-..."): every byte occupies three characters.
string actualSignature = BitConverter.ToString (signature);
#endregion
InArchiveFormat suspectedFormat = InArchiveFormat.XZ; // any except PE and Cab
isExecutable = false;
foreach (string expectedSignature in Formats.InSignatureFormats.Keys)
{
// && binds tighter than ||: a signature matches either at the very start, or, for Lzh
// only, starting at character 6 of the hex text (i.e. at the third byte).
if (actualSignature.StartsWith (expectedSignature, StringComparison.OrdinalIgnoreCase) ||
actualSignature.Substring (6).StartsWith (expectedSignature, StringComparison.OrdinalIgnoreCase) &&
Formats.InSignatureFormats[expectedSignature] == InArchiveFormat.Lzh)
{
if (Formats.InSignatureFormats[expectedSignature] == InArchiveFormat.PE)
{
// PE is only remembered as a suspicion; the loop keeps looking for another match.
suspectedFormat = InArchiveFormat.PE;
isExecutable = true;
}
else
{
// NOTE(review): returns on the first leading-bytes match, before the offset-257 tar
// check below is reached. Per the accompanying write-up, a tar whose header starts with
// bytes equal to another format's signature is therefore reported as that other format.
return Formats.InSignatureFormats[expectedSignature];
}
}
}
// Many Microsoft formats
if (actualSignature.StartsWith ("D0-CF-11-E0-A1-B1-1A-E1", StringComparison.OrdinalIgnoreCase))
{
suspectedFormat = InArchiveFormat.Cab; // != InArchiveFormat.XZ
}
#region SpecialDetect
// NOTE(review): the boolean result of this tar probe is discarded, so as written the call
// does not influence the returned format; the try/catch only swallows an ArgumentException
// coming out of SpecialDetect (its body is not shown here).
try
{
SpecialDetect (stream, 257, InArchiveFormat.Tar);
}
catch (ArgumentException) { }
// Probe for an Iso signature at three fixed offsets.
if (SpecialDetect (stream, 0x8001, InArchiveFormat.Iso))
{
return InArchiveFormat.Iso;
}
if (SpecialDetect (stream, 0x8801, InArchiveFormat.Iso))
{
return InArchiveFormat.Iso;
}
if (SpecialDetect (stream, 0x9001, InArchiveFormat.Iso))
{
return InArchiveFormat.Iso;
}
// NOTE(review): identical to the preceding check (same offset 0x9001, same format).
if (SpecialDetect (stream, 0x9001, InArchiveFormat.Iso))
{
return InArchiveFormat.Iso;
}
if (SpecialDetect (stream, 0x400, InArchiveFormat.Hfs))
{
return InArchiveFormat.Hfs;
}
#region Last resort for tar - can mistake
// Heuristic: a stream whose final 1024 bytes are all zero is reported as tar.
if (stream.Length >= 1024)
{
stream.Seek (-1024, SeekOrigin.End);
byte[] buf = new byte[1024];
// NOTE(review): the return value of Read is ignored; bytes that were not delivered stay 0
// in the fresh buffer and would count as zero bytes in the test below.
stream.Read (buf, 0, 1024);
bool istar = true;
for (int i = 0; i < 1024; i++)
{
istar = istar && buf[i] == 0;
}
if (istar)
{
return InArchiveFormat.Tar;
}
}
#endregion
#endregion
#region Check if it is an SFX archive or a file with an embedded archive.
// suspectedFormat is still XZ unless the PE signature or the D0-CF-... signature matched
// above, so this scan runs only for those two kinds of files.
if (suspectedFormat != InArchiveFormat.XZ)
{
#region Get first Min(stream.Length, SFX_SCAN_LENGTH) bytes
var scanLength = Math.Min (stream.Length, SFX_SCAN_LENGTH);
signature = new byte[scanLength];
bytesRequired = (int)scanLength;
index = 0;
stream.Seek (0, SeekOrigin.Begin);
// Same fill-the-buffer loop as above (and the same non-termination caveat if Read returns 0).
while (bytesRequired > 0)
{
int bytesRead = stream.Read (signature, index, bytesRequired);
bytesRequired -= bytesRead;
index += bytesRead;
}
actualSignature = BitConverter.ToString (signature);
#endregion
// The formats are tried in this order; the first one whose signature text occurs anywhere
// in the scanned prefix wins.
foreach (var format in new InArchiveFormat[]
{
InArchiveFormat.Zip,
InArchiveFormat.SevenZip,
InArchiveFormat.Rar,
InArchiveFormat.Cab,
InArchiveFormat.Arj
})
{
int pos = actualSignature.IndexOf (Formats.InSignatureFormatsReversed[format]);
if (pos > -1)
{
// Three characters of hex text per byte, so this converts the text position to a byte offset.
offset = pos / 3;
return format;
}
}
// Nothing
if (suspectedFormat == InArchiveFormat.PE)
{
return InArchiveFormat.PE;
}
}
#endregion
throw new ArgumentException ("The stream is invalid or no corresponding signature was found.");
}
修改代码
/// <summary>
/// Determines the archive format of <paramref name="stream"/> by comparing bytes read from it
/// against the known signatures in Formats.InSignatureFormats.
/// </summary>
/// <remarks>
/// Checks are applied with this priority:
/// 1. a signature at the start of the stream (overridden to Tar when the stream looks like a
///    tar AND is a <see cref="FileStream"/> whose name ends in ".tar");
/// 2. a signature at a format-specific offset (Tar, Iso, Hfs);
/// 3. for PE and compound-document files, a scan of the leading bytes for an embedded archive;
/// 4. as a last resort, Tar when the final 1024 bytes of the stream are all zero.
/// </remarks>
/// <param name="stream">Stream to inspect. It must be readable; the method also uses Length and Seek on it.</param>
/// <param name="offset">Receives the byte position of the signature found by the embedded-archive scan; 0 otherwise.</param>
/// <param name="isExecutable">Receives true when the leading bytes match a signature mapped to InArchiveFormat.PE.</param>
/// <returns>The detected format.</returns>
/// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
/// <exception cref="ArgumentException">
/// The stream is not readable, is shorter than SIGNATURE_SIZE, ends before the expected number
/// of bytes could be read, or no check matched.
/// </exception>
public static InArchiveFormat CheckSignature (Stream stream, out int offset, out bool isExecutable)
{
    offset = 0;
    isExecutable = false;

    if (stream == null)
    {
        throw new ArgumentNullException ("stream");
    }
    if (!stream.CanRead)
    {
        throw new ArgumentException ("The stream must be readable.");
    }
    if (stream.Length < SIGNATURE_SIZE)
    {
        throw new ArgumentException ("The stream is invalid.");
    }

    // Leading bytes as hyphen-separated hex text ("XX-XX-..."): three characters per byte.
    var signature = new byte[SIGNATURE_SIZE];
    stream.Seek (0, SeekOrigin.Begin);
    FillBufferFromStream (stream, signature);
    string actualSignature = BitConverter.ToString (signature);

    // Lzh signatures are compared starting at the third byte (character 6 of the hex text).
    string signatureFromThirdByte = actualSignature.Substring (6);

    InArchiveFormat suspectedFormat = InArchiveFormat.XZ; // any except PE and Cab
    InArchiveFormat? detectedFormat = null;
    InArchiveFormat? specialFormat = null;

    foreach (string expectedSignature in Formats.InSignatureFormats.Keys)
    {
        InArchiveFormat candidate = Formats.InSignatureFormats[expectedSignature];
        bool matchesAtStart =
            actualSignature.StartsWith (expectedSignature, StringComparison.OrdinalIgnoreCase);
        bool matchesLzh =
            candidate == InArchiveFormat.Lzh &&
            signatureFromThirdByte.StartsWith (expectedSignature, StringComparison.OrdinalIgnoreCase);
        if (!matchesAtStart && !matchesLzh)
        {
            continue;
        }

        if (candidate == InArchiveFormat.PE)
        {
            // PE is only a suspicion; keep looking for a more specific match.
            suspectedFormat = InArchiveFormat.PE;
            isExecutable = true;
        }
        else
        {
            // Do not return yet: a tar can start with bytes that equal another format's signature.
            detectedFormat = candidate;
            break;
        }
    }

    // Many Microsoft formats
    if (actualSignature.StartsWith ("D0-CF-11-E0-A1-B1-1A-E1", StringComparison.OrdinalIgnoreCase))
    {
        suspectedFormat = InArchiveFormat.Cab; // != InArchiveFormat.XZ
    }

    // Signatures that live at a fixed, format-specific offset rather than at the start.
    if (SpecialDetect (stream, 257, InArchiveFormat.Tar))
    {
        specialFormat = InArchiveFormat.Tar;
    }
    else if (SpecialDetect (stream, 0x8001, InArchiveFormat.Iso) ||
             SpecialDetect (stream, 0x8801, InArchiveFormat.Iso) ||
             SpecialDetect (stream, 0x9001, InArchiveFormat.Iso))
    {
        specialFormat = InArchiveFormat.Iso;
    }
    else if (SpecialDetect (stream, 0x400, InArchiveFormat.Hfs))
    {
        specialFormat = InArchiveFormat.Hfs;
    }

    // Heuristic for tar: the final 1024 bytes of the stream are all zero. Can be mistaken.
    bool possiblyTar = false;
    if (stream.Length >= 1024)
    {
        var tail = new byte[1024];
        stream.Seek (-1024, SeekOrigin.End);
        // Read the tail completely: bytes left unread would stay 0 and fake a positive result.
        FillBufferFromStream (stream, tail);
        possiblyTar = Array.TrueForAll (tail, value => value == 0);
    }

    // A TAR header starts with a file name. The name can be anything, including the
    // identifiers of other archive formats, so a TAR can be misinterpreted as any of them.
    // Only when the stream looks like a tar AND the file is named *.tar is the
    // leading-bytes result overridden.
    if (specialFormat == InArchiveFormat.Tar || possiblyTar)
    {
        var fileStream = stream as FileStream;
        if (fileStream != null &&
            fileStream.Name.EndsWith (".tar", StringComparison.OrdinalIgnoreCase))
        {
            detectedFormat = InArchiveFormat.Tar;
        }
    }

    if (detectedFormat.HasValue)
    {
        return detectedFormat.Value;
    }
    if (specialFormat.HasValue)
    {
        return specialFormat.Value;
    }

    // SFX archive or file with an embedded archive. suspectedFormat is still XZ unless the
    // PE signature or the D0-CF-... signature matched above.
    if (suspectedFormat != InArchiveFormat.XZ)
    {
        int scanLength = (int) Math.Min (stream.Length, SFX_SCAN_LENGTH);
        var scanned = new byte[scanLength];
        stream.Seek (0, SeekOrigin.Begin);
        FillBufferFromStream (stream, scanned);
        string scannedSignature = BitConverter.ToString (scanned);

        foreach (InArchiveFormat format in new InArchiveFormat[]
                 {
                     InArchiveFormat.Zip,
                     InArchiveFormat.SevenZip,
                     InArchiveFormat.Rar,
                     InArchiveFormat.Cab,
                     InArchiveFormat.Arj
                 })
        {
            // Explicit comparison mode: hex text is not linguistic data, and the start-of-stream
            // comparisons above are case-insensitive as well.
            int pos = scannedSignature.IndexOf (
                Formats.InSignatureFormatsReversed[format], StringComparison.OrdinalIgnoreCase);
            if (pos > -1)
            {
                // Three characters of hex text per byte: convert text position to byte offset.
                offset = pos / 3;
                return format;
            }
        }

        // Nothing embedded was found.
        if (suspectedFormat == InArchiveFormat.PE)
        {
            return InArchiveFormat.PE;
        }
    }

    // Last resort for tar: nothing else matched, but the stream ends in 1024 zero bytes.
    // Without this, a tar lacking the offset-257 signature is only recognised when it is a
    // FileStream named *.tar.
    if (possiblyTar)
    {
        return InArchiveFormat.Tar;
    }

    throw new ArgumentException ("The stream is invalid or no corresponding signature was found.");
}

/// <summary>
/// Fills <paramref name="buffer"/> completely, starting at the current position of
/// <paramref name="stream"/>.
/// </summary>
/// <param name="stream">Stream to read from.</param>
/// <param name="buffer">Buffer to fill; its length is the number of bytes required.</param>
/// <exception cref="ArgumentException">The stream ended before the buffer was full.</exception>
private static void FillBufferFromStream (Stream stream, byte[] buffer)
{
    int filled = 0;
    while (filled < buffer.Length)
    {
        // Read may deliver fewer bytes than requested; 0 means no more data will come.
        int bytesRead = stream.Read (buffer, filled, buffer.Length - filled);
        if (bytesRead <= 0)
        {
            throw new ArgumentException ("The stream is invalid.");
        }
        filled += bytesRead;
    }
}