From 7359c07c4658d0406658914f4c0458260fc976a7 Mon Sep 17 00:00:00 2001 From: Ladislav Zezula Date: Sat, 12 Feb 2022 20:42:23 +0100 Subject: Defragmenting hash table replaced by invalidating bad hash table entries --- src/SBaseFileTable.cpp | 57 ++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 41 insertions(+), 16 deletions(-) (limited to 'src/SBaseFileTable.cpp') diff --git a/src/SBaseFileTable.cpp b/src/SBaseFileTable.cpp index 90b7aa4..bcbc5e1 100644 --- a/src/SBaseFileTable.cpp +++ b/src/SBaseFileTable.cpp @@ -769,7 +769,6 @@ static TMPQHash * GetHashEntryLocale(TMPQArchive * ha, const char * szFileName, { TMPQHash * pFirstHash = GetFirstHashEntry(ha, szFileName); TMPQHash * pBestEntry = NULL; - TMPQHash * p1stEntry = NULL; TMPQHash * pHash = pFirstHash; // Parse the found hashes @@ -789,7 +788,6 @@ static TMPQHash * GetHashEntryLocale(TMPQArchive * ha, const char * szFileName, { if(pHash->Platform == 0 || pHash->Platform == Platform) { - p1stEntry = (p1stEntry != NULL) ? p1stEntry : pHash; pBestEntry = pHash; } } @@ -798,17 +796,7 @@ static TMPQHash * GetHashEntryLocale(TMPQArchive * ha, const char * szFileName, pHash = GetNextHashEntry(ha, pFirstHash, pHash); } - // - // Different processing (Starcraft vs. Warcraft III), abused by some protectors - // - // * Starcraft I: for an entry with locale&platform = 0, then the first entry is returned - // Map: MPQ_2022_v1_Sniper.scx - // * Warcraft III: for an entry with locale&platform = 0, then the last entry is returned - // Map: MPQ_2015_v1_ProtectedMap_Spazy.w3x - // - - if(ha->dwValidFileFlags == MPQ_FILE_VALID_FLAGS_SCX) - return p1stEntry; + // Return the best entry that we found return pBestEntry; } @@ -840,6 +828,7 @@ static TMPQHash * GetHashEntryExact(TMPQArchive * ha, const char * szFileName, L // are not HASH_ENTRY_FREE, the startup search index does not matter. // Hash table is circular, so as long as there is no terminator, // all entries will be found. 
+/* static TMPQHash * DefragmentHashTable( TMPQArchive * ha, TMPQHash * pHashTable, @@ -894,6 +883,29 @@ static TMPQHash * DefragmentHashTable( return pHashTable; } +*/ + +static void DeleteInvalidHashTableEntries(TMPQArchive * ha, TMPQHash * pHashTable, TMPQBlock * pBlockTable) +{ + TMPQHeader * pHeader = ha->pHeader; + TMPQHash * pHashTableEnd = pHashTable + pHeader->dwHashTableSize; + TMPQHash * pHash = pHashTable; + + // Sanity checks + assert(pHeader->wFormatVersion == MPQ_FORMAT_VERSION_1); + assert(pHeader->HiBlockTablePos64 == 0); + + // Parse the hash table and move the entries to the begin of it + for(pHash = pHashTable; pHash < pHashTableEnd; pHash++) + { + // Check whether this is a valid hash table entry + if(!IsValidHashEntry1(ha, pHash, pBlockTable)) + { + memset(pHash, 0xFF, sizeof(TMPQHash)); + pHash->dwBlockIndex = HASH_ENTRY_DELETED; + } + } +} static DWORD BuildFileTableFromBlockTable( TMPQArchive * ha, @@ -911,11 +923,24 @@ static DWORD BuildFileTableFromBlockTable( assert(ha->pFileTable != NULL); assert(ha->dwFileTableSize >= ha->dwMaxFileCount); - // Defragment the hash table, if needed + // + // Defragmentation of the hash table was removed. The reason is a MPQ protector, + // two hash entries with the same name, where only the second one is valid. 
+ // The index of the first entry (HashString(szFileName, 0)) points to the second one: + // + // NameA NameB BlkIdx Name + // B701656E FCFB1EED 0000001C staredit\scenario.chk (correct one) + // --> B701656E FCFB1EED 0000001D staredit\scenario.chk (corrupt one) + // + // Defragmenting the hash table corrupts the order and "staredit\scenario.chk" can't be read + // Example MPQ: MPQ_2022_v1_Sniper.scx + // + if(ha->dwFlags & MPQ_FLAG_HASH_TABLE_CUT) { - ha->pHashTable = DefragmentHashTable(ha, ha->pHashTable, pBlockTable); - ha->dwMaxFileCount = pHeader->dwHashTableSize; + //ha->pHashTable = DefragmentHashTable(ha, ha->pHashTable, pBlockTable); + //ha->dwMaxFileCount = pHeader->dwHashTableSize; + DeleteInvalidHashTableEntries(ha, ha->pHashTable, pBlockTable); } // If the hash table or block table is cut, -- cgit v1.2.3 From 6a107601c628ec15b5d7947d6f51d00d709b0e7a Mon Sep 17 00:00:00 2001 From: Ladislav Zezula Date: Sat, 12 Feb 2022 21:14:55 +0100 Subject: Fixed regression tests --- src/SBaseFileTable.cpp | 33 +++++---------------------------- test/StormTest.cpp | 2 +- 2 files changed, 6 insertions(+), 29 deletions(-) (limited to 'src/SBaseFileTable.cpp') diff --git a/src/SBaseFileTable.cpp b/src/SBaseFileTable.cpp index bcbc5e1..1b79243 100644 --- a/src/SBaseFileTable.cpp +++ b/src/SBaseFileTable.cpp @@ -885,28 +885,6 @@ static TMPQHash * DefragmentHashTable( } */ -static void DeleteInvalidHashTableEntries(TMPQArchive * ha, TMPQHash * pHashTable, TMPQBlock * pBlockTable) -{ - TMPQHeader * pHeader = ha->pHeader; - TMPQHash * pHashTableEnd = pHashTable + pHeader->dwHashTableSize; - TMPQHash * pHash = pHashTable; - - // Sanity checks - assert(pHeader->wFormatVersion == MPQ_FORMAT_VERSION_1); - assert(pHeader->HiBlockTablePos64 == 0); - - // Parse the hash table and move the entries to the begin of it - for(pHash = pHashTable; pHash < pHashTableEnd; pHash++) - { - // Check whether this is a valid hash table entry - if(!IsValidHashEntry1(ha, pHash, pBlockTable)) - { - 
memset(pHash, 0xFF, sizeof(TMPQHash)); - pHash->dwBlockIndex = HASH_ENTRY_DELETED; - } - } -} - static DWORD BuildFileTableFromBlockTable( TMPQArchive * ha, TMPQBlock * pBlockTable) @@ -936,12 +914,11 @@ static DWORD BuildFileTableFromBlockTable( // Example MPQ: MPQ_2022_v1_Sniper.scx // - if(ha->dwFlags & MPQ_FLAG_HASH_TABLE_CUT) - { - //ha->pHashTable = DefragmentHashTable(ha, ha->pHashTable, pBlockTable); - //ha->dwMaxFileCount = pHeader->dwHashTableSize; - DeleteInvalidHashTableEntries(ha, ha->pHashTable, pBlockTable); - } + //if(ha->dwFlags & MPQ_FLAG_HASH_TABLE_CUT) + //{ + // ha->pHashTable = DefragmentHashTable(ha, ha->pHashTable, pBlockTable); + // ha->dwMaxFileCount = pHeader->dwHashTableSize; + //} // If the hash table or block table is cut, // we will defragment the block table diff --git a/test/StormTest.cpp b/test/StormTest.cpp index 3a6e573..a2db1a6 100644 --- a/test/StormTest.cpp +++ b/test/StormTest.cpp @@ -4290,7 +4290,7 @@ static const TEST_INFO Test_Mpqs[] = {_T("MPQ_2015_v1_flem1.w3x"), NULL, TEST_DATA("1c4c13e627658c473e84d94371e31f37", 20)}, {_T("MPQ_2002_v1_ProtectedMap_HashTable_FakeValid.w3x"), NULL, TEST_DATA("5250975ed917375fc6540d7be436d4de", 114)}, {_T("MPQ_2021_v1_CantExtractCHK.scx"), NULL, TEST_DATA("055fd548a789c910d9dd37472ecc1e66", 28)}, - {_T("MPQ_2022_v1_Sniper.scx"), NULL, TEST_DATA("2e955271b70b79344ad85b698f6ce9d8", 63)}, // Multiple items in hash table for staredit\scenario.chk (locale=0, platform=0) + {_T("MPQ_2022_v1_Sniper.scx"), NULL, TEST_DATA("2e955271b70b79344ad85b698f6ce9d8", 64)}, // Multiple items in hash table for staredit\scenario.chk (locale=0, platform=0) {_T("MPQ_2022_v1_OcOc_Bound_2.scx"), NULL, TEST_DATA("25cad16a2fb4e883767a1f512fc1dce7", 16)}, }; -- cgit v1.2.3 From 503ab19d3d4253fb013752572c19c314d19de792 Mon Sep 17 00:00:00 2001 From: Ladislav Zezula Date: Sun, 13 Feb 2022 07:40:22 +0100 Subject: SFileAddListFile optimized for protectors that set too large hash table --- src/SBaseCommon.cpp | 12 
+++++++----- src/SBaseFileTable.cpp | 15 +++++++++------ src/SFileListFile.cpp | 42 +++++++++++++++++++++++++++++++++--------- src/StormCommon.h | 2 +- src/StormLib.h | 1 + test/StormTest.cpp | 19 +++++++------------ 6 files changed, 58 insertions(+), 33 deletions(-) (limited to 'src/SBaseFileTable.cpp') diff --git a/src/SBaseCommon.cpp b/src/SBaseCommon.cpp index 5b81bad..1209719 100644 --- a/src/SBaseCommon.cpp +++ b/src/SBaseCommon.cpp @@ -992,7 +992,7 @@ void * LoadMpqTable( DWORD dwCompressedSize, DWORD dwTableSize, DWORD dwKey, - bool * pbTableIsCut) + DWORD * PtrRealTableSize) { ULONGLONG FileSize = 0; LPBYTE pbCompressed = NULL; @@ -1037,13 +1037,15 @@ void * LoadMpqTable( // Fill the extra data with zeros dwBytesToRead = (DWORD)(FileSize - ByteOffset); memset(pbMpqTable + dwBytesToRead, 0, (dwTableSize - dwBytesToRead)); - - // Give the caller information that the table was cut - if(pbTableIsCut != NULL) - pbTableIsCut[0] = true; } } + // Give the caller information that the table was cut + if(PtrRealTableSize != NULL) + { + PtrRealTableSize[0] = dwBytesToRead; + } + // If everything succeeded, read the raw table from the MPQ if(FileStream_Read(ha->pStream, &ByteOffset, pbToRead, dwBytesToRead)) { diff --git a/src/SBaseFileTable.cpp b/src/SBaseFileTable.cpp index 1b79243..94e4469 100644 --- a/src/SBaseFileTable.cpp +++ b/src/SBaseFileTable.cpp @@ -2329,7 +2329,7 @@ static TMPQHash * LoadHashTable(TMPQArchive * ha) TMPQHash * pHashTable = NULL; DWORD dwTableSize; DWORD dwCmpSize; - bool bHashTableIsCut = false; + DWORD dwRealTableSize = 0; // Note: It is allowed to load hash table if it is at offset 0. 
// Example: MPQ_2016_v1_ProtectedMap_HashOffsIsZero.w3x @@ -2351,12 +2351,15 @@ static TMPQHash * LoadHashTable(TMPQArchive * ha) dwCmpSize = (DWORD)pHeader->HashTableSize64; // Read, decrypt and uncompress the hash table - pHashTable = (TMPQHash *)LoadMpqTable(ha, ByteOffset, pHeader->MD5_HashTable, dwCmpSize, dwTableSize, g_dwHashTableKey, &bHashTableIsCut); + pHashTable = (TMPQHash *)LoadMpqTable(ha, ByteOffset, pHeader->MD5_HashTable, dwCmpSize, dwTableSize, g_dwHashTableKey, &dwRealTableSize); // DumpHashTable(pHashTable, pHeader->dwHashTableSize); // If the hash table was cut, we can/have to defragment it - if(pHashTable != NULL && bHashTableIsCut) + if(pHashTable != NULL && dwRealTableSize != 0 && dwRealTableSize < dwTableSize) + { + ha->dwRealHashTableSize = dwRealTableSize; ha->dwFlags |= (MPQ_FLAG_MALFORMED | MPQ_FLAG_HASH_TABLE_CUT); + } break; case MPQ_SUBTYPE_SQP: @@ -2390,7 +2393,7 @@ TMPQBlock * LoadBlockTable(TMPQArchive * ha, bool /* bDontFixEntries */) ULONGLONG ByteOffset; DWORD dwTableSize; DWORD dwCmpSize; - bool bBlockTableIsCut = false; + DWORD dwRealTableSize; // Note: It is possible that the block table starts at offset 0 // Example: MPQ_2016_v1_ProtectedMap_HashOffsIsZero.w3x @@ -2412,10 +2415,10 @@ TMPQBlock * LoadBlockTable(TMPQArchive * ha, bool /* bDontFixEntries */) dwCmpSize = (DWORD)pHeader->BlockTableSize64; // Read, decrypt and uncompress the block table - pBlockTable = (TMPQBlock * )LoadMpqTable(ha, ByteOffset, NULL, dwCmpSize, dwTableSize, g_dwBlockTableKey, &bBlockTableIsCut); + pBlockTable = (TMPQBlock * )LoadMpqTable(ha, ByteOffset, NULL, dwCmpSize, dwTableSize, g_dwBlockTableKey, &dwRealTableSize); // If the block table was cut, we need to remember it - if(pBlockTable != NULL && bBlockTableIsCut) + if(pBlockTable != NULL && dwRealTableSize && dwRealTableSize < dwTableSize) ha->dwFlags |= (MPQ_FLAG_MALFORMED | MPQ_FLAG_BLOCK_TABLE_CUT); break; diff --git a/src/SFileListFile.cpp b/src/SFileListFile.cpp index a1e16b0..8ce9ae4 
100644 --- a/src/SFileListFile.cpp +++ b/src/SFileListFile.cpp @@ -409,8 +409,10 @@ static LPBYTE CreateListFile(TMPQArchive * ha, DWORD * pcbListFile) static DWORD SListFileCreateNodeForAllLocales(TMPQArchive * ha, const char * szFileName) { TFileEntry * pFileEntry; - TMPQHash * pFirstHash; + TMPQHash * pHashEnd; TMPQHash * pHash; + DWORD dwName1; + DWORD dwName2; // If we have HET table, use that one if(ha->pHetTable != NULL) @@ -428,17 +430,39 @@ static DWORD SListFileCreateNodeForAllLocales(TMPQArchive * ha, const char * szF // If we have hash table, we use it if(ha->pHashTable != NULL) { - // Go while we found something - pFirstHash = pHash = GetFirstHashEntry(ha, szFileName); - while(pHash != NULL) + // Get the end of the hash table and both names + pHashEnd = ha->pHashTable + ha->pHeader->dwHashTableSize; + dwName1 = ha->pfnHashString(szFileName, MPQ_HASH_NAME_A); + dwName2 = ha->pfnHashString(szFileName, MPQ_HASH_NAME_B); + + // Some protectors set very high hash table size (0x00400000 items or more) + // in order to make this process very slow. We will ignore items + // in the hash table that would be beyond the end of the file. 
+ // Example MPQ: MPQ_2022_v1_Sniper.scx + if(ha->dwFlags & MPQ_FLAG_HASH_TABLE_CUT) + pHashEnd = ha->pHashTable + (ha->dwRealHashTableSize / sizeof(TMPQHash)); + + // Go through the hash table and put the name in each item that has the same name pair + for(pHash = ha->pHashTable; pHash < pHashEnd; pHash++) { - // Allocate file name for the file entry - AllocateFileName(ha, ha->pFileTable + MPQ_BLOCK_INDEX(pHash), szFileName); - - // Now find the next language version of the file - pHash = GetNextHashEntry(ha, pFirstHash, pHash); + if(pHash->dwName1 == dwName1 && pHash->dwName2 == dwName2 && MPQ_BLOCK_INDEX(pHash) < ha->dwFileTableSize) + { + // Allocate file name for the file entry + AllocateFileName(ha, ha->pFileTable + MPQ_BLOCK_INDEX(pHash), szFileName); + } } + // Go while we found something + //pFirstHash = pHash = GetFirstHashEntry(ha, szFileName); + //while(pHash != NULL) + //{ + // // Allocate file name for the file entry + // AllocateFileName(ha, ha->pFileTable + MPQ_BLOCK_INDEX(pHash), szFileName); + + // // Now find the next language version of the file + // pHash = GetNextHashEntry(ha, pFirstHash, pHash); + //} + return ERROR_SUCCESS; } diff --git a/src/StormCommon.h b/src/StormCommon.h index f452112..865d975 100644 --- a/src/StormCommon.h +++ b/src/StormCommon.h @@ -318,7 +318,7 @@ int SCompDecompressMpk(void * pvOutBuffer, int * pcbOutBuffer, void * pvInBuffer TMPQFile * CreateFileHandle(TMPQArchive * ha, TFileEntry * pFileEntry); TMPQFile * CreateWritableHandle(TMPQArchive * ha, DWORD dwFileSize); -void * LoadMpqTable(TMPQArchive * ha, ULONGLONG ByteOffset, LPBYTE pbTableHash, DWORD dwCompressedSize, DWORD dwRealSize, DWORD dwKey, bool * pbTableIsCut); +void * LoadMpqTable(TMPQArchive * ha, ULONGLONG ByteOffset, LPBYTE pbTableHash, DWORD dwCompressedSize, DWORD dwRealSize, DWORD dwKey, DWORD * PtrRealTableSize); DWORD AllocateSectorBuffer(TMPQFile * hf); DWORD AllocatePatchInfo(TMPQFile * hf, bool bLoadFromFile); DWORD AllocateSectorOffsets(TMPQFile 
* hf, bool bLoadFromFile); diff --git a/src/StormLib.h b/src/StormLib.h index adfa616..d95c2a2 100644 --- a/src/StormLib.h +++ b/src/StormLib.h @@ -844,6 +844,7 @@ typedef struct _TMPQArchive DWORD dwFileFlags3; // Flags for (signature) DWORD dwAttrFlags; // Flags for the (attributes) file, see MPQ_ATTRIBUTE_XXX DWORD dwValidFileFlags; // Valid flags for the current MPQ + DWORD dwRealHashTableSize; // Real size of the hash table, if MPQ_FLAG_HASH_TABLE_CUT is set in dwFlags DWORD dwFlags; // See MPQ_FLAG_XXXXX DWORD dwSubType; // See MPQ_SUBTYPE_XXX diff --git a/test/StormTest.cpp b/test/StormTest.cpp index a2db1a6..53d2125 100644 --- a/test/StormTest.cpp +++ b/test/StormTest.cpp @@ -4331,7 +4331,7 @@ int _tmain(int argc, TCHAR * argv[]) for(int i = 2; i < argc; i++) { - TestArchive(argv[i], NULL, TFLG_FILE_LOCALE | 0x0409, "staredit\\scenario.chk", NULL); + TestArchive(argv[i], Bliz, TFLG_FILE_LOCALE | 0x0409, "staredit\\scenario.chk", NULL); } // @@ -4344,15 +4344,6 @@ int _tmain(int argc, TCHAR * argv[]) dwErrCode = TestOnLocalListFile(_T("ListFile_Blizzard.txt")); } - // - // Search all testing archives and verify their SHA1 hash - // - - if(dwErrCode == ERROR_SUCCESS) - { - dwErrCode = FindFiles(ForEachFile_VerifyFileChecksum, szMpqSubDir); - } - // // Test file stream operations // @@ -4422,14 +4413,18 @@ int _tmain(int argc, TCHAR * argv[]) } } - // Open the multi-file archive with wrong prefix to see how StormLib deals with it + // Verify SHA1 of each MPQ that we have in the list if(dwErrCode == ERROR_SUCCESS) - dwErrCode = TestOpenArchive_WillFail(_T("flat-file://streaming/model.MPQ.0")); + dwErrCode = FindFiles(ForEachFile_VerifyFileChecksum, szMpqSubDir); // Open every MPQ that we have in the storage if(dwErrCode == ERROR_SUCCESS) dwErrCode = FindFiles(ForEachFile_OpenArchive, NULL); // Open the multi-file archive with wrong prefix to see how StormLib deals with it if(dwErrCode == ERROR_SUCCESS) dwErrCode = 
TestOpenArchive_WillFail(_T("flat-file://streaming/model.MPQ.0")); + // Test on an archive that has been invalidated by extending an old valid MPQ if(dwErrCode == ERROR_SUCCESS) dwErrCode = TestOpenArchive_Corrupt(_T("MPQ_2013_vX_Battle.net.MPQ")); -- cgit v1.2.3