From 2ec11ad1fd50098a789a9b477bb9c4240f569e7e Mon Sep 17 00:00:00 2001 From: Ladislav Zezula Date: Sat, 2 Nov 2024 09:18:37 +0100 Subject: Added functions for conversions between MPQ file name <-> Safe file name --- CMakeLists.txt | 1 + StormLib.vcxproj | 1 + StormLib.vcxproj.filters | 3 + StormLib_dll.vcxproj | 1 + StormLib_dll.vcxproj.filters | 3 + StormLib_test.vcxproj | 1 + StormLib_test.vcxproj.filters | 3 + StormLib_vs08.vcproj | 164 +++++++++++++ StormLib_vs08_dll.vcproj | 84 +++++++ StormLib_vs08_test.vcproj | 36 +++ src/FileStream.cpp | 4 +- src/SBaseCommon.cpp | 6 +- src/SMemUtf8.cpp | 551 ++++++++++++++++++++++++++++++++++++++++++ src/StormCommon.h | 73 +++++- src/StormLib.h | 31 ++- src/StormPort.h | 38 ++- src/wdk/sources-cpp.cpp | 1 + test/StormTest.cpp | 189 ++++++++++++--- test/stormlib-test-001.txt | 3 +- 19 files changed, 1119 insertions(+), 74 deletions(-) create mode 100644 src/SMemUtf8.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 5bf15df..8d17ebe 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -60,6 +60,7 @@ set(SRC_FILES src/SFilePatchArchives.cpp src/SFileReadFile.cpp src/SFileVerify.cpp + src/SMemUtf8.cpp src/libtomcrypt/src/pk/rsa/rsa_verify_simple.c src/libtomcrypt/src/misc/crypt_libc.c ) diff --git a/StormLib.vcxproj b/StormLib.vcxproj index 26d8f70..57cea24 100644 --- a/StormLib.vcxproj +++ b/StormLib.vcxproj @@ -1006,6 +1006,7 @@ + diff --git a/StormLib.vcxproj.filters b/StormLib.vcxproj.filters index 22c9793..024fa9d 100644 --- a/StormLib.vcxproj.filters +++ b/StormLib.vcxproj.filters @@ -217,5 +217,8 @@ Source Files + + Source Files + \ No newline at end of file diff --git a/StormLib_dll.vcxproj b/StormLib_dll.vcxproj index f437bd9..67899f5 100644 --- a/StormLib_dll.vcxproj +++ b/StormLib_dll.vcxproj @@ -322,6 +322,7 @@ Level4 Level4 + diff --git a/StormLib_dll.vcxproj.filters b/StormLib_dll.vcxproj.filters index 2eaf026..0e05d88 100644 --- a/StormLib_dll.vcxproj.filters +++ b/StormLib_dll.vcxproj.filters @@ -220,6 +220,9 @@ Source Files + + Source Files + diff --git a/StormLib_test.vcxproj b/StormLib_test.vcxproj index 18cee5b..71aebf9 100644 --- a/StormLib_test.vcxproj +++ b/StormLib_test.vcxproj @@ -336,6 +336,7 @@ Level4 Level4 + diff --git a/StormLib_test.vcxproj.filters b/StormLib_test.vcxproj.filters index d341b12..cf1aa5e 100644 --- a/StormLib_test.vcxproj.filters +++ b/StormLib_test.vcxproj.filters @@ -226,5 +226,8 @@ Source Files + + Source Files + \ No newline at end of file diff --git a/StormLib_vs08.vcproj b/StormLib_vs08.vcproj index 031dde8..3f8674d 100644 --- a/StormLib_vs08.vcproj +++ b/StormLib_vs08.vcproj @@ -4038,6 +4038,170 @@ /> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/StormLib_vs08_dll.vcproj b/StormLib_vs08_dll.vcproj index 66f0705..b51db31 100644 --- a/StormLib_vs08_dll.vcproj +++ b/StormLib_vs08_dll.vcproj @@ -1680,6 +1680,90 @@ /> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/StormLib_vs08_test.vcproj b/StormLib_vs08_test.vcproj index d3506ea..48f2dbd 100644 --- a/StormLib_vs08_test.vcproj +++ b/StormLib_vs08_test.vcproj @@ -1038,6 +1038,42 @@ /> + + + + + + + + + + + + + + diff --git a/src/FileStream.cpp b/src/FileStream.cpp index b66098c..39db0a0 100644 --- a/src/FileStream.cpp +++ b/src/FileStream.cpp @@ -78,7 +78,7 @@ static void CreateNameWithSuffix(LPTSTR szBuffer, size_t cchMaxChars, LPCTSTR sz *szBuffer++ = '.'; // Append the number - IntToString(szBuffer, szBufferEnd - szBuffer + 1, nValue); + SMemIntToStr(szBuffer, szBufferEnd - szBuffer + 1, nValue); } //----------------------------------------------------------------------------- @@ -1765,7 +1765,7 @@ static void PartStream_Close(TBlockStream * pStream) // Make sure that the header is properly BSWAPed BSWAP_ARRAY32_UNSIGNED(&PartHeader, sizeof(PART_FILE_HEADER)); - IntToString(PartHeader.GameBuildNumber, _countof(PartHeader.GameBuildNumber), pStream->BuildNumber); + SMemIntToStr(PartHeader.GameBuildNumber, _countof(PartHeader.GameBuildNumber), pStream->BuildNumber); // Write the part header pStream->BaseWrite(pStream, &ByteOffset, &PartHeader, sizeof(PART_FILE_HEADER)); diff --git a/src/SBaseCommon.cpp b/src/SBaseCommon.cpp index 0de7864..3f95ded 100644 --- a/src/SBaseCommon.cpp +++ b/src/SBaseCommon.cpp @@ -30,7 +30,7 @@ LCID g_lcFileLocale = 0; // Compound of file locale and p // Converts ASCII characters to lowercase // Converts slash (0x2F) to backslash (0x5C) -unsigned char AsciiToLowerTable[256] = +const unsigned char AsciiToLowerTable[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, @@ -52,7 +52,7 @@ unsigned char AsciiToLowerTable[256] = // Converts ASCII characters to uppercase // Converts slash (0x2F) to backslash (0x5C) -unsigned char AsciiToUpperTable[256] = +const unsigned char AsciiToUpperTable[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, @@ -135,7 +135,7 @@ void StringCreatePseudoFileName(char * szBuffer, size_t cchMaxChars, unsigned in szBuffer = StringCopy(szBuffer, (szBufferEnd - szBuffer), "File"); // Number - szBuffer = IntToString(szBuffer, szBufferEnd - szBuffer + 1, nIndex, 8); + szBuffer = SMemIntToStr(szBuffer, szBufferEnd - szBuffer + 1, nIndex, 8); // Dot if(szBuffer < szBufferEnd) diff --git a/src/SMemUtf8.cpp b/src/SMemUtf8.cpp new file mode 100644 index 0000000..5832422 --- /dev/null +++ b/src/SMemUtf8.cpp @@ -0,0 +1,551 @@ +/*****************************************************************************/ +/* SFileVerify.cpp Copyright (c) Ladislav Zezula 2010 */ +/*---------------------------------------------------------------------------*/ +/* Support for conversion of UTF-8 <-> File name */ +/* */ +/* File names in the MPQs are assumed to be UTF-8. However, bad sequences */ +/* or filename unsafe characters are allowed in the list files, but won't */ +/* work in unpacking files from MPQ to a local file. */ +/* */ +/* This module contains cross-platform comparable conversion between UTF-8 */ +/* and file names that will produce identical file names across platforms. */ +/*---------------------------------------------------------------------------*/ +/* Date Ver Who Comment */ +/* -------- ---- --- ------- */ +/* 31.10.24 1.00 Lad Created */ +/*****************************************************************************/ + +#define __STORMLIB_SELF__ +#include "StormLib.h" +#include "StormCommon.h" + +//----------------------------------------------------------------------------- +// Local defines + +#define MAX_INVALID_CHARS 128 // Maximum number of invalid characters in a row + +//----------------------------------------------------------------------------- +// Conversion tables + +const unsigned char SMemCharToByte[0x80] = +{ + // 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0xFF + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x10 + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x20 + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x30 + 0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x40 + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x50 + 0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x60 + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF // 0x70 +}; + +//----------------------------------------------------------------------------- +// Local functions + +// Bit mask of characters that are file name safe. We will maintain +// the same charset even on non-Windows in order to keep the file names equal +static unsigned int FileNameSafeChars[4] = +{ + 0x00000000, 0x2BFF7BFB, 0xFFFFFFFF, 0xEFFFFFFF // Windows: [0x20-0x7F], except 0x22, 0x2A, 0x2F, 0x3A, 0x3C, 0x3E, 0x3F, 0x7C +// 0xfffffffe, 0xffff7fff, 0xffffffff, 0xffffffff // Linux: [0x01-0x7F], except 0x2F +}; + +static bool UTF8_IsBadFileNameCharacter(DWORD ch) +{ + // It is guaranteed that the character is in range of 0x00 - 0x7F + assert(ch < 0x80); + + // Use the bit from the table + return (FileNameSafeChars[ch / 32] & (1 << (ch % 32))) ? false : true; +} + +static DWORD UTF8_DecodeSequence(const BYTE * pbString, BYTE BitsMask, size_t ccFollowBytes, DWORD dwMinValue, DWORD dwMaxValue, DWORD & dwCodePoint, size_t & ccBytesEaten) +{ + const BYTE * pbSaveString = pbString; + DWORD dwAccumulator; + + // Extract the low bits from the leading byte + dwAccumulator = pbString[0] & BitsMask; + ccBytesEaten = 1; + pbString++; + + // Process the follow-up bytes + for(size_t i = 0; i < ccFollowBytes; i++) + { + // Every follow-up byte in the UTF-8 sequence must start with 10xxxxxx + if((pbString[0] & 0xC0) != 0x80) + return ERROR_NO_UNICODE_TRANSLATION; + + // Add 6 bits to the accumulator + dwAccumulator = (dwAccumulator << 6) | (*pbString++ & 0x3F); + } + + // Check whether the code point is in the given range + if(!(dwMinValue <= dwAccumulator && dwAccumulator <= dwMaxValue)) + return ERROR_INVALID_DATA; + + // Give the number of bytes eaten and the decoded code point + ccBytesEaten = (pbString - pbSaveString); + dwCodePoint = dwAccumulator; + return ERROR_SUCCESS; +} + +// https://en.wikipedia.org/wiki/UTF-8 +static DWORD UTF8_DecodeCodePoint(const BYTE * pbString, const BYTE * pbStringEnd, DWORD & dwCodePoint, size_t & ccBytesEaten) +{ + // Reset the number of bytes eaten + dwCodePoint = SFILE_UTF8_INVALID_CHARACTER; + ccBytesEaten = 0; + + if(pbString < pbStringEnd) + { + // At least one byte will be eaten + ccBytesEaten = 1; + + // 1st code point (0x00 - 0x7F, 1 byte) + if(pbString[0] <= 0x7F) + { + // This is the perfect spot to check for filename-unsafe characters + if(UTF8_IsBadFileNameCharacter(pbString[0])) + return ERROR_NO_UNICODE_TRANSLATION; + + // Decode the 1-byte sequence + dwCodePoint = pbString[0]; + return ERROR_SUCCESS; + } + + // 2nd code point (0x80 - 0x7FF, 2 bytes) + if((pbString[0] & 0xE0) == 0xC0 && (pbString + 2) <= pbStringEnd) + { + // Decode the 2-byte sequence + return UTF8_DecodeSequence(pbString, 0x1F, 1, 0x80, 0x7FF, dwCodePoint, ccBytesEaten); + } + + // 3rd code point (0x800 - 0xFFFF, 3 bytes) + // Note: MultiByteToWideChar will not decode 0xE0 0xBF 0xBF (--> 0x0FFF), + if((pbString[0] & 0xF0) == 0xE0 && (pbString + 3) <= pbStringEnd) + { + // Decode the 3-byte sequence + return UTF8_DecodeSequence(pbString, 0x0F, 2, 0x800, 0xFFFF, dwCodePoint, ccBytesEaten); + } + + // 4th code point (0x10000 - 0x10FFFF, 4 bytes) + if((pbString[0] & 0xF8) == 0xF0 && (pbString + 4) <= pbStringEnd) + { + // Try to decode 4-byte sequence + return UTF8_DecodeSequence(pbString, 0x07, 3, 0x10000, SFILE_UNICODE_MAX, dwCodePoint, ccBytesEaten); + } + + // An invalid UTF-8 sequence encountered + return ERROR_NO_UNICODE_TRANSLATION; + } + + // No bytes available. Should never happen + assert(false); + return ERROR_BUFFER_OVERFLOW; +} + +static size_t UTF8_EncodeSequence(DWORD dwCodePoint, BYTE LeadingByte, DWORD dwFollowByteCount, LPBYTE Utf8Buffer) +{ + DWORD dwByteShift = dwFollowByteCount * 6; + + // Encode the highest byte + Utf8Buffer[0] = (BYTE)(LeadingByte | (dwCodePoint >> dwByteShift)); + dwByteShift -= 6; + + // Encode the follow bytes + for(DWORD i = 0; i < dwFollowByteCount; i++) + { + // The follow byte must be 10xxxxxx + Utf8Buffer[i + 1] = (BYTE)(0x80 | ((dwCodePoint >> dwByteShift) & 0x3F)); + dwByteShift -= 6; + } + + return dwFollowByteCount + 1; +} + +static size_t UTF8_EncodeCodePoint(DWORD dwCodePoint, LPBYTE Utf8Buffer) +{ + // 0x00 - 0x7F, 1 byte + if(dwCodePoint < 0x80) + return UTF8_EncodeSequence(dwCodePoint, 0x00, 0, Utf8Buffer); + + // 0x80 - 0x7FF + if(dwCodePoint < 0x800) + return UTF8_EncodeSequence(dwCodePoint, 0xC0, 1, Utf8Buffer); + + // 0x800 - 0xFFFF + if(dwCodePoint < 0x10000) + return UTF8_EncodeSequence(dwCodePoint, 0xE0, 2, Utf8Buffer); + + // 0x800 - 0xFFFF + if(dwCodePoint < 0x110000) + return UTF8_EncodeSequence(dwCodePoint, 0xF0, 3, Utf8Buffer); + + // Should never happen + assert(false); + return 0; +} + +static size_t UTF8_FlushInvalidChars(LPTSTR szBuffer, size_t ccBuffer, size_t nOutLength, LPBYTE InvalidChars, size_t nInvalidChars) +{ + // Case 0: No invalid char -> do nothing + if(nInvalidChars == 0) + { + return nOutLength; + } + + // Case 1: One invalid char -> %xx (compatible with previous versions of MPQ Editor) + if(nInvalidChars == 1) + { + // Space for 3 characters needed + if(szBuffer != NULL && (nOutLength + 3) <= ccBuffer) + { + szBuffer[nOutLength] = '%'; + SMemBinToStr(szBuffer + nOutLength + 1, ccBuffer - 1, InvalidChars, 1); + } + return nOutLength + 3; + } + + // Case 1: More than one invalid char -> %u[xxyyzz] + else + { + // Enough space for %u[xxyyzz] + size_t nLengthNeeded = nInvalidChars * 2 + 4; + + // Space for 4 characters needed + if(szBuffer != NULL && (nOutLength + nLengthNeeded) <= ccBuffer) + { + memcpy(szBuffer + nOutLength, _T("%u["), 6); + + SMemBinToStr(szBuffer + nOutLength + 3, ccBuffer - 3, InvalidChars, nInvalidChars); + + szBuffer[nOutLength + nLengthNeeded - 1] = ']'; + szBuffer[nOutLength + nLengthNeeded] = 0; + } + return nOutLength + nLengthNeeded; + } +} + +size_t UTF8_FlushBinBuffer(LPBYTE pbBuffer, size_t ccBuffer, size_t nOutLength, LPBYTE BinBuffer, size_t nByteCount) +{ + if(pbBuffer != NULL && (nOutLength + nByteCount) < ccBuffer) + memcpy(pbBuffer + nOutLength, BinBuffer, nByteCount); + return nOutLength + nByteCount; +} + +#ifdef STORMLIB_WIDE_CHAR +static size_t UTF16_EncodeCodePoint(DWORD dwCodePoint, unsigned short * Utf16Buffer) +{ + // https://en.wikipedia.org/wiki/UTF-16 + if(dwCodePoint <= 0xFFFF) + { + Utf16Buffer[0] = (unsigned short)(dwCodePoint); + return 1; + } + + if(dwCodePoint <= SFILE_UNICODE_MAX) + { + // Fix the code point + dwCodePoint -= 0x10000; + + // Split the code point to two 10-bit values + Utf16Buffer[0] = (unsigned short)(0xD800 + (dwCodePoint >> 10)); // High 6 bytes + Utf16Buffer[1] = (unsigned short)(0xDC00 + (dwCodePoint & 0x3FF)); // Low 10 bytes + return 2; + } + + // Should never happen + assert(false); + return 0; +} + +static DWORD UTF16_DecodeCodePoint(LPCTSTR szString, LPCTSTR szStringEnd, DWORD & dwCodePoint, size_t & ccCharsEaten) +{ + // Reset the number of bytes eaten + dwCodePoint = SFILE_UTF8_INVALID_CHARACTER; + ccCharsEaten = 0; + + if(szString < szStringEnd) + { + // At least one char will be eaten + ccCharsEaten = 1; + + // Check for an invalid surrogate pair + if(0xDC00 <= szString[0] && szString[0] <= 0xDFFF) + { + dwCodePoint = SFILE_UTF8_INVALID_CHARACTER; + return ERROR_NO_UNICODE_TRANSLATION; + } + + // Check for a valid surrogate pair + if(0xD800 <= szString[0] && szString[0] <= 0xDBFF && (szString + 1) < szStringEnd) + { + dwCodePoint = ((szString[0] - 0xD800) << 10) | (szString[1] - 0xDC00) + 0x10000; + ccCharsEaten = 2; + return ERROR_SUCCESS; + } + + // Direct encoding + dwCodePoint = szString[0]; + ccCharsEaten = 1; + return ERROR_SUCCESS; + } + + // No bytes available. Should never happen + assert(false); + return ERROR_BUFFER_OVERFLOW; +} +#endif + +size_t UTF16_IsEncodedCharSequence(LPCTSTR szString, LPCTSTR szStringEnd, LPBYTE BinBuffer) +{ + size_t nEncodedChars = 0; + + if((szString + 1) < szStringEnd && *szString++ == '%') + { + if((szString + 1) < szStringEnd && *szString++ == 'u') + { + if((szString + 1) < szStringEnd && *szString++ == '[') + { + // Keep going as long as we can convert + for(size_t i = 0; i < MAX_INVALID_CHARS; i++) + { + if(szString + (i * 2) >= szStringEnd) + break; + if(szString[i * 2] == ']') + break; + nEncodedChars++; + } + + // Did we encounter the end of the string? + if(szString + (nEncodedChars * 2) + 1 <= szStringEnd && szString[nEncodedChars * 2] == ']') + { + TCHAR HexaString[MAX_INVALID_CHARS * 2 + 1]; + + // Copy the hexadecimal string + memcpy(HexaString, szString, (nEncodedChars * 2) * sizeof(TCHAR)); + HexaString[nEncodedChars * 2] = 0; + + // Try to decode the hexa string + if(SMemStrToBin(HexaString, BinBuffer, nEncodedChars) == ERROR_SUCCESS) + { + return nEncodedChars; + } + } + } + } + } + return 0; +} + +//----------------------------------------------------------------------------- +// Public (exported) functions + +// Conversion of MPQ file name to file-name-safe string +DWORD WINAPI SMemUTF8ToFileName( + LPTSTR szBuffer, // Pointer to the output buffer. If NULL, the function will calulate the needed length + size_t ccBuffer, // Length of the output buffer (must include EOS) + const void * lpString, // Pointer to the begin of the string + const void * lpStringEnd, // Pointer to the end of string. If NULL, it's assumed to be zero-terminated + DWORD dwFlags, // Additional flags + size_t * pOutLength = NULL) // Pointer to a variable that receives the needed length (optional) +{ + const BYTE * pbStringEnd = (const BYTE *)lpStringEnd; + const BYTE * pbString = (const BYTE *)lpString; + DWORD dwErrCode = ERROR_SUCCESS; + size_t nInvalidChars = 0; + size_t nOutLength = 0; + BYTE InvalidChars[MAX_INVALID_CHARS]; + + // Set the end of the input if not specified + if(pbStringEnd == NULL) + pbStringEnd = pbString + strlen((char *)pbString); + + // Keep conversion as long + while(pbString < pbStringEnd) + { + size_t ccBytesEaten = 0; + size_t nCharLength; + DWORD dwCodePoint = 0; + + // Decode the single UTF-8 char + if((dwErrCode = UTF8_DecodeCodePoint(pbString, pbStringEnd, dwCodePoint, ccBytesEaten)) != ERROR_SUCCESS) + { + // Exactly one byte should be eaten on error + assert(ccBytesEaten == 1); + + // If invalid chars are allowed, we replace the result with 0xFFFD + if(dwFlags & SFILE_UTF8_ALLOW_INVALID_CHARS) + { + // Replace the code point with invalid marker and continue on the next character + dwCodePoint = SFILE_UTF8_INVALID_CHARACTER; + dwErrCode = ERROR_SUCCESS; + } + + // If the invalid chars are not allowed, we put the invalid char to the stack + else + { + // Flush the invalid characters, if full + if(nInvalidChars >= _countof(InvalidChars)) + { + nOutLength = UTF8_FlushInvalidChars(szBuffer, ccBuffer, nOutLength, InvalidChars, nInvalidChars); + nInvalidChars = 0; + } + + // Put the invalid char to the stack + InvalidChars[nInvalidChars++] = pbString[0]; + pbString++; + continue; + } + } + + // Check whether the unicode char is not out of range + assert(dwCodePoint <= SFILE_UNICODE_MAX); + + // Move the source pointer by the number of bytes eaten + pbString = pbString + ccBytesEaten; + + // Flush the invalid characters, if any + nOutLength = UTF8_FlushInvalidChars(szBuffer, ccBuffer, nOutLength, InvalidChars, nInvalidChars); + nInvalidChars = 0; + +#ifdef STORMLIB_WIDE_CHAR + { + unsigned short Utf16Buffer[2]; + + // Encode the code point into UTF-16 + nCharLength = UTF16_EncodeCodePoint(dwCodePoint, Utf16Buffer); + + // Write the encoded UTF-16 to the output buffer, if present + if(szBuffer != NULL && (nOutLength + nCharLength) < ccBuffer) + { + memcpy(szBuffer + nOutLength, Utf16Buffer, nCharLength * sizeof(unsigned short)); + } + } +#else + { + BYTE Utf8Buffer[4]; + + // Encode the code point into UTF-8 + nCharLength = UTF8_EncodeCodePoint(dwCodePoint, Utf8Buffer); + + // Write the encoded UTF-16 to the output buffer, if present + if(szBuffer != NULL && (nOutLength + nCharLength) < ccBuffer) + { + memcpy(szBuffer + nOutLength, Utf8Buffer, nCharLength); + } + } +#endif + + // Increment the output length + nOutLength = nOutLength + nCharLength; + } + + // Flush the invalid characters, if any + nOutLength = UTF8_FlushInvalidChars(szBuffer, ccBuffer, nOutLength, InvalidChars, nInvalidChars); + nInvalidChars = 0; + + // Terminate the string with zero, if we still have space + if(szBuffer != NULL && nOutLength < ccBuffer) + szBuffer[nOutLength] = 0; + nOutLength++; + + // Give the output length, if required + if(pOutLength != NULL) + pOutLength[0] = nOutLength; + return dwErrCode; +} + +DWORD WINAPI SMemFileNameToUTF8( + void * lpBuffer, // Pointer to the output buffer. If NULL, the function will calulate the needed length + size_t ccBuffer, // Length of the output buffer (must include EOS) + const TCHAR * szString, // Pointer to the begin of the string + const TCHAR * szStringEnd, // Pointer to the end of string. If NULL, it's assumed to be zero-terminated + DWORD /* dwFlags */, // Additional flags + size_t * pOutLength = NULL) // Pointer to a variable that receives the needed length in bytes (optional) +{ + LPBYTE pbBuffer = (LPBYTE)lpBuffer; + size_t nOutLength = 0; + DWORD dwErrCode = ERROR_SUCCESS; + + // Set the end of the input if not specified + if(szStringEnd == NULL) + szStringEnd = szString + _tcslen(szString); + + // Keep conversion as long + while(szString < szStringEnd) + { + size_t ccCharsEaten = 0; + size_t nUtf8Length; + DWORD dwCodePoint = 0; + BYTE Utf8Buffer[MAX_INVALID_CHARS]; + + // Check for encoded sequence of bytes + if(szString[0] == '%') + { + // If there is a single hexa number ("%c7"), decode that number + if((szString + 3) <= szStringEnd) + { + TCHAR HexaString[3] = {0}; + + HexaString[0] = szString[1]; + HexaString[1] = szString[2]; + if(SMemStrToBin(HexaString, Utf8Buffer, 1) == ERROR_SUCCESS) + { + nOutLength = UTF8_FlushBinBuffer(pbBuffer, ccBuffer, nOutLength, Utf8Buffer, 1); + szString += 3; + continue; + } + } + + // If there is an escaped sequence ("%u[aabbcc]"), decode that sequence + if((nUtf8Length = UTF16_IsEncodedCharSequence(szString, szStringEnd, Utf8Buffer)) != 0) + { + nOutLength = UTF8_FlushBinBuffer(pbBuffer, ccBuffer, nOutLength, Utf8Buffer, nUtf8Length); + szString += (nUtf8Length * 2) + 4; + continue; + } + } + +#ifdef STORMLIB_WIDE_CHAR + // Try to decode the code point from UTF-16 + if((dwErrCode = UTF16_DecodeCodePoint(szString, szStringEnd, dwCodePoint, ccCharsEaten)) != ERROR_SUCCESS) + return dwErrCode; +#else + // Try to decode the code point from UTF-16 + if((dwErrCode = UTF8_DecodeCodePoint((const BYTE *)szString, (const BYTE *)szStringEnd, dwCodePoint, ccCharsEaten)) != ERROR_SUCCESS) + return dwErrCode; +#endif + + // Check whether the unicode char is not out of range + assert(dwCodePoint <= SFILE_UNICODE_MAX); + + // Move the source pointer by the number of bytes eaten + szString = szString + ccCharsEaten; + + // Encode the UNICODE char + nUtf8Length = UTF8_EncodeCodePoint(dwCodePoint, Utf8Buffer); + + // Do we have enough space in the buffer? + if(pbBuffer != NULL && (nOutLength + nUtf8Length) < ccBuffer) + { + // Write the encoded UTF-16 to the output + memcpy(pbBuffer + nOutLength, Utf8Buffer, nUtf8Length); + } + + // Increment the output length + nOutLength = nOutLength + nUtf8Length; + } + + // Terminate the string with zero, if we still have space + if(pbBuffer != NULL && nOutLength < ccBuffer) + pbBuffer[nOutLength] = 0; + nOutLength++; + + // Give the output length, if required + if(pOutLength != NULL) + pOutLength[0] = nOutLength; + return dwErrCode; +} diff --git a/src/StormCommon.h b/src/StormCommon.h index c050093..1e67a27 100644 --- a/src/StormCommon.h +++ b/src/StormCommon.h @@ -154,14 +154,15 @@ extern LCID g_lcFileLocale; // Preferred file locale and pla //----------------------------------------------------------------------------- // Conversion to uppercase/lowercase (and "/" to "\") -extern unsigned char AsciiToLowerTable[256]; -extern unsigned char AsciiToUpperTable[256]; +extern const unsigned char AsciiToLowerTable[256]; +extern const unsigned char AsciiToUpperTable[256]; +extern const unsigned char SMemCharToByte[0x80]; //----------------------------------------------------------------------------- // Safe string functions template -XCHAR * IntToString(XCHAR * szBuffer, size_t cchMaxChars, XINT nValue, size_t nDigitCount = 0) +XCHAR * SMemIntToStr(XCHAR * szBuffer, size_t cchMaxChars, XINT nValue, size_t nDigitCount = 0) { XCHAR * szBufferEnd = szBuffer + cchMaxChars - 1; XCHAR szNumberRev[0x20]; @@ -197,6 +198,72 @@ XCHAR * IntToString(XCHAR * szBuffer, size_t cchMaxChars, XINT nValue, size_t nD return szBuffer; } +template +DWORD SMemBinToStr(XCHAR * szBuffer, size_t cchBuffer, const void * pvBinary, size_t cbBinary) +{ + const unsigned char * pbBinary = (const unsigned char *)pvBinary; + const char * SMemIntToHex = "0123456789abcdef"; + + // The size of the string must be enough to hold the binary + EOS + if(cchBuffer < ((cbBinary * 2) + 1)) + return ERROR_INSUFFICIENT_BUFFER; + + // Convert the string to the array of MD5 + // Copy the blob data as text + for(size_t i = 0; i < cbBinary; i++) + { + *szBuffer++ = SMemIntToHex[pbBinary[0] >> 0x04]; + *szBuffer++ = SMemIntToHex[pbBinary[0] & 0x0F]; + pbBinary++; + } + + // Terminate the string + *szBuffer = 0; + return ERROR_SUCCESS; +} + +template +DWORD SMemStrToBin(const XCHAR * szString, void * pvBinary, size_t cbBinary, size_t * PtrBinary = NULL) +{ + LPBYTE pbBinary = (LPBYTE)pvBinary; + LPBYTE pbBinaryEnd = pbBinary + cbBinary; + LPBYTE pbSaveBinary = pbBinary; + + // Verify parameter + if(szString != NULL && szString[0] != 0) + { + // Work as long as we have at least 2 characters ready + while(szString[0] != 0 && szString[1] != 0) + { + // Convert both to unsigned char to get rid of negative indexes produced by szString[x] + BYTE StringByte0 = (BYTE)szString[0]; + BYTE StringByte1 = (BYTE)szString[1]; + + // Each character must be within the range of 0x80 + if(StringByte0 > 0x80 || StringByte1 > 0x80) + return ERROR_INVALID_PARAMETER; + if(SMemCharToByte[StringByte0] == 0xFF || SMemCharToByte[StringByte1] == 0xFF) + return ERROR_INVALID_PARAMETER; + + // Overflow check + if(pbBinary >= pbBinaryEnd) + return ERROR_INSUFFICIENT_BUFFER; + + *pbBinary++ = (SMemCharToByte[StringByte0] << 0x04) | SMemCharToByte[StringByte1]; + szString += 2; + } + + // Odd number of chars? + if(szString[0] != 0 && szString[1] == 0) + return ERROR_INVALID_PARAMETER; + } + + // Give the length + if(PtrBinary != NULL) + PtrBinary[0] = pbBinary - pbSaveBinary; + return ERROR_SUCCESS; +} + char * StringCopy(char * szTarget, size_t cchTarget, const char * szSource); void StringCat(char * szTarget, size_t cchTargetMax, const char * szSource); void StringCreatePseudoFileName(char * szBuffer, size_t cchMaxChars, unsigned int nIndex, const char * szExtension); diff --git a/src/StormLib.h b/src/StormLib.h index 4072a7d..bf884c5 100644 --- a/src/StormLib.h +++ b/src/StormLib.h @@ -1,7 +1,7 @@ /*****************************************************************************/ /* StormLib.h Copyright (c) Ladislav Zezula 1999-2017 */ /*---------------------------------------------------------------------------*/ -/* StormLib library v 9.22 */ +/* StormLib library v 9.30 */ /* */ /* Author : Ladislav Zezula */ /* E-mail : ladik@zezula.net */ @@ -74,6 +74,7 @@ /* 12.12.16 9.21 Lad Release 9.21 */ /* 10.11.17 9.22 Lad Release 9.22 */ /* 28.09.22 9.24 Lad lcLocale -> lcFileLocale, also contains platform */ +/* 01.11.24 9.30 Lad Added conversion from UTF-8 to file name and back */ /*****************************************************************************/ #ifndef __STORMLIB_H__ @@ -143,8 +144,8 @@ extern "C" { //----------------------------------------------------------------------------- // Defines -#define STORMLIB_VERSION 0x091A // Current version of StormLib -#define STORMLIB_VERSION_STRING "9.26" // Current version of StormLib as string +#define STORMLIB_VERSION 0x091E // Current version of StormLib +#define STORMLIB_VERSION_STRING "9.30" // Current version of StormLib as string #define ID_MPQ 0x1A51504D // MPQ archive header ID ('MPQ\x1A') #define ID_MPQ_USERDATA 0x1B51504D // MPQ userdata entry ('MPQ\x1B') @@ -1131,6 +1132,30 @@ int WINAPI SCompCompress (void * pvOutBuffer, int * pcbOutBuffer, void * pv int WINAPI SCompDecompress (void * pvOutBuffer, int * pcbOutBuffer, void * pvInBuffer, int cbInBuffer); int WINAPI SCompDecompress2(void * pvOutBuffer, int * pcbOutBuffer, void * pvInBuffer, int cbInBuffer); +//----------------------------------------------------------------------------- +// Conversion of UTF-8 (MPQ listfiles) into file name safe strings + +#define SFILE_UTF8_ALLOW_INVALID_CHARS 0x01 // If set, then the function will treat invalid chars like like MultiByteToWideChar +#define SFILE_UTF8_INVALID_CHARACTER 0xFFFD // Marker of an invalid character +#define SFILE_UNICODE_MAX 0x10FFFF // The highest valid UNICODE char + +// Conversion of MPQ file name to file-name-safe string +DWORD WINAPI SMemUTF8ToFileName( + TCHAR * szBuffer, // Pointer to the output buffer. If NULL, the function will calulate the needed length + size_t ccBuffer, // Length of the output buffer (must include EOS) + const void * lpString, // Pointer to the begin of the string + const void * lpStringEnd, // Pointer to the end of string. If NULL, it's assumed to be zero-terminated + DWORD dwFlags, // Additional flags + size_t * pOutLength); // Pointer to a variable that receives the needed length (optional) + +DWORD WINAPI SMemFileNameToUTF8( + void * lpBuffer, // Pointer to the output buffer. If NULL, the function will calulate the needed length + size_t ccBuffer, // Length of the output buffer (must include EOS) + const TCHAR * szString, // Pointer to the begin of the string + const TCHAR * szStringEnd, // Pointer to the end of string. If NULL, it's assumed to be zero-terminated + DWORD dwFlags, // Reserved + size_t * pOutLength); // Pointer to a variable that receives the needed length in bytes (optional) + //----------------------------------------------------------------------------- // Non-Windows support for SetLastError/GetLastError diff --git a/src/StormPort.h b/src/StormPort.h index aa309e1..f00afd6 100644 --- a/src/StormPort.h +++ b/src/StormPort.h @@ -44,6 +44,10 @@ #define _CRT_NON_CONFORMING_SWPRINTFS #endif + #if defined(UNICODE) || defined(_UNICODE) + #define STORMLIB_WIDE_CHAR + #endif + #include #include #include @@ -52,15 +56,9 @@ // Suppress definitions of `min` and `max` macros by : #define NOMINMAX 1 #include - #include - #define STORMLIB_LITTLE_ENDIAN - #ifdef _WIN64 - #define STORMLIB_64BIT - #else - #define STORMLIB_32BIT - #endif + #define STORMLIB_LITTLE_ENDIAN #define STORMLIB_CDECL __cdecl @@ -74,7 +72,6 @@ #if !defined(STORMLIB_PLATFORM_DEFINED) && defined(__APPLE__) // Mac BSD API - // Macintosh #include #include #include @@ -320,13 +317,8 @@ // Definition of Windows-specific types for non-Windows platforms #ifndef STORMLIB_WINDOWS - #if __LP64__ - #define STORMLIB_64BIT - #else - #define STORMLIB_32BIT - #endif - // __cdecl meand nothing on non-Windows + // __cdecl means nothing on non-Windows #define STORMLIB_CDECL /* */ // Typedefs for ANSI C @@ -340,7 +332,7 @@ typedef long long LONGLONG; typedef unsigned long long ULONGLONG; typedef void * HANDLE; - typedef void * LPOVERLAPPED; // Unsupported on Linux and Mac + typedef void * LPOVERLAPPED; typedef char TCHAR; typedef unsigned int LCID; typedef LONG * PLONG; @@ -351,7 +343,7 @@ typedef char * LPTSTR; typedef char * LPSTR; - #ifdef STORMLIB_32BIT + #ifndef __LP64__ #define _LZMA_UINT32_IS_ULONG #endif @@ -409,12 +401,14 @@ #define ERROR_DISK_FULL ENOSPC #define ERROR_ALREADY_EXISTS EEXIST #define ERROR_INSUFFICIENT_BUFFER ENOBUFS - #define ERROR_BAD_FORMAT 1000 // No such error code under Linux - #define ERROR_NO_MORE_FILES 1001 // No such error code under Linux - #define ERROR_HANDLE_EOF 1002 // No such error code under Linux - #define ERROR_CAN_NOT_COMPLETE 1003 // No such error code under Linux - #define ERROR_FILE_CORRUPT 1004 // No such error code under Linux - #define ERROR_BUFFER_OVERFLOW 1005 // No such error code under Linux + #define ERROR_BAD_FORMAT 1000 // No such error codes under Linux + #define ERROR_NO_MORE_FILES 1001 + #define ERROR_HANDLE_EOF 1002 + #define ERROR_CAN_NOT_COMPLETE 1003 + #define ERROR_FILE_CORRUPT 1004 + #define ERROR_BUFFER_OVERFLOW 1005 + #define ERROR_INVALID_DATA 1006 + #define ERROR_NO_UNICODE_TRANSLATION 1007 #endif // Macros that can sometimes be missing diff --git a/src/wdk/sources-cpp.cpp b/src/wdk/sources-cpp.cpp index f289975..a9d7ba3 100644 --- a/src/wdk/sources-cpp.cpp +++ b/src/wdk/sources-cpp.cpp @@ -24,3 +24,4 @@ #include "src\SFilePatchArchives.cpp" #include "src\SFileReadFile.cpp" #include "src\SFileVerify.cpp" +#include "src\SMemUtf8.cpp" diff --git a/test/StormTest.cpp b/test/StormTest.cpp index 0312abb..7cd885d 100755 --- a/test/StormTest.cpp +++ b/test/StormTest.cpp @@ -275,6 +275,21 @@ static SFILE_MARKERS MpqMarkers[] = static TCHAR szMpqDirectory[MAX_PATH+1]; size_t cchMpqDirectory = 0; +inline bool AssertTrue(bool bCondition) +{ + if(!bCondition) + { +#ifdef STORMLIB_WINDOWS + __debugbreak(); +#else + assert(false); +#endif + } + return bCondition; +} + +#define ASSERT_TRUE(condition) { if(!AssertTrue(condition)) { return false; } } + static EXTRA_TYPE GetExtraType(const void * pExtra) { if(pExtra != NULL) @@ -328,30 +343,6 @@ LPCTSTR GetRelativePath(LPCTSTR szFullPath) return _T(""); } -// Converts binary array to string. -// The caller must ensure that the buffer has at least ((cbBinary * 2) + 1) characters -template -xchar * StringFromBinary(LPBYTE pbBinary, size_t cbBinary, xchar * szBuffer) -{ - const char * IntToHexChar = "0123456789abcdef"; - xchar * szSaveBuffer = szBuffer; - - // Verify the binary pointer - if(pbBinary && cbBinary) - { - // Convert the bytes to string array - for(size_t i = 0; i < cbBinary; i++) - { - *szBuffer++ = IntToHexChar[pbBinary[i] >> 0x04]; - *szBuffer++ = IntToHexChar[pbBinary[i] & 0x0F]; - } - } - - // Terminate the string - *szBuffer = 0; - return szSaveBuffer; -} - const char * GetFileText(PFILE_DATA pFileData) { const char * szFileText = (const char *)(pFileData->FileData); @@ -1764,7 +1755,7 @@ static DWORD VerifyDataChecksum(TLogHelper & Logger, HANDLE hMpq, DWORD dwSearch // Check the MD5 hash, if given if(IS_VALID_STRING(szNameHash)) { - StringFromBinary(NameHash, MD5_DIGEST_SIZE, szNameHash); + SMemBinToStr(szNameHash, _countof(szNameHash), NameHash, MD5_DIGEST_SIZE); if(_stricmp(szNameHash, szExpectedHash)) { Logger.PrintMessage("Extracted files MD5 mismatch (expected: %s, obtained: %s)", szExpectedHash, szNameHash); @@ -3785,23 +3776,91 @@ static DWORD TestReplaceFile(LPCTSTR szMpqPlainName, LPCTSTR szFilePlainName, LP return dwErrCode; } -static void Test_PlayingSpace() +static bool TestUtfConversion(const void * lpString) { - HANDLE hFile = NULL; - HANDLE hMpq = NULL; + LPTSTR szBuffer; + LPBYTE pbBuffer; + size_t nLength1 = 0; + size_t nLength2 = 0; + DWORD dwErrCode1; + DWORD dwErrCode2; + TCHAR szWideBuffer[1]; + BYTE szByteBuffer[1]; + int nResult; + + // Get the number of bytes of the buffer while the output buffer is 0 + dwErrCode1 = SMemUTF8ToFileName(NULL, 0, lpString, NULL, 0, &nLength1); + + // Check the number of bytes when the buffer is non-NULL, but buffer length is insufficient + dwErrCode2 = SMemUTF8ToFileName(szWideBuffer, _countof(szWideBuffer), lpString, NULL, 0, &nLength2); + ASSERT_TRUE(dwErrCode2 == dwErrCode1); + ASSERT_TRUE(nLength2 == nLength1); + + // Check the number of bytes when the buffer is non-NULL, and buffer length is sufficient + if((szBuffer = STORM_ALLOC(TCHAR, nLength1)) != NULL) + { + dwErrCode2 = SMemUTF8ToFileName(szBuffer, nLength1, lpString, NULL, 0, &nLength2); + ASSERT_TRUE(dwErrCode2 == dwErrCode1); + ASSERT_TRUE(nLength2 == nLength1); + + // Get the number of bytes of the buffer while the output buffer is 0 + dwErrCode1 = SMemFileNameToUTF8(NULL, 0, szBuffer, NULL, 0, &nLength1); + + // Check the number of bytes when the buffer is non-NULL, but buffer length is insufficient + dwErrCode2 = SMemFileNameToUTF8(szByteBuffer, _countof(szByteBuffer), szBuffer, NULL, 0, &nLength2); + ASSERT_TRUE(dwErrCode2 == dwErrCode1); + ASSERT_TRUE(nLength2 == nLength1); + + // Check the conversion into a buffer large enough + if((pbBuffer = STORM_ALLOC(BYTE, nLength1)) != NULL) + { + dwErrCode2 = SMemFileNameToUTF8(pbBuffer, nLength1, szBuffer, NULL, 0, &nLength2); + ASSERT_TRUE(dwErrCode2 == dwErrCode1); + ASSERT_TRUE(nLength2 == nLength1); - if(SFileOpenArchive(_T("c:\\RedHero vs 7Com22 (Final Stage GOD).scx"), 0, 0, &hMpq)) - { - SFileSetLocale(0x409); + nResult = memcmp(pbBuffer, lpString, nLength1); + ASSERT_TRUE(nResult == 0); + + STORM_FREE(pbBuffer); + } + + STORM_FREE(szBuffer); + } + return true; +} - if(SFileOpenFileEx(hMpq, "staredit\\scenario.chk", 0, &hFile)) +static DWORD TestUtf8Conversions(const BYTE * szTestString, const TCHAR * szListFile) +{ + SFILE_FIND_DATA sf; + HANDLE hFind; + TCHAR szFullPath[MAX_PATH]; + + // Check conversion of the invalid UTF8 string + TestUtfConversion(szTestString); + + // Create full path of the listfile + CreateFullPathName(szFullPath, _countof(szFullPath), szListFileDir, szListFile); + + // Test all file names in the Chinese listfile + hFind = SListFileFindFirstFile(NULL, szFullPath, "*", &sf); + if(hFind != NULL) + { + while(SListFileFindNextFile(hFind, &sf)) { - SFileCloseFile(hFile); + if(!TestUtfConversion(sf.cFileName)) + { + return ERROR_INVALID_DATA; + } } - SFileCloseArchive(hMpq); + SListFileFindClose(hFind); } + + return ERROR_SUCCESS; } +static void Test_PlayingSpace() +{} + //----------------------------------------------------------------------------- // Tables @@ -3814,13 +3873,59 @@ static LPCTSTR szDiabdatMPQ = _T("MPQ_1997_v1_Diablo1_DIABDAT.MPQ"); static const TEST_EXTRA_ONEFILE LfBliz = {ListFile, _T("ListFile_Blizzard.txt")}; static const TEST_EXTRA_ONEFILE LfWotI = {ListFile, _T("ListFile_WarOfTheImmortals.txt")}; +static const TEST_EXTRA_ONEFILE LfBad1 = {ListFile, _T("ListFile_UTF8_Bad.txt")}; static const BYTE szMpqFileNameUTF8[] = {0x4D, 0x50, 0x51, 0x5F, 0x32, 0x30, 0x32, 0x34, 0x5F, 0x76, 0x31, 0x5F, 0xE6, 0x9D, 0x82, 0xE9, 0xB1, 0xBC, 0xE5, 0x9C, 0xB0, 0xE7, 0x89, 0xA2, 0x5F, 0x30, 0x2E, 0x30, 0x38, 0x34, 0x62, 0x65, 0x74, 0x61, 0x34, 0x36, 0x2E, 0x77, 0x33, 0x78, 0x00}; static const BYTE szLstFileNameUTF8[] = {0x4C, 0x69, 0x73, 0x74, 0x46, 0x69, 0x6C, 0x65, 0x5F, 0xE6, 0x9D, 0x82, 0xE9, 0xB1, 0xBC, 0xE5, 0x9C, 0xB0, 0xE7, 0x89, 0xA2, 0x5F, 0x30, 0x2E, 0x30, 0x38, 0x34, 0x62, 0x65, 0x74, 0x61, 0x34, 0x36, 0x2E, 0x74, 0x78, 0x74, 0x00}; -static const TEST_EXTRA_UTF8 MpqUtf8 = {Utf8File, szMpqFileNameUTF8, szLstFileNameUTF8}; -static const TEST_EXTRA_TWOFILES TwoFilesD1 = {TwoFiles, "music\\dintro.wav", "File00000023.xxx"}; -static const TEST_EXTRA_TWOFILES TwoFilesD2 = {TwoFiles, "waitingroombkgd.dc6"}; +static const BYTE FileNameInvalidUTF8[] = +{ +// Hexadecimal Binary UTF-16 String +// ---- --------------------------------- ------ ------ + 0x7c, // --> 01111100 --> 0x007c %u[7cb7] + 0xb7, // --> 10110111(bad) --> 0xfffd + 0xc9, 0xb7, // --> 11001001 10110111 --> 0x0277 \x0277 + 0xc9, /* ca */ // --> 11001001 11001010(bad) --> 0xfffd %u[c9cac0bde7] + 0xca, /* c0 */ // --> 11001010 11000000(bad) --> 0xfffd + 0xc0, /* bd */ // --> 11000000 10111101(bad) --> 0x003d(bad) + 0xbd, // --> 10111101(bad) --> 0xfffd + 0xe7, /* c4 */ // --> 11100111 11000100(bad) --> 0xfffd + 0xc4, 0xa7, // --> 11000100 10100111 --> 0x0127 \x0127 + 0xca, /* de */ // --> 11001010 11011110(bad) --> 0xfffd %ca + 0xde, 0xbb, // --> 11011110 10111011 --> 0x07bb \x07bb + 0xb6, // --> 10110110(bad) --> 0xfffd %b6 + 0xd3, 0xad, // --> 11010011 10101101 --> 0x04ed \x04ed + 0xc4, /* fa */ // --> 11000100 11111010(bad) --> 0xfffd %u[c4fa] + 0xfa, /* 5f */ // --> 11111010 01011111(bad) --> 0xfffd + 0x5f, // --> 01011111 --> 0x005f _ + 0xa1, // --> 10100001(bad) --> 0xfffd %u[a1eea1f0a1ef] + 0xee, /* a1 f0 */ // --> 11101110 10100001 11110000(bad) --> 0xfffd + 0xa1, // --> 10100001(bad) --> 0xfffd + 0xf0, /* a1 ef */ // --> 11110000 10100001 11101111(bad) --> 0xfffd + 0xa1, // --> 10100001(bad) --> 0xfffd + 0xef, /* 5f */ // --> 11101111 01011111(bad) --> 0xfffd + 0x5f, // --> 01011111 --> 0x005f _ + 0xf0, /* 80 80 80 */ // --> 11110000 10000000 10000000 10000000 --> 0x0000(bad) %u[f0808080] + 0x80, // --> 10000000(bad) --> 0xfffd + 0x80, // --> 10000000(bad) --> 0xfffd + 0x80, // --> 10000000(bad) --> 0xfffd + 0xe9, 0xa3, 0x9e, // --> 11101001 10100011 10011110 --> 0x98de \x98de + 0xe4, 0xb8, 0x96, // --> 11100100 10111000 10010110 --> 0x4e16 \x4e16 + 0xe7, 0x95, 0x8c, // --> 11100111 10010101 10001100 --> 0x754c \x754c + 0xe9, 0xad, 0x94, // --> 11101001 10101101 10010100 --> 0x9b54 \x9b54 + 0xe5, 0x85, 0xbd, // --> 11100101 10000101 10111101 --> 0x517d \x517d + 0xe6, 0xac, 0xa2, // --> 11100110 10101100 10100010 --> 0x6b22 \x6b22 + 0xe8, 0xbf, 0x8e, // --> 11101000 10111111 10001110 --> 0x8fce \x8fce + 0xe6, 0x82, 0xa8, // --> 11100110 10000010 10101000 --> 0x60a8 \x60a8 + 0x2e, // --> 00101110 --> 0x002e \x002e + 0x6d, 0x64, 0x78, // --> 01101101 01100100 01111000 --> ".mdx" + 0x00 // --> 00000000 --> EOS +}; + +static const TEST_EXTRA_UTF8 MpqUtf8 = {Utf8File, szMpqFileNameUTF8, szLstFileNameUTF8}; + +static const TEST_EXTRA_TWOFILES TwoFilesD1 = {TwoFiles, "music\\dintro.wav", "File00000023.xxx"}; +static const TEST_EXTRA_TWOFILES TwoFilesD2 = {TwoFiles, "waitingroombkgd.dc6"}; static const TEST_EXTRA_TWOFILES TwoFilesW3M = {TwoFiles, "file00000002.blp"}; static const TEST_EXTRA_TWOFILES TwoFilesW3X = {TwoFiles, "BlueCrystal.mdx"}; @@ -4046,6 +4151,8 @@ static const TEST_INFO1 Test_OpenMpqs[] = {_T("MPQ_2023_v1_GreenTD.w3x"), NULL, "a8d91fc4e52d7c21ff7feb498c74781a", 2004}, // Corrupt sector checksum table in file #A0 {_T("MPQ_2023_v4_1F644C5A.SC2Replay"), NULL, "b225828ffbf5037553e6a1290187caab", 17}, // Corrupt patch info of the "(attributes)" file {_T(""), NULL, "67faeffd0c0aece205ac8b7282d8ad8e", 4697, &MpqUtf8}, // Chinese name of the MPQ + {_T("MPQ_2024_v1_BadUtf8_5.0.2.w3x"), NULL, "be34f9862758f021a1c6c77df3cd4f05", 6393, &LfBad1}, // Bad UTF-8 sequences in file names + // Protected archives {_T("MPQ_2002_v1_ProtectedMap_InvalidUserData.w3x"), NULL, "b900364cc134a51ddeca21a13697c3ca", 79}, @@ -4224,6 +4331,9 @@ int _tmain(int argc, TCHAR * argv[]) // Placeholder function for various testing purposes Test_PlayingSpace(); + // Test the UTF-8 conversions + TestUtf8Conversions(FileNameInvalidUTF8, LfBad1.szFile); + #ifdef TEST_COMMAND_LINE // Test-open MPQs from the command line. They must be plain name // and must be placed in the Test-MPQs folder @@ -4235,10 +4345,9 @@ int _tmain(int argc, TCHAR * argv[]) #ifdef TEST_LOCAL_LISTFILE // Tests on a local listfile if(dwErrCode == ERROR_SUCCESS) - { - TestOnLocalListFile(_T("FLAT-MAP:listfile-test.txt")); + dwErrCode = TestOnLocalListFile(_T("FLAT-MAP:listfile-test.txt")); + if(dwErrCode == ERROR_SUCCESS) dwErrCode = TestOnLocalListFile(_T("listfile-test.txt")); - } #endif // TEST_LOCAL_LISTFILE #ifdef TEST_STREAM_OPERATIONS // Test file stream operations diff --git a/test/stormlib-test-001.txt b/test/stormlib-test-001.txt index db8df0e..1225add 100644 --- a/test/stormlib-test-001.txt +++ b/test/stormlib-test-001.txt @@ -1,4 +1,4 @@ -==== Test Suite for StormLib version 9.26 ==== +==== Test Suite for StormLib version 9.30 ==== InitWorkFolder: Work directory \Multimedia\MPQs (default) TestLiFiSearch (FLAT-MAP:listfile-test.txt) succeeded. TestLiFiSearch (listfile-test.txt) succeeded. @@ -61,6 +61,7 @@ TestReadingMpq (MPQ_2023_v4_UTF8.s2ma) succeeded. TestReadingMpq (MPQ_2023_v1_GreenTD.w3x) succeeded. TestReadingMpq (MPQ_2023_v4_1F644C5A.SC2Replay) succeeded. TestReadingMpq () succeeded. +TestReadingMpq (MPQ_2024_v1_BadUtf8_5.0.2.w3x) succeeded. TestReadingMpq (MPQ_2002_v1_ProtectedMap_InvalidUserData.w3x) succeeded. TestReadingMpq (MPQ_2002_v1_ProtectedMap_InvalidMpqFormat.w3x) succeeded. TestReadingMpq (MPQ_2002_v1_ProtectedMap_Spazzler.w3x) succeeded. -- cgit v1.2.3