diff options
| -rw-r--r-- | CMakeLists.txt | 1 | ||||
| -rw-r--r-- | StormLib.vcxproj | 1 | ||||
| -rw-r--r-- | StormLib.vcxproj.filters | 3 | ||||
| -rw-r--r-- | StormLib_dll.vcxproj | 1 | ||||
| -rw-r--r-- | StormLib_dll.vcxproj.filters | 3 | ||||
| -rw-r--r-- | StormLib_test.vcxproj | 1 | ||||
| -rw-r--r-- | StormLib_test.vcxproj.filters | 3 | ||||
| -rw-r--r-- | StormLib_vs08.vcproj | 164 | ||||
| -rw-r--r-- | StormLib_vs08_dll.vcproj | 84 | ||||
| -rw-r--r-- | StormLib_vs08_test.vcproj | 36 | ||||
| -rw-r--r-- | src/FileStream.cpp | 4 | ||||
| -rw-r--r-- | src/SBaseCommon.cpp | 6 | ||||
| -rw-r--r-- | src/SMemUtf8.cpp | 551 | ||||
| -rw-r--r-- | src/StormCommon.h | 73 | ||||
| -rw-r--r-- | src/StormLib.h | 31 | ||||
| -rw-r--r-- | src/StormPort.h | 38 | ||||
| -rw-r--r-- | src/wdk/sources-cpp.cpp | 1 | ||||
| -rwxr-xr-x | test/StormTest.cpp | 189 | ||||
| -rw-r--r-- | test/stormlib-test-001.txt | 3 | 
19 files changed, 1119 insertions, 74 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 5bf15df..8d17ebe 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -60,6 +60,7 @@ set(SRC_FILES             src/SFilePatchArchives.cpp             src/SFileReadFile.cpp             src/SFileVerify.cpp +           src/SMemUtf8.cpp             src/libtomcrypt/src/pk/rsa/rsa_verify_simple.c             src/libtomcrypt/src/misc/crypt_libc.c  ) diff --git a/StormLib.vcxproj b/StormLib.vcxproj index 26d8f70..57cea24 100644 --- a/StormLib.vcxproj +++ b/StormLib.vcxproj @@ -1006,6 +1006,7 @@      <ClCompile Include="src\lzma\C\Threads.c" />      <ClCompile Include="src\pklib\explode.c" />      <ClCompile Include="src\pklib\implode.c" /> +    <ClCompile Include="src\SMemUtf8.cpp" />      <ClCompile Include="src\sparse\sparse.cpp" />      <ClCompile Include="src\zlib\adler32.c" />      <ClCompile Include="src\zlib\compress_zlib.c" /> diff --git a/StormLib.vcxproj.filters b/StormLib.vcxproj.filters index 22c9793..024fa9d 100644 --- a/StormLib.vcxproj.filters +++ b/StormLib.vcxproj.filters @@ -217,5 +217,8 @@      <ClCompile Include="src\LibTomMathDesc.c">        <Filter>Source Files</Filter>      </ClCompile> +    <ClCompile Include="src\SMemUtf8.cpp"> +      <Filter>Source Files</Filter> +    </ClCompile>    </ItemGroup>  </Project>
\ No newline at end of file diff --git a/StormLib_dll.vcxproj b/StormLib_dll.vcxproj index f437bd9..67899f5 100644 --- a/StormLib_dll.vcxproj +++ b/StormLib_dll.vcxproj @@ -322,6 +322,7 @@        <WarningLevel Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Level4</WarningLevel>        <WarningLevel Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Level4</WarningLevel>      </ClCompile> +    <ClCompile Include="src\SMemUtf8.cpp" />      <ClCompile Include="src\sparse\sparse.cpp" />      <ClCompile Include="src\zlib\adler32.c" />      <ClCompile Include="src\zlib\compress_zlib.c" /> diff --git a/StormLib_dll.vcxproj.filters b/StormLib_dll.vcxproj.filters index 2eaf026..0e05d88 100644 --- a/StormLib_dll.vcxproj.filters +++ b/StormLib_dll.vcxproj.filters @@ -220,6 +220,9 @@      <ClCompile Include="src\LibTomMathDesc.c">        <Filter>Source Files</Filter>      </ClCompile> +    <ClCompile Include="src\SMemUtf8.cpp"> +      <Filter>Source Files</Filter> +    </ClCompile>    </ItemGroup>    <ItemGroup>      <ResourceCompile Include="src\DllMain.rc"> diff --git a/StormLib_test.vcxproj b/StormLib_test.vcxproj index 18cee5b..71aebf9 100644 --- a/StormLib_test.vcxproj +++ b/StormLib_test.vcxproj @@ -336,6 +336,7 @@        <WarningLevel Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Level4</WarningLevel>        <WarningLevel Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Level4</WarningLevel>      </ClCompile> +    <ClCompile Include="src\SMemUtf8.cpp" />      <ClCompile Include="src\sparse\sparse.cpp" />      <ClCompile Include="src\zlib\adler32.c" />      <ClCompile Include="src\zlib\compress_zlib.c" /> diff --git a/StormLib_test.vcxproj.filters b/StormLib_test.vcxproj.filters index d341b12..cf1aa5e 100644 --- a/StormLib_test.vcxproj.filters +++ b/StormLib_test.vcxproj.filters @@ -226,5 +226,8 @@      <ClCompile Include="src\LibTomMathDesc.c">        <Filter>Source Files</Filter>      </ClCompile> +    <ClCompile 
Include="src\SMemUtf8.cpp"> +      <Filter>Source Files</Filter> +    </ClCompile>    </ItemGroup>  </Project>
\ No newline at end of file diff --git a/StormLib_vs08.vcproj b/StormLib_vs08.vcproj index 031dde8..3f8674d 100644 --- a/StormLib_vs08.vcproj +++ b/StormLib_vs08.vcproj @@ -4038,6 +4038,170 @@  					/>  				</FileConfiguration>  			</File> +			<File +				RelativePath=".\src\SMemUtf8.cpp" +				> +				<FileConfiguration +					Name="DebugAD|Win32" +					> +					<Tool +						Name="VCCLCompilerTool" +						UsePrecompiledHeader="2" +						PrecompiledHeaderThrough="StormCommon.h" +						WarningLevel="4" +					/> +				</FileConfiguration> +				<FileConfiguration +					Name="DebugAD|x64" +					> +					<Tool +						Name="VCCLCompilerTool" +						UsePrecompiledHeader="2" +						PrecompiledHeaderThrough="StormCommon.h" +						WarningLevel="4" +					/> +				</FileConfiguration> +				<FileConfiguration +					Name="DebugAS|Win32" +					> +					<Tool +						Name="VCCLCompilerTool" +						UsePrecompiledHeader="2" +						PrecompiledHeaderThrough="StormCommon.h" +						WarningLevel="4" +					/> +				</FileConfiguration> +				<FileConfiguration +					Name="DebugAS|x64" +					> +					<Tool +						Name="VCCLCompilerTool" +						UsePrecompiledHeader="2" +						PrecompiledHeaderThrough="StormCommon.h" +						WarningLevel="4" +					/> +				</FileConfiguration> +				<FileConfiguration +					Name="ReleaseAD|Win32" +					> +					<Tool +						Name="VCCLCompilerTool" +						UsePrecompiledHeader="2" +						PrecompiledHeaderThrough="StormCommon.h" +						WarningLevel="4" +					/> +				</FileConfiguration> +				<FileConfiguration +					Name="ReleaseAD|x64" +					> +					<Tool +						Name="VCCLCompilerTool" +						UsePrecompiledHeader="2" +						PrecompiledHeaderThrough="StormCommon.h" +						WarningLevel="4" +					/> +				</FileConfiguration> +				<FileConfiguration +					Name="ReleaseAS|Win32" +					> +					<Tool +						Name="VCCLCompilerTool" +						UsePrecompiledHeader="2" +						PrecompiledHeaderThrough="StormCommon.h" +						WarningLevel="4" +					/> +				</FileConfiguration> +				
<FileConfiguration +					Name="ReleaseAS|x64" +					> +					<Tool +						Name="VCCLCompilerTool" +						UsePrecompiledHeader="2" +						PrecompiledHeaderThrough="StormCommon.h" +						WarningLevel="4" +					/> +				</FileConfiguration> +				<FileConfiguration +					Name="DebugUD|Win32" +					> +					<Tool +						Name="VCCLCompilerTool" +						UsePrecompiledHeader="2" +						PrecompiledHeaderThrough="StormCommon.h" +						WarningLevel="4" +					/> +				</FileConfiguration> +				<FileConfiguration +					Name="DebugUD|x64" +					> +					<Tool +						Name="VCCLCompilerTool" +						UsePrecompiledHeader="2" +						PrecompiledHeaderThrough="StormCommon.h" +						WarningLevel="4" +					/> +				</FileConfiguration> +				<FileConfiguration +					Name="DebugUS|Win32" +					> +					<Tool +						Name="VCCLCompilerTool" +						UsePrecompiledHeader="2" +						PrecompiledHeaderThrough="StormCommon.h" +						WarningLevel="4" +					/> +				</FileConfiguration> +				<FileConfiguration +					Name="DebugUS|x64" +					> +					<Tool +						Name="VCCLCompilerTool" +						UsePrecompiledHeader="2" +						PrecompiledHeaderThrough="StormCommon.h" +						WarningLevel="4" +					/> +				</FileConfiguration> +				<FileConfiguration +					Name="ReleaseUD|Win32" +					> +					<Tool +						Name="VCCLCompilerTool" +						UsePrecompiledHeader="2" +						PrecompiledHeaderThrough="StormCommon.h" +						WarningLevel="4" +					/> +				</FileConfiguration> +				<FileConfiguration +					Name="ReleaseUD|x64" +					> +					<Tool +						Name="VCCLCompilerTool" +						UsePrecompiledHeader="2" +						PrecompiledHeaderThrough="StormCommon.h" +						WarningLevel="4" +					/> +				</FileConfiguration> +				<FileConfiguration +					Name="ReleaseUS|Win32" +					> +					<Tool +						Name="VCCLCompilerTool" +						UsePrecompiledHeader="2" +						PrecompiledHeaderThrough="StormCommon.h" +						WarningLevel="4" +					/> +				</FileConfiguration> +				<FileConfiguration +					Name="ReleaseUS|x64" +					> +					<Tool +					
	Name="VCCLCompilerTool" +						UsePrecompiledHeader="2" +						PrecompiledHeaderThrough="StormCommon.h" +						WarningLevel="4" +					/> +				</FileConfiguration> +			</File>  			<Filter  				Name="adpcm"  				> diff --git a/StormLib_vs08_dll.vcproj b/StormLib_vs08_dll.vcproj index 66f0705..b51db31 100644 --- a/StormLib_vs08_dll.vcproj +++ b/StormLib_vs08_dll.vcproj @@ -1680,6 +1680,90 @@  					/>  				</FileConfiguration>  			</File> +			<File +				RelativePath=".\src\SMemUtf8.cpp" +				> +				<FileConfiguration +					Name="Debug|Win32" +					> +					<Tool +						Name="VCCLCompilerTool" +						WarningLevel="4" +					/> +				</FileConfiguration> +				<FileConfiguration +					Name="Debug|x64" +					> +					<Tool +						Name="VCCLCompilerTool" +						WarningLevel="4" +					/> +				</FileConfiguration> +				<FileConfiguration +					Name="Release|Win32" +					> +					<Tool +						Name="VCCLCompilerTool" +						WarningLevel="4" +					/> +				</FileConfiguration> +				<FileConfiguration +					Name="Release|x64" +					> +					<Tool +						Name="VCCLCompilerTool" +						WarningLevel="4" +					/> +				</FileConfiguration> +				<FileConfiguration +					Name="DebugAD|Win32" +					> +					<Tool +						Name="VCCLCompilerTool" +						WarningLevel="4" +					/> +				</FileConfiguration> +				<FileConfiguration +					Name="DebugAD|x64" +					> +					<Tool +						Name="VCCLCompilerTool" +						WarningLevel="4" +					/> +				</FileConfiguration> +				<FileConfiguration +					Name="ReleaseAD|Win32" +					> +					<Tool +						Name="VCCLCompilerTool" +						WarningLevel="4" +					/> +				</FileConfiguration> +				<FileConfiguration +					Name="ReleaseAD|x64" +					> +					<Tool +						Name="VCCLCompilerTool" +						WarningLevel="4" +					/> +				</FileConfiguration> +				<FileConfiguration +					Name="ReleaseAS|Win32" +					> +					<Tool +						Name="VCCLCompilerTool" +						WarningLevel="4" +					/> +				</FileConfiguration> +				<FileConfiguration +					Name="ReleaseAS|x64" +					> +	
				<Tool +						Name="VCCLCompilerTool" +						WarningLevel="4" +					/> +				</FileConfiguration> +			</File>  			<Filter  				Name="adpcm"  				> diff --git a/StormLib_vs08_test.vcproj b/StormLib_vs08_test.vcproj index d3506ea..48f2dbd 100644 --- a/StormLib_vs08_test.vcproj +++ b/StormLib_vs08_test.vcproj @@ -1039,6 +1039,42 @@  				</FileConfiguration>  			</File>  			<File +				RelativePath=".\src\SMemUtf8.cpp" +				> +				<FileConfiguration +					Name="Debug|Win32" +					> +					<Tool +						Name="VCCLCompilerTool" +						WarningLevel="4" +					/> +				</FileConfiguration> +				<FileConfiguration +					Name="Debug|x64" +					> +					<Tool +						Name="VCCLCompilerTool" +						WarningLevel="4" +					/> +				</FileConfiguration> +				<FileConfiguration +					Name="Release|Win32" +					> +					<Tool +						Name="VCCLCompilerTool" +						WarningLevel="4" +					/> +				</FileConfiguration> +				<FileConfiguration +					Name="Release|x64" +					> +					<Tool +						Name="VCCLCompilerTool" +						WarningLevel="4" +					/> +				</FileConfiguration> +			</File> +			<File  				RelativePath=".\test\StormTest.cpp"  				>  				<FileConfiguration diff --git a/src/FileStream.cpp b/src/FileStream.cpp index b66098c..39db0a0 100644 --- a/src/FileStream.cpp +++ b/src/FileStream.cpp @@ -78,7 +78,7 @@ static void CreateNameWithSuffix(LPTSTR szBuffer, size_t cchMaxChars, LPCTSTR sz          *szBuffer++ = '.';
      // Append the number
 -    IntToString(szBuffer, szBufferEnd - szBuffer + 1, nValue);
 +    SMemIntToStr(szBuffer, szBufferEnd - szBuffer + 1, nValue);
  }
  //-----------------------------------------------------------------------------
 @@ -1765,7 +1765,7 @@ static void PartStream_Close(TBlockStream * pStream)          // Make sure that the header is properly BSWAPed
          BSWAP_ARRAY32_UNSIGNED(&PartHeader, sizeof(PART_FILE_HEADER));
 -        IntToString(PartHeader.GameBuildNumber, _countof(PartHeader.GameBuildNumber), pStream->BuildNumber);
 +        SMemIntToStr(PartHeader.GameBuildNumber, _countof(PartHeader.GameBuildNumber), pStream->BuildNumber);
          // Write the part header
          pStream->BaseWrite(pStream, &ByteOffset, &PartHeader, sizeof(PART_FILE_HEADER));
 diff --git a/src/SBaseCommon.cpp b/src/SBaseCommon.cpp index 0de7864..3f95ded 100644 --- a/src/SBaseCommon.cpp +++ b/src/SBaseCommon.cpp @@ -30,7 +30,7 @@ LCID  g_lcFileLocale = 0;                       // Compound of file locale and p  // Converts ASCII characters to lowercase
  // Converts slash (0x2F) to backslash (0x5C)
 -unsigned char AsciiToLowerTable[256] =
 +const unsigned char AsciiToLowerTable[256] =
  {
      0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
      0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
 @@ -52,7 +52,7 @@ unsigned char AsciiToLowerTable[256] =  // Converts ASCII characters to uppercase
  // Converts slash (0x2F) to backslash (0x5C)
 -unsigned char AsciiToUpperTable[256] =
 +const unsigned char AsciiToUpperTable[256] =
  {
      0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
      0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
 @@ -135,7 +135,7 @@ void StringCreatePseudoFileName(char * szBuffer, size_t cchMaxChars, unsigned in      szBuffer = StringCopy(szBuffer, (szBufferEnd - szBuffer), "File");
      // Number
 -    szBuffer = IntToString(szBuffer, szBufferEnd - szBuffer + 1, nIndex, 8);
 +    szBuffer = SMemIntToStr(szBuffer, szBufferEnd - szBuffer + 1, nIndex, 8);
      // Dot
      if(szBuffer < szBufferEnd)
 diff --git a/src/SMemUtf8.cpp b/src/SMemUtf8.cpp new file mode 100644 index 0000000..5832422 --- /dev/null +++ b/src/SMemUtf8.cpp @@ -0,0 +1,551 @@ +/*****************************************************************************/ +/* SMemUtf8.cpp                           Copyright (c) Ladislav Zezula 2010 */ +/*---------------------------------------------------------------------------*/ +/* Support for conversion of UTF-8 <-> File name                             */ +/*                                                                           */ +/* File names in the MPQs are assumed to be UTF-8. However, bad sequences    */ +/* or filename unsafe characters are allowed in the list files, but won't    */ +/* work in unpacking files from MPQ to a local file.                         */ +/*                                                                           */ +/* This module contains cross-platform comparable conversion between UTF-8   */ +/* and file names that will produce identical file names across platforms.   
*/ +/*---------------------------------------------------------------------------*/ +/*   Date    Ver   Who  Comment                                              */ +/* --------  ----  ---  -------                                              */ +/* 31.10.24  1.00  Lad  Created                                              */ +/*****************************************************************************/ + +#define __STORMLIB_SELF__ +#include "StormLib.h" +#include "StormCommon.h" + +//----------------------------------------------------------------------------- +// Local defines + +#define MAX_INVALID_CHARS               128         // Maximum number of invalid characters in a row + +//----------------------------------------------------------------------------- +// Conversion tables + +const unsigned char SMemCharToByte[0x80] = +{ +    //   00    01    02    03    04    05    06    07    08    09    0A    0B    0C    0D    0E    0F +        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0xFF +        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x10 +        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x20 +        0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x30 +        0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x40 +        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x50 +        0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x60 +        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF  // 0x70 +}; + +//----------------------------------------------------------------------------- +// Local functions + +// Bit mask of characters that are 
file name safe. We will maintain +// the same charset even on non-Windows in order to keep the file names equal +static unsigned int FileNameSafeChars[4] = +{ +    0x00000000, 0x2BFF7BFB, 0xFFFFFFFF, 0xEFFFFFFF      // Windows: [0x20-0x7F], except 0x22, 0x2A, 0x2F, 0x3A, 0x3C, 0x3E, 0x3F, 0x7C +//  0xfffffffe, 0xffff7fff, 0xffffffff, 0xffffffff      // Linux:   [0x01-0x7F], except 0x2F +}; + +static bool UTF8_IsBadFileNameCharacter(DWORD ch) +{ +    // It is guaranteed that the character is in range of 0x00 - 0x7F +    assert(ch < 0x80); + +    // Use the bit from the table +    return (FileNameSafeChars[ch / 32] & (1 << (ch % 32))) ? false : true; +} + +static DWORD UTF8_DecodeSequence(const BYTE * pbString, BYTE BitsMask, size_t ccFollowBytes, DWORD dwMinValue, DWORD dwMaxValue, DWORD & dwCodePoint, size_t & ccBytesEaten) +{ +    const BYTE * pbSaveString = pbString; +    DWORD dwAccumulator; + +    // Extract the low bits from the leading byte +    dwAccumulator = pbString[0] & BitsMask; +    ccBytesEaten = 1; +    pbString++; + +    // Process the follow-up bytes +    for(size_t i = 0; i < ccFollowBytes; i++) +    { +        // Every follow-up byte in the UTF-8 sequence must start with 10xxxxxx +        if((pbString[0] & 0xC0) != 0x80) +            return ERROR_NO_UNICODE_TRANSLATION; + +        // Add 6 bits to the accumulator +        dwAccumulator = (dwAccumulator << 6) | (*pbString++ & 0x3F); +    } + +    // Check whether the code point is in the given range +    if(!(dwMinValue <= dwAccumulator && dwAccumulator <= dwMaxValue)) +        return ERROR_INVALID_DATA; + +    // Give the number of bytes eaten and the decoded code point +    ccBytesEaten = (pbString - pbSaveString); +    dwCodePoint = dwAccumulator; +    return ERROR_SUCCESS; +} + +// https://en.wikipedia.org/wiki/UTF-8 +static DWORD UTF8_DecodeCodePoint(const BYTE * pbString, const BYTE * pbStringEnd, DWORD & dwCodePoint, size_t & ccBytesEaten) +{ +    // Reset the number of bytes eaten +    
dwCodePoint = SFILE_UTF8_INVALID_CHARACTER; +    ccBytesEaten = 0; + +    if(pbString < pbStringEnd) +    { +        // At least one byte will be eaten +        ccBytesEaten = 1; + +        // 1st code point (0x00 - 0x7F, 1 byte) +        if(pbString[0] <= 0x7F) +        { +            // This is the perfect spot to check for filename-unsafe characters +            if(UTF8_IsBadFileNameCharacter(pbString[0])) +                return ERROR_NO_UNICODE_TRANSLATION; + +            // Decode the 1-byte sequence +            dwCodePoint = pbString[0]; +            return ERROR_SUCCESS; +        } + +        // 2nd code point (0x80 - 0x7FF, 2 bytes) +        if((pbString[0] & 0xE0) == 0xC0 && (pbString + 2) <= pbStringEnd) +        { +            // Decode the 2-byte sequence +            return UTF8_DecodeSequence(pbString, 0x1F, 1, 0x80, 0x7FF, dwCodePoint, ccBytesEaten); +        } + +        // 3rd code point (0x800 - 0xFFFF, 3 bytes) +        // Note: MultiByteToWideChar will not decode 0xE0 0xBF 0xBF (--> 0x0FFF), +        if((pbString[0] & 0xF0) == 0xE0 && (pbString + 3) <= pbStringEnd) +        { +            // Decode the 3-byte sequence +            return UTF8_DecodeSequence(pbString, 0x0F, 2, 0x800, 0xFFFF, dwCodePoint, ccBytesEaten); +        } + +        // 4th code point (0x10000 - 0x10FFFF, 4 bytes) +        if((pbString[0] & 0xF8) == 0xF0 && (pbString + 4) <= pbStringEnd) +        { +            // Try to decode 4-byte sequence +            return UTF8_DecodeSequence(pbString, 0x07, 3, 0x10000, SFILE_UNICODE_MAX, dwCodePoint, ccBytesEaten); +        } + +        // An invalid UTF-8 sequence encountered +        return ERROR_NO_UNICODE_TRANSLATION; +    } + +    // No bytes available. 
Should never happen +    assert(false); +    return ERROR_BUFFER_OVERFLOW; +} + +static size_t UTF8_EncodeSequence(DWORD dwCodePoint, BYTE LeadingByte, DWORD dwFollowByteCount, LPBYTE Utf8Buffer) +{ +    DWORD dwByteShift = dwFollowByteCount * 6; + +    // Encode the highest byte +    Utf8Buffer[0] = (BYTE)(LeadingByte | (dwCodePoint >> dwByteShift)); +    dwByteShift -= 6; + +    // Encode the follow bytes +    for(DWORD i = 0; i < dwFollowByteCount; i++) +    { +        // The follow byte must be 10xxxxxx +        Utf8Buffer[i + 1] = (BYTE)(0x80 | ((dwCodePoint >> dwByteShift) & 0x3F)); +        dwByteShift -= 6; +    } + +    return dwFollowByteCount + 1; +} + +static size_t UTF8_EncodeCodePoint(DWORD dwCodePoint, LPBYTE Utf8Buffer) +{ +    // 0x00 - 0x7F, 1 byte +    if(dwCodePoint < 0x80) +        return UTF8_EncodeSequence(dwCodePoint, 0x00, 0, Utf8Buffer); + +    // 0x80 - 0x7FF, 2 bytes +    if(dwCodePoint < 0x800) +        return UTF8_EncodeSequence(dwCodePoint, 0xC0, 1, Utf8Buffer); + +    // 0x800 - 0xFFFF, 3 bytes +    if(dwCodePoint < 0x10000) +        return UTF8_EncodeSequence(dwCodePoint, 0xE0, 2, Utf8Buffer); + +    // 0x10000 - 0x10FFFF, 4 bytes +    if(dwCodePoint < 0x110000) +        return UTF8_EncodeSequence(dwCodePoint, 0xF0, 3, Utf8Buffer); + +    // Should never happen +    assert(false); +    return 0; +} + +// Emits the collected invalid bytes as an escape sequence: "%xx" for one byte, "%u[xxyy..]" for several. +// Returns the output length after the sequence. When szBuffer is NULL or too small, nothing is written +// but the length is still advanced, so the caller can measure the required size. +static size_t UTF8_FlushInvalidChars(LPTSTR szBuffer, size_t ccBuffer, size_t nOutLength, LPBYTE InvalidChars, size_t nInvalidChars) +{ +    // Case 0: No invalid char -> do nothing +    if(nInvalidChars == 0) +    { +        return nOutLength; +    } + +    // Case 1: One invalid char -> %xx (compatible with previous versions of MPQ Editor) +    if(nInvalidChars == 1) +    { +        // Space for 3 characters needed, plus one slot kept free for the end-of-string +        if(szBuffer != NULL && (nOutLength + 3) < ccBuffer) +        { +            szBuffer[nOutLength] = '%'; + +            // Pass the space remaining after the '%', not the size of the whole buffer +            // NOTE(review): assumes the 2nd argument is the capacity at the passed pointer - confirm +            SMemBinToStr(szBuffer + nOutLength + 1, ccBuffer - nOutLength - 1, InvalidChars, 1); +        } +        return nOutLength + 3; +    } + +  
  // Case 2: More than one invalid char -> %u[xxyyzz] +    else +    { +        // Length of "%u[" + two hexa digits per byte + "]" +        size_t nLengthNeeded = nInvalidChars * 2 + 4; + +        // One more slot is needed for the terminating zero written below +        if(szBuffer != NULL && (nOutLength + nLengthNeeded) < ccBuffer) +        { +            // Copy three characters; the byte count depends on the width of TCHAR +            memcpy(szBuffer + nOutLength, _T("%u["), 3 * sizeof(TCHAR)); + +            // Pass the space remaining after the prefix, not the size of the whole buffer +            // NOTE(review): assumes the 2nd argument is the capacity at the passed pointer - confirm +            SMemBinToStr(szBuffer + nOutLength + 3, ccBuffer - nOutLength - 3, InvalidChars, nInvalidChars); + +            szBuffer[nOutLength + nLengthNeeded - 1] = ']'; +            szBuffer[nOutLength + nLengthNeeded] = 0; +        } +        return nOutLength + nLengthNeeded; +    } +} + +size_t UTF8_FlushBinBuffer(LPBYTE pbBuffer, size_t ccBuffer, size_t nOutLength, LPBYTE BinBuffer, size_t nByteCount) +{ +    if(pbBuffer != NULL && (nOutLength + nByteCount) < ccBuffer) +        memcpy(pbBuffer + nOutLength, BinBuffer, nByteCount); +    return nOutLength + nByteCount; +} + +#ifdef STORMLIB_WIDE_CHAR +static size_t UTF16_EncodeCodePoint(DWORD dwCodePoint, unsigned short * Utf16Buffer) +{ +    // https://en.wikipedia.org/wiki/UTF-16 +    if(dwCodePoint <= 0xFFFF) +    { +        Utf16Buffer[0] = (unsigned short)(dwCodePoint); +        return 1; +    } + +    if(dwCodePoint <= SFILE_UNICODE_MAX) +    { +        // Fix the code point +        dwCodePoint -= 0x10000; + +        // Split the code point to two 10-bit values +        Utf16Buffer[0] = (unsigned short)(0xD800 + (dwCodePoint >> 10));    // High 10 bits +        Utf16Buffer[1] = (unsigned short)(0xDC00 + (dwCodePoint & 0x3FF));  // Low 10 bits +        return 2; +    } + +    // Should never happen +    assert(false); +    return 0; +} + +static DWORD UTF16_DecodeCodePoint(LPCTSTR szString, LPCTSTR szStringEnd, DWORD & dwCodePoint, size_t & ccCharsEaten) +{ +    // Reset the number of bytes eaten +    dwCodePoint = SFILE_UTF8_INVALID_CHARACTER; +    ccCharsEaten = 0; + +    if(szString < szStringEnd) +    { +        // At least one char will be eaten +        
ccCharsEaten = 1; + +        // A low surrogate must never come first -> invalid sequence +        if(0xDC00 <= szString[0] && szString[0] <= 0xDFFF) +        { +            dwCodePoint = SFILE_UTF8_INVALID_CHARACTER; +            return ERROR_NO_UNICODE_TRANSLATION; +        } + +        // Check for a valid surrogate pair: high surrogate followed by a low surrogate +        if(0xD800 <= szString[0] && szString[0] <= 0xDBFF && (szString + 1) < szStringEnd && 0xDC00 <= szString[1] && szString[1] <= 0xDFFF) +        { +            // Combine both 10-bit halves first, then add the 0x10000 bias. +            // The parentheses are required: '+' binds tighter than '|' +            dwCodePoint = ((((DWORD)szString[0] - 0xD800) << 10) | ((DWORD)szString[1] - 0xDC00)) + 0x10000; +            ccCharsEaten = 2; +            return ERROR_SUCCESS; +        } + +        // Direct encoding (also taken by a high surrogate that is not followed by a low one) +        dwCodePoint = szString[0]; +        ccCharsEaten = 1; +        return ERROR_SUCCESS; +    } + +    // No bytes available. Should never happen +    assert(false); +    return ERROR_BUFFER_OVERFLOW; +} +#endif + +size_t UTF16_IsEncodedCharSequence(LPCTSTR szString, LPCTSTR szStringEnd, LPBYTE BinBuffer) +{ +    size_t nEncodedChars = 0; + +    if((szString + 1) < szStringEnd && *szString++ == '%') +    { +        if((szString + 1) < szStringEnd && *szString++ == 'u') +        { +            if((szString + 1) < szStringEnd && *szString++ == '[') +            { +                // Keep going as long as we can convert +                for(size_t i = 0; i < MAX_INVALID_CHARS; i++) +                { +                    if(szString + (i * 2) >= szStringEnd) +                        break; +                    if(szString[i * 2] == ']') +                        break; +                    nEncodedChars++; +                } + +                // Did we encounter the end of the string? 
+                if(szString + (nEncodedChars * 2) + 1 <= szStringEnd && szString[nEncodedChars * 2] == ']') +                { +                    TCHAR HexaString[MAX_INVALID_CHARS * 2 + 1]; + +                    // Copy the hexadecimal string +                    memcpy(HexaString, szString, (nEncodedChars * 2) * sizeof(TCHAR)); +                    HexaString[nEncodedChars * 2] = 0; + +                    // Try to decode the hexa string +                    if(SMemStrToBin(HexaString, BinBuffer, nEncodedChars) == ERROR_SUCCESS) +                    { +                        return nEncodedChars; +                    } +                } +            } +        } +    } +    return 0; +} + +//----------------------------------------------------------------------------- +// Public (exported) functions + +// Conversion of MPQ file name to file-name-safe string +DWORD WINAPI SMemUTF8ToFileName( +    LPTSTR szBuffer,                // Pointer to the output buffer. If NULL, the function will calulate the needed length +    size_t ccBuffer,                // Length of the output buffer (must include EOS) +    const void * lpString,          // Pointer to the begin of the string +    const void * lpStringEnd,       // Pointer to the end of string. 
If NULL, it's assumed to be zero-terminated +    DWORD dwFlags,                  // Additional flags +    size_t * pOutLength = NULL)     // Pointer to a variable that receives the needed length (optional) +{ +    const BYTE * pbStringEnd = (const BYTE *)lpStringEnd; +    const BYTE * pbString = (const BYTE *)lpString; +    DWORD dwErrCode = ERROR_SUCCESS; +    size_t nInvalidChars = 0; +    size_t nOutLength = 0; +    BYTE InvalidChars[MAX_INVALID_CHARS]; + +    // Set the end of the input if not specified +    if(pbStringEnd == NULL) +        pbStringEnd = pbString + strlen((char *)pbString); + +    // Keep conversion as long +    while(pbString < pbStringEnd) +    { +        size_t ccBytesEaten = 0; +        size_t nCharLength; +        DWORD dwCodePoint = 0; + +        // Decode the single UTF-8 char +        if((dwErrCode = UTF8_DecodeCodePoint(pbString, pbStringEnd, dwCodePoint, ccBytesEaten)) != ERROR_SUCCESS) +        { +            // Exactly one byte should be eaten on error +            assert(ccBytesEaten == 1); + +            // If invalid chars are allowed, we replace the result with 0xFFFD +            if(dwFlags & SFILE_UTF8_ALLOW_INVALID_CHARS) +            { +                // Replace the code point with invalid marker and continue on the next character +                dwCodePoint = SFILE_UTF8_INVALID_CHARACTER; +                dwErrCode = ERROR_SUCCESS; +            } + +            // If the invalid chars are not allowed, we put the invalid char to the stack +            else +            { +                // Flush the invalid characters, if full +                if(nInvalidChars >= _countof(InvalidChars)) +                { +                    nOutLength = UTF8_FlushInvalidChars(szBuffer, ccBuffer, nOutLength, InvalidChars, nInvalidChars); +                    nInvalidChars = 0; +                } + +                // Put the invalid char to the stack +                InvalidChars[nInvalidChars++] = pbString[0]; +                
pbString++; +                continue; +            } +        } + +        // Check whether the unicode char is not out of range +        assert(dwCodePoint <= SFILE_UNICODE_MAX); + +        // Move the source pointer by the number of bytes eaten +        pbString = pbString + ccBytesEaten; + +        // Flush the invalid characters, if any +        nOutLength = UTF8_FlushInvalidChars(szBuffer, ccBuffer, nOutLength, InvalidChars, nInvalidChars); +        nInvalidChars = 0; + +#ifdef STORMLIB_WIDE_CHAR +        { +            unsigned short Utf16Buffer[2]; + +            // Encode the code point into UTF-16 +            nCharLength = UTF16_EncodeCodePoint(dwCodePoint, Utf16Buffer); + +            // Write the encoded UTF-16 to the output buffer, if present +            if(szBuffer != NULL && (nOutLength + nCharLength) < ccBuffer) +            { +                memcpy(szBuffer + nOutLength, Utf16Buffer, nCharLength * sizeof(unsigned short)); +            } +        } +#else +        { +            BYTE Utf8Buffer[4]; + +            // Encode the code point into UTF-8 +            nCharLength = UTF8_EncodeCodePoint(dwCodePoint, Utf8Buffer); + +            // Write the encoded UTF-16 to the output buffer, if present +            if(szBuffer != NULL && (nOutLength + nCharLength) < ccBuffer) +            { +                memcpy(szBuffer + nOutLength, Utf8Buffer, nCharLength); +            } +        } +#endif + +        // Increment the output length +        nOutLength = nOutLength + nCharLength; +    } + +    // Flush the invalid characters, if any +    nOutLength = UTF8_FlushInvalidChars(szBuffer, ccBuffer, nOutLength, InvalidChars, nInvalidChars); +    nInvalidChars = 0; + +    // Terminate the string with zero, if we still have space +    if(szBuffer != NULL && nOutLength < ccBuffer) +        szBuffer[nOutLength] = 0; +    nOutLength++; + +    // Give the output length, if required +    if(pOutLength != NULL) +        pOutLength[0] = nOutLength; +    return 
dwErrCode; +} + +DWORD WINAPI SMemFileNameToUTF8( +    void * lpBuffer,                // Pointer to the output buffer. If NULL, the function will calulate the needed length +    size_t ccBuffer,                // Length of the output buffer (must include EOS) +    const TCHAR * szString,         // Pointer to the begin of the string +    const TCHAR * szStringEnd,      // Pointer to the end of string. If NULL, it's assumed to be zero-terminated +    DWORD /* dwFlags */,            // Additional flags +    size_t * pOutLength = NULL)     // Pointer to a variable that receives the needed length in bytes (optional) +{ +    LPBYTE pbBuffer = (LPBYTE)lpBuffer; +    size_t nOutLength = 0; +    DWORD dwErrCode = ERROR_SUCCESS; + +    // Set the end of the input if not specified +    if(szStringEnd == NULL) +        szStringEnd = szString + _tcslen(szString); + +    // Keep conversion as long +    while(szString < szStringEnd) +    { +        size_t ccCharsEaten = 0; +        size_t nUtf8Length; +        DWORD dwCodePoint = 0; +        BYTE Utf8Buffer[MAX_INVALID_CHARS]; + +        // Check for encoded sequence of bytes +        if(szString[0] == '%') +        { +            // If there is a single hexa number ("%c7"), decode that number +            if((szString + 3) <= szStringEnd) +            { +                TCHAR HexaString[3] = {0}; + +                HexaString[0] = szString[1]; +                HexaString[1] = szString[2]; +                if(SMemStrToBin(HexaString, Utf8Buffer, 1) == ERROR_SUCCESS) +                { +                    nOutLength = UTF8_FlushBinBuffer(pbBuffer, ccBuffer, nOutLength, Utf8Buffer, 1); +                    szString += 3; +                    continue; +                } +            } + +            // If there is an escaped sequence ("%u[aabbcc]"), decode that sequence +            if((nUtf8Length = UTF16_IsEncodedCharSequence(szString, szStringEnd, Utf8Buffer)) != 0) +            { +                nOutLength = 
UTF8_FlushBinBuffer(pbBuffer, ccBuffer, nOutLength, Utf8Buffer, nUtf8Length); +                szString += (nUtf8Length * 2) + 4; +                continue; +            } +        } + +#ifdef STORMLIB_WIDE_CHAR +        // Try to decode the code point from UTF-16 +        if((dwErrCode = UTF16_DecodeCodePoint(szString, szStringEnd, dwCodePoint, ccCharsEaten)) != ERROR_SUCCESS) +            return dwErrCode; +#else +        // Try to decode the code point from UTF-16 +        if((dwErrCode = UTF8_DecodeCodePoint((const BYTE *)szString, (const BYTE *)szStringEnd, dwCodePoint, ccCharsEaten)) != ERROR_SUCCESS) +            return dwErrCode; +#endif + +        // Check whether the unicode char is not out of range +        assert(dwCodePoint <= SFILE_UNICODE_MAX); + +        // Move the source pointer by the number of bytes eaten +        szString = szString + ccCharsEaten; + +        // Encode the UNICODE char +        nUtf8Length = UTF8_EncodeCodePoint(dwCodePoint, Utf8Buffer); + +        // Do we have enough space in the buffer? +        if(pbBuffer != NULL && (nOutLength + nUtf8Length) < ccBuffer) +        { +            // Write the encoded UTF-16 to the output +            memcpy(pbBuffer + nOutLength, Utf8Buffer, nUtf8Length); +        } + +        // Increment the output length +        nOutLength = nOutLength + nUtf8Length; +    } + +    // Terminate the string with zero, if we still have space +    if(pbBuffer != NULL && nOutLength < ccBuffer) +        pbBuffer[nOutLength] = 0; +    nOutLength++; + +    // Give the output length, if required +    if(pOutLength != NULL) +        pOutLength[0] = nOutLength; +    return dwErrCode; +} diff --git a/src/StormCommon.h b/src/StormCommon.h index c050093..1e67a27 100644 --- a/src/StormCommon.h +++ b/src/StormCommon.h @@ -154,14 +154,15 @@ extern LCID  g_lcFileLocale;                    // Preferred file locale and pla  //-----------------------------------------------------------------------------
  // Conversion to uppercase/lowercase (and "/" to "\")
 -extern unsigned char AsciiToLowerTable[256];
 -extern unsigned char AsciiToUpperTable[256];
 +extern const unsigned char AsciiToLowerTable[256];
 +extern const unsigned char AsciiToUpperTable[256];
 +extern const unsigned char SMemCharToByte[0x80];
  //-----------------------------------------------------------------------------
  // Safe string functions
  template <typename XCHAR, typename XINT>
 -XCHAR * IntToString(XCHAR * szBuffer, size_t cchMaxChars, XINT nValue, size_t nDigitCount = 0)
 +XCHAR * SMemIntToStr(XCHAR * szBuffer, size_t cchMaxChars, XINT nValue, size_t nDigitCount = 0)
  {
      XCHAR * szBufferEnd = szBuffer + cchMaxChars - 1;
      XCHAR szNumberRev[0x20];
 @@ -197,6 +198,72 @@ XCHAR * IntToString(XCHAR * szBuffer, size_t cchMaxChars, XINT nValue, size_t nD      return szBuffer;
  }
 +// Writes the bytes of a binary blob as lowercase hexadecimal text.
 +//   szBuffer  - receives the text, two characters per input byte, plus a terminating zero
 +//   cchBuffer - capacity of szBuffer, in characters
 +//   pvBinary  - the binary data to convert
 +//   cbBinary  - number of bytes in pvBinary
 +// Returns ERROR_SUCCESS, or ERROR_INSUFFICIENT_BUFFER when cchBuffer is below (cbBinary * 2) + 1.
 +template <typename XCHAR>
 +DWORD SMemBinToStr(XCHAR * szBuffer, size_t cchBuffer, const void * pvBinary, size_t cbBinary)
 +{
 +    static const char HexDigits[] = "0123456789abcdef";
 +    const unsigned char * pbSource = (const unsigned char *)pvBinary;
 +
 +    // Refuse to write anything unless the whole text and the EOS fit
 +    if(cchBuffer < ((cbBinary * 2) + 1))
 +        return ERROR_INSUFFICIENT_BUFFER;
 +
 +    // Each input byte becomes a high-nibble digit followed by a low-nibble digit
 +    for(size_t nIndex = 0; nIndex < cbBinary; nIndex++)
 +    {
 +        unsigned char OneByte = pbSource[nIndex];
 +
 +        szBuffer[(nIndex * 2) + 0] = HexDigits[OneByte >> 0x04];
 +        szBuffer[(nIndex * 2) + 1] = HexDigits[OneByte & 0x0F];
 +    }
 +
 +    // Terminate the string right behind the last digit
 +    szBuffer[cbBinary * 2] = 0;
 +    return ERROR_SUCCESS;
 +}
 +
 +// Converts a string made of two-character groups (e.g. the "c7" of an escaped "%c7")
 +// into binary data, using the SMemCharToByte lookup table for each character.
 +//   szString  - zero-terminated input string; NULL or empty yields zero bytes
 +//   pvBinary  - receives the decoded bytes
 +//   cbBinary  - capacity of pvBinary, in bytes
 +//   PtrBinary - optional; on success receives the number of bytes stored
 +// Returns ERROR_SUCCESS, ERROR_INVALID_PARAMETER (a character is outside of the lookup table,
 +// its table entry is 0xFF, or the string has an odd number of characters)
 +// or ERROR_INSUFFICIENT_BUFFER (the string holds more pairs than cbBinary bytes).
 +template <typename XCHAR>
 +DWORD SMemStrToBin(const XCHAR * szString, void * pvBinary, size_t cbBinary, size_t * PtrBinary = NULL)
 +{
 +    LPBYTE pbBinary = (LPBYTE)pvBinary;
 +    LPBYTE pbBinaryEnd = pbBinary + cbBinary;
 +    LPBYTE pbSaveBinary = pbBinary;
 +
 +    // Verify parameter
 +    if(szString != NULL && szString[0] != 0)
 +    {
 +        // Work as long as we have at least 2 characters ready
 +        while(szString[0] != 0 && szString[1] != 0)
 +        {
 +            // Convert to unsigned WITHOUT narrowing: negative chars become large values,
 +            // and wide characters (e.g. 0x0130) can no longer alias to an ASCII digit (0x30)
 +            unsigned int CharIndex0 = (unsigned int)szString[0];
 +            unsigned int CharIndex1 = (unsigned int)szString[1];
 +
 +            // SMemCharToByte has 0x80 entries, so the valid indexes are 0x00 - 0x7F
 +            if(CharIndex0 >= 0x80 || CharIndex1 >= 0x80)
 +                return ERROR_INVALID_PARAMETER;
 +            if(SMemCharToByte[CharIndex0] == 0xFF || SMemCharToByte[CharIndex1] == 0xFF)
 +                return ERROR_INVALID_PARAMETER;
 +
 +            // Overflow check
 +            if(pbBinary >= pbBinaryEnd)
 +                return ERROR_INSUFFICIENT_BUFFER;
 +
 +            // The first character gives the upper four bits, the second one the lower four bits
 +            *pbBinary++ = (BYTE)((SMemCharToByte[CharIndex0] << 0x04) | SMemCharToByte[CharIndex1]);
 +            szString += 2;
 +        }
 +
 +        // Odd number of chars?
 +        if(szString[0] != 0 && szString[1] == 0)
 +            return ERROR_INVALID_PARAMETER;
 +    }
 +
 +    // Give the length
 +    if(PtrBinary != NULL)
 +        PtrBinary[0] = (size_t)(pbBinary - pbSaveBinary);
 +    return ERROR_SUCCESS;
 +}
 +
  char * StringCopy(char * szTarget, size_t cchTarget, const char * szSource);
  void StringCat(char * szTarget, size_t cchTargetMax, const char * szSource);
  void StringCreatePseudoFileName(char * szBuffer, size_t cchMaxChars, unsigned int nIndex, const char * szExtension);
 diff --git a/src/StormLib.h b/src/StormLib.h index 4072a7d..bf884c5 100644 --- a/src/StormLib.h +++ b/src/StormLib.h @@ -1,7 +1,7 @@  /*****************************************************************************/  /* StormLib.h                        Copyright (c) Ladislav Zezula 1999-2017 */  /*---------------------------------------------------------------------------*/ -/* StormLib library v 9.22                                                   */ +/* StormLib library v 9.30                                                   */  /*                                                                           */  /* Author : Ladislav Zezula                                                  */  /* E-mail : ladik@zezula.net                                                 */ @@ -74,6 +74,7 @@  /* 12.12.16  9.21  Lad  Release 9.21                                         */  /* 10.11.17  9.22  Lad  Release 9.22                                         */  /* 28.09.22  9.24  Lad  lcLocale -> lcFileLocale, also contains platform     */ +/* 01.11.24  9.30  Lad  Added conversion from UTF-8 to file name and back    */  /*****************************************************************************/  #ifndef __STORMLIB_H__ @@ -143,8 +144,8 @@ extern "C" {  //-----------------------------------------------------------------------------  // Defines -#define STORMLIB_VERSION                0x091A  // Current version of StormLib -#define STORMLIB_VERSION_STRING         "9.26"  // Current version of StormLib as string +#define STORMLIB_VERSION                0x091E  // Current version of StormLib +#define STORMLIB_VERSION_STRING         "9.30"  // Current version of StormLib as string  #define ID_MPQ                      0x1A51504D  // MPQ archive header ID ('MPQ\x1A')  #define ID_MPQ_USERDATA             0x1B51504D  // MPQ userdata entry ('MPQ\x1B') @@ -1132,6 +1133,30 @@ int    WINAPI SCompDecompress (void * pvOutBuffer, int * pcbOutBuffer, void * pv  int    WINAPI 
SCompDecompress2(void * pvOutBuffer, int * pcbOutBuffer, void * pvInBuffer, int cbInBuffer);  //----------------------------------------------------------------------------- +// Conversion of UTF-8 (MPQ listfiles) into file name safe strings + +#define SFILE_UTF8_ALLOW_INVALID_CHARS  0x01        // If set, then the function will treat invalid chars like like MultiByteToWideChar +#define SFILE_UTF8_INVALID_CHARACTER    0xFFFD      // Marker of an invalid character +#define SFILE_UNICODE_MAX               0x10FFFF    // The highest valid UNICODE char + +// Conversion of MPQ file name to file-name-safe string +DWORD  WINAPI SMemUTF8ToFileName( +    TCHAR * szBuffer,               // Pointer to the output buffer. If NULL, the function will calulate the needed length +    size_t ccBuffer,                // Length of the output buffer (must include EOS) +    const void * lpString,          // Pointer to the begin of the string +    const void * lpStringEnd,       // Pointer to the end of string. If NULL, it's assumed to be zero-terminated +    DWORD dwFlags,                  // Additional flags +    size_t * pOutLength);           // Pointer to a variable that receives the needed length (optional) + +DWORD  WINAPI SMemFileNameToUTF8( +    void * lpBuffer,                // Pointer to the output buffer. If NULL, the function will calulate the needed length +    size_t ccBuffer,                // Length of the output buffer (must include EOS) +    const TCHAR * szString,         // Pointer to the begin of the string +    const TCHAR * szStringEnd,      // Pointer to the end of string. 
If NULL, it's assumed to be zero-terminated +    DWORD dwFlags,                  // Reserved +    size_t * pOutLength);           // Pointer to a variable that receives the needed length in bytes (optional) + +//-----------------------------------------------------------------------------  // Non-Windows support for SetLastError/GetLastError  #ifndef STORMLIB_WINDOWS diff --git a/src/StormPort.h b/src/StormPort.h index aa309e1..f00afd6 100644 --- a/src/StormPort.h +++ b/src/StormPort.h @@ -44,6 +44,10 @@    #define _CRT_NON_CONFORMING_SWPRINTFS    #endif +  #if defined(UNICODE) || defined(_UNICODE) +  #define STORMLIB_WIDE_CHAR +  #endif +    #include <tchar.h>    #include <assert.h>    #include <ctype.h> @@ -52,15 +56,9 @@    // Suppress definitions of `min` and `max` macros by <windows.h>:    #define NOMINMAX 1    #include <windows.h> -    #include <wininet.h> -  #define STORMLIB_LITTLE_ENDIAN -  #ifdef _WIN64 -    #define STORMLIB_64BIT -  #else -    #define STORMLIB_32BIT -  #endif +  #define STORMLIB_LITTLE_ENDIAN    #define STORMLIB_CDECL __cdecl @@ -74,7 +72,6 @@  #if !defined(STORMLIB_PLATFORM_DEFINED) && defined(__APPLE__)  // Mac BSD API -  // Macintosh    #include <sys/types.h>    #include <sys/stat.h>    #include <sys/mman.h> @@ -320,13 +317,8 @@  // Definition of Windows-specific types for non-Windows platforms  #ifndef STORMLIB_WINDOWS -  #if __LP64__ -    #define STORMLIB_64BIT -  #else -    #define STORMLIB_32BIT -  #endif -  // __cdecl meand nothing on non-Windows +  // __cdecl means nothing on non-Windows    #define STORMLIB_CDECL /* */    // Typedefs for ANSI C @@ -340,7 +332,7 @@    typedef long long      LONGLONG;    typedef unsigned long long ULONGLONG;    typedef void         * HANDLE; -  typedef void         * LPOVERLAPPED; // Unsupported on Linux and Mac +  typedef void         * LPOVERLAPPED;    typedef char           TCHAR;    typedef unsigned int   LCID;    typedef LONG         * PLONG; @@ -351,7 +343,7 @@    typedef char         * 
LPTSTR;    typedef char         * LPSTR; -  #ifdef STORMLIB_32BIT +  #ifndef __LP64__      #define _LZMA_UINT32_IS_ULONG    #endif @@ -409,12 +401,14 @@    #define ERROR_DISK_FULL                ENOSPC    #define ERROR_ALREADY_EXISTS           EEXIST    #define ERROR_INSUFFICIENT_BUFFER      ENOBUFS -  #define ERROR_BAD_FORMAT               1000        // No such error code under Linux -  #define ERROR_NO_MORE_FILES            1001        // No such error code under Linux -  #define ERROR_HANDLE_EOF               1002        // No such error code under Linux -  #define ERROR_CAN_NOT_COMPLETE         1003        // No such error code under Linux -  #define ERROR_FILE_CORRUPT             1004        // No such error code under Linux -  #define ERROR_BUFFER_OVERFLOW          1005        // No such error code under Linux +  #define ERROR_BAD_FORMAT               1000        // No such error codes under Linux +  #define ERROR_NO_MORE_FILES            1001 +  #define ERROR_HANDLE_EOF               1002 +  #define ERROR_CAN_NOT_COMPLETE         1003 +  #define ERROR_FILE_CORRUPT             1004 +  #define ERROR_BUFFER_OVERFLOW          1005 +  #define ERROR_INVALID_DATA             1006 +  #define ERROR_NO_UNICODE_TRANSLATION   1007  #endif  // Macros that can sometimes be missing diff --git a/src/wdk/sources-cpp.cpp b/src/wdk/sources-cpp.cpp index f289975..a9d7ba3 100644 --- a/src/wdk/sources-cpp.cpp +++ b/src/wdk/sources-cpp.cpp @@ -24,3 +24,4 @@  #include "src\SFilePatchArchives.cpp"  #include "src\SFileReadFile.cpp"  #include "src\SFileVerify.cpp" +#include "src\SMemUtf8.cpp" diff --git a/test/StormTest.cpp b/test/StormTest.cpp index 0312abb..7cd885d 100755 --- a/test/StormTest.cpp +++ b/test/StormTest.cpp @@ -275,6 +275,21 @@ static SFILE_MARKERS MpqMarkers[] =  static TCHAR szMpqDirectory[MAX_PATH+1];
  size_t cchMpqDirectory = 0;
 +// Assertion helper for the tests: when bCondition is false, triggers __debugbreak()
 +// on Windows or assert(false) elsewhere. Always returns bCondition.
 +inline bool AssertTrue(bool bCondition)
 +{
 +    // Nothing to report when the condition holds
 +    if(bCondition)
 +        return true;
 +
 +#ifdef STORMLIB_WINDOWS
 +    __debugbreak();
 +#else
 +    assert(false);
 +#endif
 +    return false;
 +}
 +
 +#define ASSERT_TRUE(condition)      { if(!AssertTrue(condition)) { return false; } }   // Makes the enclosing function return false when the condition does not hold
 +
  static EXTRA_TYPE GetExtraType(const void * pExtra)
  {
      if(pExtra != NULL)
 @@ -328,30 +343,6 @@ LPCTSTR GetRelativePath(LPCTSTR szFullPath)      return _T("");
  }
 -// Converts binary array to string.
 -// The caller must ensure that the buffer has at least ((cbBinary * 2) + 1) characters
 -template <typename xchar>
 -xchar * StringFromBinary(LPBYTE pbBinary, size_t cbBinary, xchar * szBuffer)
 -{
 -    const char * IntToHexChar = "0123456789abcdef";
 -    xchar * szSaveBuffer = szBuffer;
 -
 -    // Verify the binary pointer
 -    if(pbBinary && cbBinary)
 -    {
 -        // Convert the bytes to string array
 -        for(size_t i = 0; i < cbBinary; i++)
 -        {
 -            *szBuffer++ = IntToHexChar[pbBinary[i] >> 0x04];
 -            *szBuffer++ = IntToHexChar[pbBinary[i] & 0x0F];
 -        }
 -    }
 -
 -    // Terminate the string
 -    *szBuffer = 0;
 -    return szSaveBuffer;
 -}
 -
  const char * GetFileText(PFILE_DATA pFileData)
  {
      const char * szFileText = (const char *)(pFileData->FileData);
 @@ -1764,7 +1755,7 @@ static DWORD VerifyDataChecksum(TLogHelper & Logger, HANDLE hMpq, DWORD dwSearch          // Check the MD5 hash, if given
          if(IS_VALID_STRING(szNameHash))
          {
 -            StringFromBinary(NameHash, MD5_DIGEST_SIZE, szNameHash);
 +            SMemBinToStr(szNameHash, _countof(szNameHash), NameHash, MD5_DIGEST_SIZE);
              if(_stricmp(szNameHash, szExpectedHash))
              {
                  Logger.PrintMessage("Extracted files MD5 mismatch (expected: %s, obtained: %s)", szExpectedHash, szNameHash);
 @@ -3785,23 +3776,91 @@ static DWORD TestReplaceFile(LPCTSTR szMpqPlainName, LPCTSTR szFilePlainName, LP      return dwErrCode;
  }
 -static void Test_PlayingSpace()
 +static bool TestUtfConversion(const void * lpString)   // Round-trips lpString through SMemUTF8ToFileName and SMemFileNameToUTF8; returns false on the first failed check
  {
 -    HANDLE hFile = NULL;
 -    HANDLE hMpq = NULL;
 +    LPTSTR szBuffer;
 +    LPBYTE pbBuffer;
 +    size_t nLength1 = 0;
 +    size_t nLength2 = 0;
 +    DWORD dwErrCode1;
 +    DWORD dwErrCode2;
 +    TCHAR szWideBuffer[1];   // One-element output buffer for the "insufficient buffer" call of SMemUTF8ToFileName
 +    BYTE szByteBuffer[1];   // One-element output buffer for the "insufficient buffer" call of SMemFileNameToUTF8
 +    int nResult;
 +
 +    // Get the required output length (nLength1) by passing a NULL output buffer
 +    dwErrCode1 = SMemUTF8ToFileName(NULL, 0, lpString, NULL, 0, &nLength1);
 +
 +    // A non-NULL buffer of insufficient length must give the same error code and the same required length
 +    dwErrCode2 = SMemUTF8ToFileName(szWideBuffer, _countof(szWideBuffer), lpString, NULL, 0, &nLength2);
 +    ASSERT_TRUE(dwErrCode2 == dwErrCode1);
 +    ASSERT_TRUE(nLength2 == nLength1);
 +
 +    // A buffer of exactly the reported length must give the same error code and length as well
 +    if((szBuffer = STORM_ALLOC(TCHAR, nLength1)) != NULL)
 +    {
 +        dwErrCode2 = SMemUTF8ToFileName(szBuffer, nLength1, lpString, NULL, 0, &nLength2);
 +        ASSERT_TRUE(dwErrCode2 == dwErrCode1);   // NOTE(review): from here on, a failing ASSERT_TRUE returns before STORM_FREE(szBuffer) is reached
 +        ASSERT_TRUE(nLength2 == nLength1);
 +
 +        // Convert the file name back: get the required length by passing a NULL output buffer
 +        dwErrCode1 = SMemFileNameToUTF8(NULL, 0, szBuffer, NULL, 0, &nLength1);
 +
 +        // A non-NULL buffer of insufficient length must give the same error code and the same required length
 +        dwErrCode2 = SMemFileNameToUTF8(szByteBuffer, _countof(szByteBuffer), szBuffer, NULL, 0, &nLength2);
 +        ASSERT_TRUE(dwErrCode2 == dwErrCode1);
 +        ASSERT_TRUE(nLength2 == nLength1);
 +
 +        // Check the conversion into a buffer large enough
 +        if((pbBuffer = STORM_ALLOC(BYTE, nLength1)) != NULL)
 +        {
 +            dwErrCode2 = SMemFileNameToUTF8(pbBuffer, nLength1, szBuffer, NULL, 0, &nLength2);
 +            ASSERT_TRUE(dwErrCode2 == dwErrCode1);   // NOTE(review): a failing ASSERT_TRUE in this scope also skips STORM_FREE(pbBuffer)
 +            ASSERT_TRUE(nLength2 == nLength1);
 -    if(SFileOpenArchive(_T("c:\\RedHero vs 7Com22 (Final Stage GOD).scx"), 0, 0, &hMpq))
 -    {
 -        SFileSetLocale(0x409);
 +            nResult = memcmp(pbBuffer, lpString, nLength1);   // The round trip must reproduce the first nLength1 bytes of the input
 +            ASSERT_TRUE(nResult == 0);
 +
 +            STORM_FREE(pbBuffer);
 +        }
 +
 +        STORM_FREE(szBuffer);
 +    }
 +    return true;
 +}
 -        if(SFileOpenFileEx(hMpq, "staredit\\scenario.chk", 0, &hFile))
 +// Runs TestUtfConversion on one caller-supplied string and then on the file names
 +// enumerated from the given listfile (located in szListFileDir).
 +// Returns ERROR_SUCCESS, or ERROR_INVALID_DATA when a listfile name fails the conversion test.
 +static DWORD TestUtf8Conversions(const BYTE * szTestString, const TCHAR * szListFile)
 +{
 +    SFILE_FIND_DATA sf;
 +    HANDLE hFind;
 +    TCHAR szFullPath[MAX_PATH];
 +    DWORD dwErrCode = ERROR_SUCCESS;
 +
 +    // Check conversion of the caller-supplied test string.
 +    // The result is not propagated to the caller (unchanged behavior)
 +    TestUtfConversion(szTestString);
 +
 +    // Create full path of the listfile
 +    CreateFullPathName(szFullPath, _countof(szFullPath), szListFileDir, szListFile);
 +
 +    // Test the file names enumerated from the listfile
 +    // NOTE(review): the entry stored to 'sf' by SListFileFindFirstFile looks like it is never tested,
 +    // because the loop calls SListFileFindNextFile before the first check - confirm
 +    hFind = SListFileFindFirstFile(NULL, szFullPath, "*", &sf);
 +    if(hFind != NULL)
 +    {
 +        while(SListFileFindNextFile(hFind, &sf))
          {
 -            SFileCloseFile(hFile);
 +            if(!TestUtfConversion(sf.cFileName))
 +            {
 +                // Leave the loop instead of returning, so that the search handle gets closed
 +                dwErrCode = ERROR_INVALID_DATA;
 +                break;
 +            }
          }
 -        SFileCloseArchive(hMpq);
 +        SListFileFindClose(hFind);
      }
 +
 +    return dwErrCode;
  }
 +static void Test_PlayingSpace()
 +{
 +    // Intentionally empty: placeholder for ad-hoc experiments
 +}
 +
  //-----------------------------------------------------------------------------
  // Tables
 @@ -3814,13 +3873,59 @@ static LPCTSTR szDiabdatMPQ = _T("MPQ_1997_v1_Diablo1_DIABDAT.MPQ");  static const TEST_EXTRA_ONEFILE  LfBliz = {ListFile, _T("ListFile_Blizzard.txt")};
  static const TEST_EXTRA_ONEFILE  LfWotI = {ListFile, _T("ListFile_WarOfTheImmortals.txt")};
 +static const TEST_EXTRA_ONEFILE  LfBad1 = {ListFile, _T("ListFile_UTF8_Bad.txt")};
  static const BYTE szMpqFileNameUTF8[] = {0x4D, 0x50, 0x51, 0x5F, 0x32, 0x30, 0x32, 0x34, 0x5F, 0x76, 0x31, 0x5F, 0xE6, 0x9D, 0x82, 0xE9, 0xB1, 0xBC, 0xE5, 0x9C, 0xB0, 0xE7, 0x89, 0xA2, 0x5F, 0x30, 0x2E, 0x30, 0x38, 0x34, 0x62, 0x65, 0x74, 0x61, 0x34, 0x36, 0x2E, 0x77, 0x33, 0x78, 0x00};
  static const BYTE szLstFileNameUTF8[] = {0x4C, 0x69, 0x73, 0x74, 0x46, 0x69, 0x6C, 0x65, 0x5F, 0xE6, 0x9D, 0x82, 0xE9, 0xB1, 0xBC, 0xE5, 0x9C, 0xB0, 0xE7, 0x89, 0xA2, 0x5F, 0x30, 0x2E, 0x30, 0x38, 0x34, 0x62, 0x65, 0x74, 0x61, 0x34, 0x36, 0x2E, 0x74, 0x78, 0x74, 0x00};
 -static const TEST_EXTRA_UTF8     MpqUtf8 = {Utf8File, szMpqFileNameUTF8, szLstFileNameUTF8};
 -static const TEST_EXTRA_TWOFILES TwoFilesD1 = {TwoFiles, "music\\dintro.wav", "File00000023.xxx"};
 -static const TEST_EXTRA_TWOFILES TwoFilesD2 = {TwoFiles, "waitingroombkgd.dc6"};
 +static const BYTE FileNameInvalidUTF8[] =
 +{
 +//  Hexadecimal                    Binary                                   UTF-16      String
 +//  ----                           ---------------------------------        ------      ------
 +    0x7c,                   // --> 01111100                             --> 0x007c      %u[7cb7]
 +    0xb7,                   // --> 10110111(bad)                        --> 0xfffd
 +    0xc9, 0xb7,             // --> 11001001 10110111                    --> 0x0277      \x0277
 +    0xc9, /* ca */          // --> 11001001 11001010(bad)               --> 0xfffd      %u[c9cac0bde7]
 +    0xca, /* c0 */          // --> 11001010 11000000(bad)               --> 0xfffd
 +    0xc0, /* bd */          // --> 11000000 10111101(bad)               --> 0x003d(bad)
 +    0xbd,                   // --> 10111101(bad)                        --> 0xfffd
 +    0xe7, /* c4 */          // --> 11100111 11000100(bad)               --> 0xfffd
 +    0xc4, 0xa7,             // --> 11000100 10100111                    --> 0x0127      \x0127
 +    0xca, /* de */          // --> 11001010 11011110(bad)               --> 0xfffd      %ca
 +    0xde, 0xbb,             // --> 11011110 10111011                    --> 0x07bb      \x07bb
 +    0xb6,                   // --> 10110110(bad)                        --> 0xfffd      %b6
 +    0xd3, 0xad,             // --> 11010011 10101101                    --> 0x04ed      \x04ed
 +    0xc4, /* fa */          // --> 11000100 11111010(bad)               --> 0xfffd      %u[c4fa]
 +    0xfa, /* 5f */          // --> 11111010 01011111(bad)               --> 0xfffd
 +    0x5f,                   // --> 01011111                             --> 0x005f      _
 +    0xa1,                   // --> 10100001(bad)                        --> 0xfffd      %u[a1eea1f0a1ef]
 +    0xee, /* a1 f0 */       // --> 11101110 10100001 11110000(bad)      --> 0xfffd
 +    0xa1,                   // --> 10100001(bad)                        --> 0xfffd
 +    0xf0, /* a1 ef */       // --> 11110000 10100001 11101111(bad)      --> 0xfffd
 +    0xa1,                   // --> 10100001(bad)                        --> 0xfffd
 +    0xef, /* 5f */          // --> 11101111 01011111(bad)               --> 0xfffd
 +    0x5f,                   // --> 01011111                             --> 0x005f      _
 +    0xf0, /* 80 80 80 */    // --> 11110000 10000000 10000000 10000000  --> 0x0000(bad) %u[f0808080]
 +    0x80,                   // --> 10000000(bad)                        --> 0xfffd
 +    0x80,                   // --> 10000000(bad)                        --> 0xfffd
 +    0x80,                   // --> 10000000(bad)                        --> 0xfffd
 +    0xe9, 0xa3, 0x9e,       // --> 11101001 10100011 10011110           --> 0x98de      \x98de
 +    0xe4, 0xb8, 0x96,       // --> 11100100 10111000 10010110           --> 0x4e16      \x4e16
 +    0xe7, 0x95, 0x8c,       // --> 11100111 10010101 10001100           --> 0x754c      \x754c
 +    0xe9, 0xad, 0x94,       // --> 11101001 10101101 10010100           --> 0x9b54      \x9b54
 +    0xe5, 0x85, 0xbd,       // --> 11100101 10000101 10111101           --> 0x517d      \x517d
 +    0xe6, 0xac, 0xa2,       // --> 11100110 10101100 10100010           --> 0x6b22      \x6b22
 +    0xe8, 0xbf, 0x8e,       // --> 11101000 10111111 10001110           --> 0x8fce      \x8fce
 +    0xe6, 0x82, 0xa8,       // --> 11100110 10000010 10101000           --> 0x60a8      \x60a8
 +    0x2e,                   // --> 00101110                             --> 0x002e      \x002e
 +    0x6d, 0x64, 0x78,       // --> 01101101 01100100 01111000           --> "mdx"
 +    0x00                    // --> 00000000                             --> EOS
 +};
 +
 +static const TEST_EXTRA_UTF8 MpqUtf8 = {Utf8File, szMpqFileNameUTF8, szLstFileNameUTF8};
 +
 +static const TEST_EXTRA_TWOFILES TwoFilesD1  = {TwoFiles, "music\\dintro.wav", "File00000023.xxx"};
 +static const TEST_EXTRA_TWOFILES TwoFilesD2  = {TwoFiles, "waitingroombkgd.dc6"};
  static const TEST_EXTRA_TWOFILES TwoFilesW3M = {TwoFiles, "file00000002.blp"};
  static const TEST_EXTRA_TWOFILES TwoFilesW3X = {TwoFiles, "BlueCrystal.mdx"};
 @@ -4046,6 +4151,8 @@ static const TEST_INFO1 Test_OpenMpqs[] =      {_T("MPQ_2023_v1_GreenTD.w3x"),                             NULL, "a8d91fc4e52d7c21ff7feb498c74781a",  2004},               // Corrupt sector checksum table in file #A0
      {_T("MPQ_2023_v4_1F644C5A.SC2Replay"),                      NULL, "b225828ffbf5037553e6a1290187caab",    17},               // Corrupt patch info of the "(attributes)" file
      {_T("<Chinese MPQ name>"),                                  NULL, "67faeffd0c0aece205ac8b7282d8ad8e",  4697, &MpqUtf8},     // Chinese name of the MPQ
 +    {_T("MPQ_2024_v1_BadUtf8_5.0.2.w3x"),                       NULL, "be34f9862758f021a1c6c77df3cd4f05",  6393, &LfBad1},      // Bad UTF-8 sequences in file names
 +    
      // Protected archives
      {_T("MPQ_2002_v1_ProtectedMap_InvalidUserData.w3x"),        NULL, "b900364cc134a51ddeca21a13697c3ca",    79},
 @@ -4224,6 +4331,9 @@ int _tmain(int argc, TCHAR * argv[])      // Placeholder function for various testing purposes
      Test_PlayingSpace();
 +    // Test the UTF-8 conversions
 +    TestUtf8Conversions(FileNameInvalidUTF8, LfBad1.szFile);
 +
  #ifdef TEST_COMMAND_LINE
      // Test-open MPQs from the command line. They must be plain name
      // and must be placed in the Test-MPQs folder
 @@ -4235,10 +4345,9 @@ int _tmain(int argc, TCHAR * argv[])  #ifdef TEST_LOCAL_LISTFILE      // Tests on a local listfile
      if(dwErrCode == ERROR_SUCCESS)
 -    {
 -        TestOnLocalListFile(_T("FLAT-MAP:listfile-test.txt"));
 +        dwErrCode = TestOnLocalListFile(_T("FLAT-MAP:listfile-test.txt"));
 +    if(dwErrCode == ERROR_SUCCESS)
          dwErrCode = TestOnLocalListFile(_T("listfile-test.txt"));
 -    }
  #endif  // TEST_LOCAL_LISTFILE
  #ifdef TEST_STREAM_OPERATIONS   // Test file stream operations
 diff --git a/test/stormlib-test-001.txt b/test/stormlib-test-001.txt index db8df0e..1225add 100644 --- a/test/stormlib-test-001.txt +++ b/test/stormlib-test-001.txt @@ -1,4 +1,4 @@ -==== Test Suite for StormLib version 9.26 ==== +==== Test Suite for StormLib version 9.30 ====  InitWorkFolder: Work directory \Multimedia\MPQs (default)  TestLiFiSearch (FLAT-MAP:listfile-test.txt) succeeded.  TestLiFiSearch (listfile-test.txt) succeeded. @@ -61,6 +61,7 @@ TestReadingMpq (MPQ_2023_v4_UTF8.s2ma) succeeded.  TestReadingMpq (MPQ_2023_v1_GreenTD.w3x) succeeded.  TestReadingMpq (MPQ_2023_v4_1F644C5A.SC2Replay) succeeded.  TestReadingMpq (<Chinese MPQ name>) succeeded. +TestReadingMpq (MPQ_2024_v1_BadUtf8_5.0.2.w3x) succeeded.  TestReadingMpq (MPQ_2002_v1_ProtectedMap_InvalidUserData.w3x) succeeded.  TestReadingMpq (MPQ_2002_v1_ProtectedMap_InvalidMpqFormat.w3x) succeeded.  TestReadingMpq (MPQ_2002_v1_ProtectedMap_Spazzler.w3x) succeeded.  | 
