aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CMakeLists.txt1
-rw-r--r--StormLib.vcxproj1
-rw-r--r--StormLib.vcxproj.filters3
-rw-r--r--StormLib_dll.vcxproj1
-rw-r--r--StormLib_dll.vcxproj.filters3
-rw-r--r--StormLib_test.vcxproj1
-rw-r--r--StormLib_test.vcxproj.filters3
-rw-r--r--StormLib_vs08.vcproj164
-rw-r--r--StormLib_vs08_dll.vcproj84
-rw-r--r--StormLib_vs08_test.vcproj36
-rw-r--r--src/FileStream.cpp4
-rw-r--r--src/SBaseCommon.cpp6
-rw-r--r--src/SMemUtf8.cpp551
-rw-r--r--src/StormCommon.h73
-rw-r--r--src/StormLib.h31
-rw-r--r--src/StormPort.h38
-rw-r--r--src/wdk/sources-cpp.cpp1
-rwxr-xr-xtest/StormTest.cpp189
-rw-r--r--test/stormlib-test-001.txt3
19 files changed, 1119 insertions, 74 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5bf15df..8d17ebe 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -60,6 +60,7 @@ set(SRC_FILES
src/SFilePatchArchives.cpp
src/SFileReadFile.cpp
src/SFileVerify.cpp
+ src/SMemUtf8.cpp
src/libtomcrypt/src/pk/rsa/rsa_verify_simple.c
src/libtomcrypt/src/misc/crypt_libc.c
)
diff --git a/StormLib.vcxproj b/StormLib.vcxproj
index 26d8f70..57cea24 100644
--- a/StormLib.vcxproj
+++ b/StormLib.vcxproj
@@ -1006,6 +1006,7 @@
<ClCompile Include="src\lzma\C\Threads.c" />
<ClCompile Include="src\pklib\explode.c" />
<ClCompile Include="src\pklib\implode.c" />
+ <ClCompile Include="src\SMemUtf8.cpp" />
<ClCompile Include="src\sparse\sparse.cpp" />
<ClCompile Include="src\zlib\adler32.c" />
<ClCompile Include="src\zlib\compress_zlib.c" />
diff --git a/StormLib.vcxproj.filters b/StormLib.vcxproj.filters
index 22c9793..024fa9d 100644
--- a/StormLib.vcxproj.filters
+++ b/StormLib.vcxproj.filters
@@ -217,5 +217,8 @@
<ClCompile Include="src\LibTomMathDesc.c">
<Filter>Source Files</Filter>
</ClCompile>
+ <ClCompile Include="src\SMemUtf8.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
</ItemGroup>
</Project> \ No newline at end of file
diff --git a/StormLib_dll.vcxproj b/StormLib_dll.vcxproj
index f437bd9..67899f5 100644
--- a/StormLib_dll.vcxproj
+++ b/StormLib_dll.vcxproj
@@ -322,6 +322,7 @@
<WarningLevel Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Level4</WarningLevel>
<WarningLevel Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Level4</WarningLevel>
</ClCompile>
+ <ClCompile Include="src\SMemUtf8.cpp" />
<ClCompile Include="src\sparse\sparse.cpp" />
<ClCompile Include="src\zlib\adler32.c" />
<ClCompile Include="src\zlib\compress_zlib.c" />
diff --git a/StormLib_dll.vcxproj.filters b/StormLib_dll.vcxproj.filters
index 2eaf026..0e05d88 100644
--- a/StormLib_dll.vcxproj.filters
+++ b/StormLib_dll.vcxproj.filters
@@ -220,6 +220,9 @@
<ClCompile Include="src\LibTomMathDesc.c">
<Filter>Source Files</Filter>
</ClCompile>
+ <ClCompile Include="src\SMemUtf8.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
</ItemGroup>
<ItemGroup>
<ResourceCompile Include="src\DllMain.rc">
diff --git a/StormLib_test.vcxproj b/StormLib_test.vcxproj
index 18cee5b..71aebf9 100644
--- a/StormLib_test.vcxproj
+++ b/StormLib_test.vcxproj
@@ -336,6 +336,7 @@
<WarningLevel Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Level4</WarningLevel>
<WarningLevel Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Level4</WarningLevel>
</ClCompile>
+ <ClCompile Include="src\SMemUtf8.cpp" />
<ClCompile Include="src\sparse\sparse.cpp" />
<ClCompile Include="src\zlib\adler32.c" />
<ClCompile Include="src\zlib\compress_zlib.c" />
diff --git a/StormLib_test.vcxproj.filters b/StormLib_test.vcxproj.filters
index d341b12..cf1aa5e 100644
--- a/StormLib_test.vcxproj.filters
+++ b/StormLib_test.vcxproj.filters
@@ -226,5 +226,8 @@
<ClCompile Include="src\LibTomMathDesc.c">
<Filter>Source Files</Filter>
</ClCompile>
+ <ClCompile Include="src\SMemUtf8.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
</ItemGroup>
</Project> \ No newline at end of file
diff --git a/StormLib_vs08.vcproj b/StormLib_vs08.vcproj
index 031dde8..3f8674d 100644
--- a/StormLib_vs08.vcproj
+++ b/StormLib_vs08.vcproj
@@ -4038,6 +4038,170 @@
/>
</FileConfiguration>
</File>
+ <File
+ RelativePath=".\src\SMemUtf8.cpp"
+ >
+ <FileConfiguration
+ Name="DebugAD|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ UsePrecompiledHeader="2"
+ PrecompiledHeaderThrough="StormCommon.h"
+ WarningLevel="4"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="DebugAD|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ UsePrecompiledHeader="2"
+ PrecompiledHeaderThrough="StormCommon.h"
+ WarningLevel="4"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="DebugAS|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ UsePrecompiledHeader="2"
+ PrecompiledHeaderThrough="StormCommon.h"
+ WarningLevel="4"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="DebugAS|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ UsePrecompiledHeader="2"
+ PrecompiledHeaderThrough="StormCommon.h"
+ WarningLevel="4"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="ReleaseAD|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ UsePrecompiledHeader="2"
+ PrecompiledHeaderThrough="StormCommon.h"
+ WarningLevel="4"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="ReleaseAD|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ UsePrecompiledHeader="2"
+ PrecompiledHeaderThrough="StormCommon.h"
+ WarningLevel="4"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="ReleaseAS|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ UsePrecompiledHeader="2"
+ PrecompiledHeaderThrough="StormCommon.h"
+ WarningLevel="4"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="ReleaseAS|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ UsePrecompiledHeader="2"
+ PrecompiledHeaderThrough="StormCommon.h"
+ WarningLevel="4"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="DebugUD|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ UsePrecompiledHeader="2"
+ PrecompiledHeaderThrough="StormCommon.h"
+ WarningLevel="4"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="DebugUD|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ UsePrecompiledHeader="2"
+ PrecompiledHeaderThrough="StormCommon.h"
+ WarningLevel="4"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="DebugUS|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ UsePrecompiledHeader="2"
+ PrecompiledHeaderThrough="StormCommon.h"
+ WarningLevel="4"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="DebugUS|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ UsePrecompiledHeader="2"
+ PrecompiledHeaderThrough="StormCommon.h"
+ WarningLevel="4"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="ReleaseUD|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ UsePrecompiledHeader="2"
+ PrecompiledHeaderThrough="StormCommon.h"
+ WarningLevel="4"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="ReleaseUD|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ UsePrecompiledHeader="2"
+ PrecompiledHeaderThrough="StormCommon.h"
+ WarningLevel="4"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="ReleaseUS|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ UsePrecompiledHeader="2"
+ PrecompiledHeaderThrough="StormCommon.h"
+ WarningLevel="4"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="ReleaseUS|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ UsePrecompiledHeader="2"
+ PrecompiledHeaderThrough="StormCommon.h"
+ WarningLevel="4"
+ />
+ </FileConfiguration>
+ </File>
<Filter
Name="adpcm"
>
diff --git a/StormLib_vs08_dll.vcproj b/StormLib_vs08_dll.vcproj
index 66f0705..b51db31 100644
--- a/StormLib_vs08_dll.vcproj
+++ b/StormLib_vs08_dll.vcproj
@@ -1680,6 +1680,90 @@
/>
</FileConfiguration>
</File>
+ <File
+ RelativePath=".\src\SMemUtf8.cpp"
+ >
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ WarningLevel="4"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ WarningLevel="4"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ WarningLevel="4"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ WarningLevel="4"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="DebugAD|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ WarningLevel="4"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="DebugAD|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ WarningLevel="4"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="ReleaseAD|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ WarningLevel="4"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="ReleaseAD|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ WarningLevel="4"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="ReleaseAS|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ WarningLevel="4"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="ReleaseAS|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ WarningLevel="4"
+ />
+ </FileConfiguration>
+ </File>
<Filter
Name="adpcm"
>
diff --git a/StormLib_vs08_test.vcproj b/StormLib_vs08_test.vcproj
index d3506ea..48f2dbd 100644
--- a/StormLib_vs08_test.vcproj
+++ b/StormLib_vs08_test.vcproj
@@ -1039,6 +1039,42 @@
</FileConfiguration>
</File>
<File
+ RelativePath=".\src\SMemUtf8.cpp"
+ >
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ WarningLevel="4"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ WarningLevel="4"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ WarningLevel="4"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ WarningLevel="4"
+ />
+ </FileConfiguration>
+ </File>
+ <File
RelativePath=".\test\StormTest.cpp"
>
<FileConfiguration
diff --git a/src/FileStream.cpp b/src/FileStream.cpp
index b66098c..39db0a0 100644
--- a/src/FileStream.cpp
+++ b/src/FileStream.cpp
@@ -78,7 +78,7 @@ static void CreateNameWithSuffix(LPTSTR szBuffer, size_t cchMaxChars, LPCTSTR sz
*szBuffer++ = '.';
// Append the number
- IntToString(szBuffer, szBufferEnd - szBuffer + 1, nValue);
+ SMemIntToStr(szBuffer, szBufferEnd - szBuffer + 1, nValue);
}
//-----------------------------------------------------------------------------
@@ -1765,7 +1765,7 @@ static void PartStream_Close(TBlockStream * pStream)
// Make sure that the header is properly BSWAPed
BSWAP_ARRAY32_UNSIGNED(&PartHeader, sizeof(PART_FILE_HEADER));
- IntToString(PartHeader.GameBuildNumber, _countof(PartHeader.GameBuildNumber), pStream->BuildNumber);
+ SMemIntToStr(PartHeader.GameBuildNumber, _countof(PartHeader.GameBuildNumber), pStream->BuildNumber);
// Write the part header
pStream->BaseWrite(pStream, &ByteOffset, &PartHeader, sizeof(PART_FILE_HEADER));
diff --git a/src/SBaseCommon.cpp b/src/SBaseCommon.cpp
index 0de7864..3f95ded 100644
--- a/src/SBaseCommon.cpp
+++ b/src/SBaseCommon.cpp
@@ -30,7 +30,7 @@ LCID g_lcFileLocale = 0; // Compound of file locale and p
// Converts ASCII characters to lowercase
// Converts slash (0x2F) to backslash (0x5C)
-unsigned char AsciiToLowerTable[256] =
+const unsigned char AsciiToLowerTable[256] =
{
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
@@ -52,7 +52,7 @@ unsigned char AsciiToLowerTable[256] =
// Converts ASCII characters to uppercase
// Converts slash (0x2F) to backslash (0x5C)
-unsigned char AsciiToUpperTable[256] =
+const unsigned char AsciiToUpperTable[256] =
{
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
@@ -135,7 +135,7 @@ void StringCreatePseudoFileName(char * szBuffer, size_t cchMaxChars, unsigned in
szBuffer = StringCopy(szBuffer, (szBufferEnd - szBuffer), "File");
// Number
- szBuffer = IntToString(szBuffer, szBufferEnd - szBuffer + 1, nIndex, 8);
+ szBuffer = SMemIntToStr(szBuffer, szBufferEnd - szBuffer + 1, nIndex, 8);
// Dot
if(szBuffer < szBufferEnd)
diff --git a/src/SMemUtf8.cpp b/src/SMemUtf8.cpp
new file mode 100644
index 0000000..5832422
--- /dev/null
+++ b/src/SMemUtf8.cpp
@@ -0,0 +1,551 @@
+/*****************************************************************************/
+/* SMemUtf8.cpp Copyright (c) Ladislav Zezula 2010 */
+/*---------------------------------------------------------------------------*/
+/* Support for conversion of UTF-8 <-> File name */
+/* */
+/* File names in the MPQs are assumed to be UTF-8. However, bad sequences */
+/* or filename unsafe characters are allowed in the list files, but won't */
+/* work in unpacking files from MPQ to a local file. */
+/* */
+/* This module contains cross-platform comparable conversion between UTF-8 */
+/* and file names that will produce identical file names across platforms. */
+/*---------------------------------------------------------------------------*/
+/* Date Ver Who Comment */
+/* -------- ---- --- ------- */
+/* 31.10.24 1.00 Lad Created */
+/*****************************************************************************/
+
+#define __STORMLIB_SELF__
+#include "StormLib.h"
+#include "StormCommon.h"
+
+//-----------------------------------------------------------------------------
+// Local defines
+
+#define MAX_INVALID_CHARS 128 // Maximum number of invalid characters in a row
+
+//-----------------------------------------------------------------------------
+// Conversion tables
+
+// Maps an ASCII character (index 0x00 - 0x7F) to the value of the hexadecimal
+// digit it represents: '0'-'9' --> 0x00-0x09, 'A'-'F' and 'a'-'f' --> 0x0A-0x0F.
+// Every character that is not a hexadecimal digit maps to 0xFF.
+const unsigned char SMemCharToByte[0x80] =
+{
+ // 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x00
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x10
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x20
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x30 ('0' - '9')
+ 0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x40 ('A' - 'F')
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x50
+ 0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x60 ('a' - 'f')
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF // 0x70
+};
+
+//-----------------------------------------------------------------------------
+// Local functions
+
+// Bit mask of ASCII characters (0x00 - 0x7F) that are file name safe. The character
+// 'ch' is allowed when bit (ch % 32) of the element [ch / 32] is set. We maintain
+// the same charset even on non-Windows in order to keep the file names equal
+static const unsigned int FileNameSafeChars[4] =
+{
+    0x00000000, 0x2BFF7BFB, 0xFFFFFFFF, 0xEFFFFFFF      // Windows: [0x20-0x7F], except 0x22, 0x2A, 0x2F, 0x3A, 0x3C, 0x3E, 0x3F, 0x7C
+//  0xfffffffe, 0xffff7fff, 0xffffffff, 0xffffffff      // Linux: [0x01-0x7F], except 0x2F
+};
+
+// Returns true if the given ASCII character must not be used in a file name,
+// i.e. when its bit in FileNameSafeChars is clear. 'ch' must be below 0x80.
+static bool UTF8_IsBadFileNameCharacter(DWORD ch)
+{
+    // It is guaranteed that the character is in range of 0x00 - 0x7F
+    assert(ch < 0x80);
+
+    // Test the bit from the table. The mask is built from an unsigned constant,
+    // because shifting a signed 1 by 31 bits overflows the signed int
+    return (FileNameSafeChars[ch / 32] & (1u << (ch % 32))) == 0;
+}
+
+// Decodes one multi-byte UTF-8 sequence that starts at pbString.
+//  pbString      - Leading byte followed by 'ccFollowBytes' bytes (the caller checks the bounds)
+//  BitsMask      - Mask of the payload bits in the leading byte
+//  ccFollowBytes - Number of continuation bytes that follow the leading byte
+//  dwMinValue    - Lowest code point accepted for this sequence length
+//  dwMaxValue    - Highest code point accepted for this sequence length
+//  dwCodePoint   - Receives the decoded code point (written on success only)
+//  ccBytesEaten  - Receives the sequence length on success, 1 on failure
+// Returns ERROR_SUCCESS, ERROR_NO_UNICODE_TRANSLATION (bad continuation byte)
+// or ERROR_INVALID_DATA (decoded value out of the given range)
+static DWORD UTF8_DecodeSequence(const BYTE * pbString, BYTE BitsMask, size_t ccFollowBytes, DWORD dwMinValue, DWORD dwMaxValue, DWORD & dwCodePoint, size_t & ccBytesEaten)
+{
+    // The payload bits of the leading byte form the top of the code point
+    DWORD dwValue = pbString[0] & BitsMask;
+
+    // Unless the whole sequence decodes, only the leading byte is reported as eaten
+    ccBytesEaten = 1;
+
+    // Append six payload bits from each continuation byte
+    for(size_t nIndex = 1; nIndex <= ccFollowBytes; nIndex++)
+    {
+        BYTE FollowByte = pbString[nIndex];
+
+        // A continuation byte always has the form of 10xxxxxx
+        if((FollowByte & 0xC0) != 0x80)
+            return ERROR_NO_UNICODE_TRANSLATION;
+        dwValue = (dwValue << 6) | (FollowByte & 0x3F);
+    }
+
+    // Reject values outside of the range requested by the caller
+    if(dwValue < dwMinValue || dwValue > dwMaxValue)
+        return ERROR_INVALID_DATA;
+
+    // The leading byte and all continuation bytes have been consumed
+    ccBytesEaten = ccFollowBytes + 1;
+    dwCodePoint = dwValue;
+    return ERROR_SUCCESS;
+}
+
+// Decodes a single code point from the UTF-8 string in the range [pbString, pbStringEnd).
+// See https://en.wikipedia.org/wiki/UTF-8
+//  dwCodePoint  - Receives the code point, or SFILE_UTF8_INVALID_CHARACTER on failure
+//  ccBytesEaten - Receives the number of bytes consumed. On failure, exactly one byte
+//                 is reported, so the caller can escape it and resynchronize
+// Returns ERROR_SUCCESS, or an error code if the byte at pbString is a filename-unsafe
+// ASCII character or does not begin a well-formed UTF-8 sequence.
+static DWORD UTF8_DecodeCodePoint(const BYTE * pbString, const BYTE * pbStringEnd, DWORD & dwCodePoint, size_t & ccBytesEaten)
+{
+    DWORD dwErrCode;
+
+    // Reset the outputs
+    dwCodePoint = SFILE_UTF8_INVALID_CHARACTER;
+    ccBytesEaten = 0;
+
+    if(pbString < pbStringEnd)
+    {
+        // At least one byte will be eaten
+        ccBytesEaten = 1;
+
+        // 1st code point (0x00 - 0x7F, 1 byte)
+        if(pbString[0] <= 0x7F)
+        {
+            // This is the perfect spot to check for filename-unsafe characters
+            if(UTF8_IsBadFileNameCharacter(pbString[0]))
+                return ERROR_NO_UNICODE_TRANSLATION;
+
+            // Decode the 1-byte sequence
+            dwCodePoint = pbString[0];
+            return ERROR_SUCCESS;
+        }
+
+        // 2nd code point (0x80 - 0x7FF, 2 bytes)
+        if((pbString[0] & 0xE0) == 0xC0 && (pbString + 2) <= pbStringEnd)
+        {
+            // Decode the 2-byte sequence
+            return UTF8_DecodeSequence(pbString, 0x1F, 1, 0x80, 0x7FF, dwCodePoint, ccBytesEaten);
+        }
+
+        // 3rd code point (0x800 - 0xFFFF, 3 bytes)
+        // Note: MultiByteToWideChar will not decode 0xE0 0xBF 0xBF (--> 0x0FFF),
+        if((pbString[0] & 0xF0) == 0xE0 && (pbString + 3) <= pbStringEnd)
+        {
+            // Decode the 3-byte sequence
+            dwErrCode = UTF8_DecodeSequence(pbString, 0x0F, 2, 0x800, 0xFFFF, dwCodePoint, ccBytesEaten);
+
+            // The surrogate range (0xD800 - 0xDFFF) is reserved for UTF-16 and must not
+            // appear in UTF-8. Accepting it would produce unpaired UTF-16 surrogates
+            // and file names that can not be converted back to the same bytes
+            if(dwErrCode == ERROR_SUCCESS && 0xD800 <= dwCodePoint && dwCodePoint <= 0xDFFF)
+            {
+                dwCodePoint = SFILE_UTF8_INVALID_CHARACTER;
+                ccBytesEaten = 1;
+                dwErrCode = ERROR_NO_UNICODE_TRANSLATION;
+            }
+            return dwErrCode;
+        }
+
+        // 4th code point (0x10000 - 0x10FFFF, 4 bytes)
+        if((pbString[0] & 0xF8) == 0xF0 && (pbString + 4) <= pbStringEnd)
+        {
+            // Try to decode 4-byte sequence
+            return UTF8_DecodeSequence(pbString, 0x07, 3, 0x10000, SFILE_UNICODE_MAX, dwCodePoint, ccBytesEaten);
+        }
+
+        // An invalid UTF-8 sequence encountered
+        // (stray continuation byte, invalid leading byte or truncated sequence)
+        return ERROR_NO_UNICODE_TRANSLATION;
+    }
+
+    // No bytes available. Should never happen
+    assert(false);
+    return ERROR_BUFFER_OVERFLOW;
+}
+
+// Writes a code point to Utf8Buffer as a leading byte followed by
+// 'dwFollowByteCount' continuation bytes.
+//  LeadingByte - Marker bits of the leading byte (0x00, 0xC0, 0xE0 or 0xF0 in the callers)
+// Returns the number of bytes written (dwFollowByteCount + 1)
+static size_t UTF8_EncodeSequence(DWORD dwCodePoint, BYTE LeadingByte, DWORD dwFollowByteCount, LPBYTE Utf8Buffer)
+{
+    LPBYTE pbOutput = Utf8Buffer;
+
+    // The leading byte carries the marker bits and the topmost bits of the code point
+    *pbOutput++ = (BYTE)(LeadingByte | (dwCodePoint >> (dwFollowByteCount * 6)));
+
+    // Each continuation byte is 10xxxxxx and carries the next lower 6 bits
+    for(DWORD dwRemaining = dwFollowByteCount; dwRemaining > 0; dwRemaining--)
+    {
+        DWORD dwShift = (dwRemaining - 1) * 6;
+
+        *pbOutput++ = (BYTE)(0x80 | ((dwCodePoint >> dwShift) & 0x3F));
+    }
+
+    return (size_t)(pbOutput - Utf8Buffer);
+}
+
+// Encodes a code point as UTF-8 into Utf8Buffer (up to 4 bytes are written).
+// Returns the number of bytes written, or 0 if the code point is above 0x10FFFF
+static size_t UTF8_EncodeCodePoint(DWORD dwCodePoint, LPBYTE Utf8Buffer)
+{
+    DWORD dwFollowBytes;
+    BYTE LeadingByte;
+
+    if(dwCodePoint < 0x80)
+    {
+        // 0x00 - 0x7F: 1 byte (0xxxxxxx)
+        LeadingByte = 0x00;
+        dwFollowBytes = 0;
+    }
+    else if(dwCodePoint < 0x800)
+    {
+        // 0x80 - 0x7FF: 2 bytes (110xxxxx + 1 continuation byte)
+        LeadingByte = 0xC0;
+        dwFollowBytes = 1;
+    }
+    else if(dwCodePoint < 0x10000)
+    {
+        // 0x800 - 0xFFFF: 3 bytes (1110xxxx + 2 continuation bytes)
+        LeadingByte = 0xE0;
+        dwFollowBytes = 2;
+    }
+    else if(dwCodePoint < 0x110000)
+    {
+        // 0x10000 - 0x10FFFF: 4 bytes (11110xxx + 3 continuation bytes)
+        LeadingByte = 0xF0;
+        dwFollowBytes = 3;
+    }
+    else
+    {
+        // Should never happen
+        assert(false);
+        return 0;
+    }
+
+    return UTF8_EncodeSequence(dwCodePoint, LeadingByte, dwFollowBytes, Utf8Buffer);
+}
+
+// Writes the collected invalid bytes to the output as an escape sequence.
+//  szBuffer      - Output buffer; may be NULL when only the length is being calculated
+//  ccBuffer      - Size of the output buffer, in characters
+//  nOutLength    - Number of characters produced so far (= offset of the escape sequence)
+//  InvalidChars  - The invalid bytes to be escaped
+//  nInvalidChars - Number of the invalid bytes
+// A single byte is written as "%xx", multiple bytes as "%u[xxyyzz]". The sequence
+// is stored only if it fits into the buffer together with the terminating zero.
+// Returns the new output length, regardless of whether the sequence was stored.
+static size_t UTF8_FlushInvalidChars(LPTSTR szBuffer, size_t ccBuffer, size_t nOutLength, LPBYTE InvalidChars, size_t nInvalidChars)
+{
+    size_t nLengthNeeded;
+
+    // Case 0: No invalid char -> do nothing
+    if(nInvalidChars == 0)
+        return nOutLength;
+
+    // Case 1: One invalid char -> %xx (compatible with previous versions of MPQ Editor)
+    if(nInvalidChars == 1)
+    {
+        nLengthNeeded = 3;
+
+        // SMemBinToStr terminates its output with zero, so there must be
+        // space for the 3 characters plus the end-of-string
+        if(szBuffer != NULL && (nOutLength + nLengthNeeded) < ccBuffer)
+        {
+            szBuffer[nOutLength] = '%';
+            SMemBinToStr(szBuffer + nOutLength + 1, ccBuffer - nOutLength - 1, InvalidChars, 1);
+        }
+        return nOutLength + nLengthNeeded;
+    }
+
+    // Case 2: More than one invalid char -> %u[xxyyzz]
+    // Two hexadecimal digits per byte plus the four characters of "%u[" and "]"
+    nLengthNeeded = nInvalidChars * 2 + 4;
+
+    // The terminating zero is written as well, hence the strict comparison
+    if(szBuffer != NULL && (nOutLength + nLengthNeeded) < ccBuffer)
+    {
+        LPTSTR szTarget = szBuffer + nOutLength;
+
+        // Store the prefix character-by-character, so it works for any width of TCHAR
+        szTarget[0] = '%';
+        szTarget[1] = 'u';
+        szTarget[2] = '[';
+
+        // Hexadecimal digits; the zero written by SMemBinToStr is replaced by ']'
+        SMemBinToStr(szTarget + 3, ccBuffer - nOutLength - 3, InvalidChars, nInvalidChars);
+        szTarget[nLengthNeeded - 1] = ']';
+        szTarget[nLengthNeeded] = 0;
+    }
+    return nOutLength + nLengthNeeded;
+}
+
+// Appends 'nByteCount' raw bytes from BinBuffer at the offset nOutLength of pbBuffer.
+// The bytes are stored only if the buffer is present and the resulting length stays
+// below ccBuffer. Returns the new output length in both cases.
+size_t UTF8_FlushBinBuffer(LPBYTE pbBuffer, size_t ccBuffer, size_t nOutLength, LPBYTE BinBuffer, size_t nByteCount)
+{
+    size_t nNewLength = nOutLength + nByteCount;
+
+    // Nothing is stored when only calculating the length or when out of space
+    if(pbBuffer == NULL || nNewLength >= ccBuffer)
+        return nNewLength;
+
+    memcpy(pbBuffer + nOutLength, BinBuffer, nByteCount);
+    return nNewLength;
+}
+
+#ifdef STORMLIB_WIDE_CHAR
+// Encodes a code point as UTF-16 (https://en.wikipedia.org/wiki/UTF-16).
+// Stores one or two 16-bit units into Utf16Buffer and returns their number.
+// Returns 0 if the code point is above SFILE_UNICODE_MAX.
+static size_t UTF16_EncodeCodePoint(DWORD dwCodePoint, unsigned short * Utf16Buffer)
+{
+    // Code points up to 0xFFFF are stored directly as a single unit
+    if(dwCodePoint <= 0xFFFF)
+    {
+        Utf16Buffer[0] = (unsigned short)(dwCodePoint);
+        return 1;
+    }
+
+    // Anything above the Unicode range can not be encoded. Should never happen
+    if(dwCodePoint > SFILE_UNICODE_MAX)
+    {
+        assert(false);
+        return 0;
+    }
+
+    // Supplementary code point: subtract 0x10000 and split the remaining 20 bits
+    // into a high surrogate (upper 10 bits) and a low surrogate (lower 10 bits)
+    DWORD dwOffset = dwCodePoint - 0x10000;
+
+    Utf16Buffer[0] = (unsigned short)(0xD800 + (dwOffset >> 10));
+    Utf16Buffer[1] = (unsigned short)(0xDC00 + (dwOffset & 0x3FF));
+    return 2;
+}
+
+// Decodes a single code point from the UTF-16 string in the range [szString, szStringEnd).
+//  dwCodePoint  - Receives the code point, or SFILE_UTF8_INVALID_CHARACTER on failure
+//  ccCharsEaten - Receives the number of 16-bit units consumed (1 on failure)
+// Returns ERROR_SUCCESS, or ERROR_NO_UNICODE_TRANSLATION for an unpaired surrogate
+static DWORD UTF16_DecodeCodePoint(LPCTSTR szString, LPCTSTR szStringEnd, DWORD & dwCodePoint, size_t & ccCharsEaten)
+{
+    // Reset the outputs
+    dwCodePoint = SFILE_UTF8_INVALID_CHARACTER;
+    ccCharsEaten = 0;
+
+    if(szString < szStringEnd)
+    {
+        DWORD dwUnit0 = szString[0];
+        DWORD dwUnit1;
+
+        // At least one char will be eaten
+        ccCharsEaten = 1;
+
+        // A low surrogate must never come first
+        if(0xDC00 <= dwUnit0 && dwUnit0 <= 0xDFFF)
+            return ERROR_NO_UNICODE_TRANSLATION;
+
+        // A high surrogate is only valid when followed by a low surrogate
+        if(0xD800 <= dwUnit0 && dwUnit0 <= 0xDBFF)
+        {
+            // The pair is cut by the end of the string
+            if((szString + 1) >= szStringEnd)
+                return ERROR_NO_UNICODE_TRANSLATION;
+
+            // The second unit is not a low surrogate
+            dwUnit1 = szString[1];
+            if(!(0xDC00 <= dwUnit1 && dwUnit1 <= 0xDFFF))
+                return ERROR_NO_UNICODE_TRANSLATION;
+
+            // Join the two 10-bit halves first, then add the 0x10000 offset.
+            // (The addition must not be merged into the low half before OR-ing)
+            dwCodePoint = 0x10000 + (((dwUnit0 - 0xD800) << 10) | (dwUnit1 - 0xDC00));
+            ccCharsEaten = 2;
+            return ERROR_SUCCESS;
+        }
+
+        // Direct encoding
+        dwCodePoint = dwUnit0;
+        return ERROR_SUCCESS;
+    }
+
+    // No chars available. Should never happen
+    assert(false);
+    return ERROR_BUFFER_OVERFLOW;
+}
+#endif
+
+// Checks whether the string begins with an escaped byte sequence in the form
+// of "%u[xxyyzz]" and if so, decodes the hexadecimal digits into BinBuffer.
+//  szString    - Begin of the string to check
+//  szStringEnd - End of the string
+//  BinBuffer   - Receives the decoded bytes (at most MAX_INVALID_CHARS of them)
+// Returns the number of decoded bytes, or 0 if there is no valid escaped sequence
+size_t UTF16_IsEncodedCharSequence(LPCTSTR szString, LPCTSTR szStringEnd, LPBYTE BinBuffer)
+{
+    TCHAR HexaString[MAX_INVALID_CHARS * 2 + 1];
+    LPCTSTR szClosing;
+    size_t nHexPairs = 0;
+
+    // The sequence must begin with "%u[", and each of the three
+    // prefix characters must be followed by at least one more character
+    if(!((szString + 1) < szStringEnd && szString[0] == '%'))
+        return 0;
+    if(!((szString + 2) < szStringEnd && szString[1] == 'u'))
+        return 0;
+    if(!((szString + 3) < szStringEnd && szString[2] == '['))
+        return 0;
+    szString += 3;
+
+    // Count the pairs of characters up to the closing bracket or the end of the string
+    while(nHexPairs < MAX_INVALID_CHARS)
+    {
+        LPCTSTR szPair = szString + (nHexPairs * 2);
+
+        if(szPair >= szStringEnd || szPair[0] == ']')
+            break;
+        nHexPairs++;
+    }
+
+    // The pairs must be followed by the closing bracket within the string
+    szClosing = szString + (nHexPairs * 2);
+    if((szClosing + 1) > szStringEnd || szClosing[0] != ']')
+        return 0;
+
+    // Make a zero-terminated copy of the hexadecimal string
+    memcpy(HexaString, szString, (nHexPairs * 2) * sizeof(TCHAR));
+    HexaString[nHexPairs * 2] = 0;
+
+    // The sequence is valid only if all characters convert to binary
+    return (SMemStrToBin(HexaString, BinBuffer, nHexPairs) == ERROR_SUCCESS) ? nHexPairs : 0;
+}
+
+//-----------------------------------------------------------------------------
+// Public (exported) functions
+
+// Conversion of MPQ file name to file-name-safe string.
+//
+// The input is decoded as UTF-8, one code point at a time. Each decoded code point
+// is re-encoded to the output as UTF-16 (when STORMLIB_WIDE_CHAR is defined)
+// or as UTF-8 (otherwise). Bytes that fail to decode (which includes filename-unsafe
+// ASCII characters) are handled according to dwFlags:
+//  - With SFILE_UTF8_ALLOW_INVALID_CHARS, each such byte is replaced
+//    by the SFILE_UTF8_INVALID_CHARACTER code point.
+//  - Without that flag, runs of such bytes are collected and written
+//    as an escape sequence ("%xx" for one byte, "%u[xxyyzz]" for more).
+//
+// The length stored to pOutLength is in characters and includes the terminating zero.
+// Data that do not fit into the buffer are not stored, but are still counted.
+//
+// NOTE(review): The returned error code is the result of the last decoding attempt only.
+// An input that ends with an escaped invalid byte returns an error, while an invalid
+// byte followed by a valid character returns ERROR_SUCCESS - confirm this is intended.
+DWORD WINAPI SMemUTF8ToFileName(
+ LPTSTR szBuffer, // Pointer to the output buffer. If NULL, the function will calculate the needed length
+ size_t ccBuffer, // Length of the output buffer (must include EOS)
+ const void * lpString, // Pointer to the begin of the string
+ const void * lpStringEnd, // Pointer to the end of string. If NULL, it's assumed to be zero-terminated
+ DWORD dwFlags, // Additional flags
+ size_t * pOutLength = NULL) // Pointer to a variable that receives the needed length (optional)
+{
+ const BYTE * pbStringEnd = (const BYTE *)lpStringEnd;
+ const BYTE * pbString = (const BYTE *)lpString;
+ DWORD dwErrCode = ERROR_SUCCESS;
+ size_t nInvalidChars = 0;
+ size_t nOutLength = 0;
+ BYTE InvalidChars[MAX_INVALID_CHARS];
+
+ // Set the end of the input if not specified
+ if(pbStringEnd == NULL)
+ pbStringEnd = pbString + strlen((char *)pbString);
+
+ // Keep converting as long as there is any input left
+ while(pbString < pbStringEnd)
+ {
+ size_t ccBytesEaten = 0;
+ size_t nCharLength;
+ DWORD dwCodePoint = 0;
+
+ // Decode the single UTF-8 char
+ if((dwErrCode = UTF8_DecodeCodePoint(pbString, pbStringEnd, dwCodePoint, ccBytesEaten)) != ERROR_SUCCESS)
+ {
+ // Exactly one byte should be eaten on error
+ assert(ccBytesEaten == 1);
+
+ // If invalid chars are allowed, we replace the result with 0xFFFD
+ if(dwFlags & SFILE_UTF8_ALLOW_INVALID_CHARS)
+ {
+ // Replace the code point with invalid marker and continue on the next character
+ dwCodePoint = SFILE_UTF8_INVALID_CHARACTER;
+ dwErrCode = ERROR_SUCCESS;
+ }
+
+ // If the invalid chars are not allowed, we put the invalid char to the stack
+ else
+ {
+ // Flush the invalid characters, if full
+ if(nInvalidChars >= _countof(InvalidChars))
+ {
+ nOutLength = UTF8_FlushInvalidChars(szBuffer, ccBuffer, nOutLength, InvalidChars, nInvalidChars);
+ nInvalidChars = 0;
+ }
+
+ // Put the invalid char to the stack. Note that dwErrCode keeps
+ // the error code until the next decoding attempt overwrites it
+ InvalidChars[nInvalidChars++] = pbString[0];
+ pbString++;
+ continue;
+ }
+ }
+
+ // Check whether the unicode char is not out of range
+ assert(dwCodePoint <= SFILE_UNICODE_MAX);
+
+ // Move the source pointer by the number of bytes eaten
+ pbString = pbString + ccBytesEaten;
+
+ // Flush the invalid characters, if any. They must be written
+ // before the code point that has just been decoded
+ nOutLength = UTF8_FlushInvalidChars(szBuffer, ccBuffer, nOutLength, InvalidChars, nInvalidChars);
+ nInvalidChars = 0;
+
+#ifdef STORMLIB_WIDE_CHAR
+ {
+ unsigned short Utf16Buffer[2];
+
+ // Encode the code point into UTF-16
+ nCharLength = UTF16_EncodeCodePoint(dwCodePoint, Utf16Buffer);
+
+ // Write the encoded UTF-16 to the output buffer, if present
+ // and if there is still space for the terminating zero
+ if(szBuffer != NULL && (nOutLength + nCharLength) < ccBuffer)
+ {
+ memcpy(szBuffer + nOutLength, Utf16Buffer, nCharLength * sizeof(unsigned short));
+ }
+ }
+#else
+ {
+ BYTE Utf8Buffer[4];
+
+ // Encode the code point into UTF-8
+ nCharLength = UTF8_EncodeCodePoint(dwCodePoint, Utf8Buffer);
+
+ // Write the encoded UTF-8 to the output buffer, if present
+ // and if there is still space for the terminating zero
+ if(szBuffer != NULL && (nOutLength + nCharLength) < ccBuffer)
+ {
+ memcpy(szBuffer + nOutLength, Utf8Buffer, nCharLength);
+ }
+ }
+#endif
+
+ // Increment the output length
+ nOutLength = nOutLength + nCharLength;
+ }
+
+ // Flush the invalid characters, if any
+ nOutLength = UTF8_FlushInvalidChars(szBuffer, ccBuffer, nOutLength, InvalidChars, nInvalidChars);
+ nInvalidChars = 0;
+
+ // Terminate the string with zero, if we still have space.
+ // The terminating zero is counted in the output length in either case
+ if(szBuffer != NULL && nOutLength < ccBuffer)
+ szBuffer[nOutLength] = 0;
+ nOutLength++;
+
+ // Give the output length, if required
+ if(pOutLength != NULL)
+ pOutLength[0] = nOutLength;
+ return dwErrCode;
+}
+
+// Conversion of a file-name-safe string back to the MPQ file name (UTF-8).
+//
+// Escape sequences in the input ("%xx" and "%u[xxyyzz]") are converted back to the raw
+// bytes they encode. Every other character is decoded from UTF-16 (when STORMLIB_WIDE_CHAR
+// is defined) or from UTF-8 (otherwise) and written to the output as UTF-8.
+//
+// The length stored to pOutLength is in bytes and includes the terminating zero.
+// Data that do not fit into the buffer are not stored, but are still counted.
+// If a character fails to decode, the function returns the error code immediately,
+// without terminating the output and without storing anything to pOutLength.
+DWORD WINAPI SMemFileNameToUTF8(
+ void * lpBuffer, // Pointer to the output buffer. If NULL, the function will calculate the needed length
+ size_t ccBuffer, // Length of the output buffer (must include EOS)
+ const TCHAR * szString, // Pointer to the begin of the string
+ const TCHAR * szStringEnd, // Pointer to the end of string. If NULL, it's assumed to be zero-terminated
+ DWORD /* dwFlags */, // Additional flags
+ size_t * pOutLength = NULL) // Pointer to a variable that receives the needed length in bytes (optional)
+{
+ LPBYTE pbBuffer = (LPBYTE)lpBuffer;
+ size_t nOutLength = 0;
+ DWORD dwErrCode = ERROR_SUCCESS;
+
+ // Set the end of the input if not specified
+ if(szStringEnd == NULL)
+ szStringEnd = szString + _tcslen(szString);
+
+ // Keep converting as long as there is any input left
+ while(szString < szStringEnd)
+ {
+ size_t ccCharsEaten = 0;
+ size_t nUtf8Length;
+ DWORD dwCodePoint = 0;
+ BYTE Utf8Buffer[MAX_INVALID_CHARS];
+
+ // Check for encoded sequence of bytes
+ if(szString[0] == '%')
+ {
+ // If there is a single hexa number ("%c7"), decode that number
+ if((szString + 3) <= szStringEnd)
+ {
+ TCHAR HexaString[3] = {0};
+
+ // Conversion fails if the two characters are not hexadecimal digits.
+ // In such case, the longer escape sequence is tried below
+ HexaString[0] = szString[1];
+ HexaString[1] = szString[2];
+ if(SMemStrToBin(HexaString, Utf8Buffer, 1) == ERROR_SUCCESS)
+ {
+ nOutLength = UTF8_FlushBinBuffer(pbBuffer, ccBuffer, nOutLength, Utf8Buffer, 1);
+ szString += 3;
+ continue;
+ }
+ }
+
+ // If there is an escaped sequence ("%u[aabbcc]"), decode that sequence
+ if((nUtf8Length = UTF16_IsEncodedCharSequence(szString, szStringEnd, Utf8Buffer)) != 0)
+ {
+ // Skip two hexa digits per byte plus the four characters of "%u[" and "]"
+ nOutLength = UTF8_FlushBinBuffer(pbBuffer, ccBuffer, nOutLength, Utf8Buffer, nUtf8Length);
+ szString += (nUtf8Length * 2) + 4;
+ continue;
+ }
+ }
+
+#ifdef STORMLIB_WIDE_CHAR
+ // Try to decode the code point from UTF-16
+ if((dwErrCode = UTF16_DecodeCodePoint(szString, szStringEnd, dwCodePoint, ccCharsEaten)) != ERROR_SUCCESS)
+ return dwErrCode;
+#else
+ // Try to decode the code point from UTF-8
+ if((dwErrCode = UTF8_DecodeCodePoint((const BYTE *)szString, (const BYTE *)szStringEnd, dwCodePoint, ccCharsEaten)) != ERROR_SUCCESS)
+ return dwErrCode;
+#endif
+
+ // Check whether the unicode char is not out of range
+ assert(dwCodePoint <= SFILE_UNICODE_MAX);
+
+ // Move the source pointer by the number of characters eaten
+ szString = szString + ccCharsEaten;
+
+ // Encode the UNICODE char
+ nUtf8Length = UTF8_EncodeCodePoint(dwCodePoint, Utf8Buffer);
+
+ // Do we have enough space in the buffer?
+ if(pbBuffer != NULL && (nOutLength + nUtf8Length) < ccBuffer)
+ {
+ // Write the encoded UTF-8 to the output
+ memcpy(pbBuffer + nOutLength, Utf8Buffer, nUtf8Length);
+ }
+
+ // Increment the output length
+ nOutLength = nOutLength + nUtf8Length;
+ }
+
+ // Terminate the string with zero, if we still have space.
+ // The terminating zero is counted in the output length in either case
+ if(pbBuffer != NULL && nOutLength < ccBuffer)
+ pbBuffer[nOutLength] = 0;
+ nOutLength++;
+
+ // Give the output length, if required
+ if(pOutLength != NULL)
+ pOutLength[0] = nOutLength;
+ return dwErrCode;
+}
diff --git a/src/StormCommon.h b/src/StormCommon.h
index c050093..1e67a27 100644
--- a/src/StormCommon.h
+++ b/src/StormCommon.h
@@ -154,14 +154,15 @@ extern LCID g_lcFileLocale; // Preferred file locale and pla
//-----------------------------------------------------------------------------
// Conversion to uppercase/lowercase (and "/" to "\")
-extern unsigned char AsciiToLowerTable[256];
-extern unsigned char AsciiToUpperTable[256];
+extern const unsigned char AsciiToLowerTable[256];
+extern const unsigned char AsciiToUpperTable[256];
+extern const unsigned char SMemCharToByte[0x80];
//-----------------------------------------------------------------------------
// Safe string functions
template <typename XCHAR, typename XINT>
-XCHAR * IntToString(XCHAR * szBuffer, size_t cchMaxChars, XINT nValue, size_t nDigitCount = 0)
+XCHAR * SMemIntToStr(XCHAR * szBuffer, size_t cchMaxChars, XINT nValue, size_t nDigitCount = 0)
{
XCHAR * szBufferEnd = szBuffer + cchMaxChars - 1;
XCHAR szNumberRev[0x20];
@@ -197,6 +198,72 @@ XCHAR * IntToString(XCHAR * szBuffer, size_t cchMaxChars, XINT nValue, size_t nD
return szBuffer;
}
+// Converts a binary blob to a zero-terminated, lowercase hexadecimal string.
+//
+//  szBuffer  - Output buffer; receives two hex digits per input byte plus EOS
+//  cchBuffer - Capacity of szBuffer in characters, including the EOS
+//  pvBinary  - Pointer to the binary data to convert
+//  cbBinary  - Number of bytes at pvBinary
+//
+// Returns ERROR_SUCCESS on success. Returns ERROR_INSUFFICIENT_BUFFER, without
+// writing anything, when szBuffer cannot hold (cbBinary * 2) + 1 characters.
+template <typename XCHAR>
+DWORD SMemBinToStr(XCHAR * szBuffer, size_t cchBuffer, const void * pvBinary, size_t cbBinary)
+{
+    const unsigned char * pbBinary = (const unsigned char *)pvBinary;
+    const char * SMemIntToHex = "0123456789abcdef";
+
+    // The buffer must hold two characters per byte + EOS. The test is written
+    // as a division so that a huge cbBinary cannot wrap (cbBinary * 2) + 1
+    // around to a small number and slip past the size check.
+    if(cchBuffer == 0 || cbBinary > (cchBuffer - 1) / 2)
+        return ERROR_INSUFFICIENT_BUFFER;
+
+    // Emit the high nibble first, then the low nibble of each byte
+    for(size_t i = 0; i < cbBinary; i++)
+    {
+        *szBuffer++ = SMemIntToHex[pbBinary[i] >> 0x04];
+        *szBuffer++ = SMemIntToHex[pbBinary[i] & 0x0F];
+    }
+
+    // Terminate the string
+    *szBuffer = 0;
+    return ERROR_SUCCESS;
+}
+
+// Converts a string of hexadecimal digit pairs to binary data.
+//
+//  szString  - Zero-terminated source string; NULL or empty yields zero bytes
+//  pvBinary  - Output buffer for the converted bytes
+//  cbBinary  - Capacity of pvBinary, in bytes
+//  PtrBinary - Optional; receives the number of bytes written on success
+//
+// Returns ERROR_SUCCESS on success, ERROR_INVALID_PARAMETER when the string
+// contains a character that the SMemCharToByte table rejects or has an odd
+// number of characters, and ERROR_INSUFFICIENT_BUFFER when pvBinary is too small.
+// On failure, PtrBinary is left untouched; bytes converted before the failing
+// pair have already been stored to pvBinary.
+template <typename XCHAR>
+DWORD SMemStrToBin(const XCHAR * szString, void * pvBinary, size_t cbBinary, size_t * PtrBinary = NULL)
+{
+    LPBYTE pbBinary = (LPBYTE)pvBinary;
+    LPBYTE pbBinaryEnd = pbBinary + cbBinary;
+    LPBYTE pbSaveBinary = pbBinary;
+
+    // Verify parameter
+    if(szString != NULL && szString[0] != 0)
+    {
+        // Work as long as we have at least 2 characters ready
+        while(szString[0] != 0 && szString[1] != 0)
+        {
+            // Widen to an unsigned integer BEFORE the range check. Negative chars
+            // turn into huge values, and wide characters above 0x7F are rejected
+            // instead of being truncated to a byte that may look like a valid digit.
+            size_t nChar0 = (size_t)szString[0];
+            size_t nChar1 = (size_t)szString[1];
+            BYTE HiNibble;
+            BYTE LoNibble;
+
+            // SMemCharToByte has 0x80 entries, so the only valid indexes are 0x00 - 0x7F
+            if(nChar0 >= 0x80 || nChar1 >= 0x80)
+                return ERROR_INVALID_PARAMETER;
+
+            // A table entry of 0xFF is treated as "not a valid character"
+            HiNibble = SMemCharToByte[nChar0];
+            LoNibble = SMemCharToByte[nChar1];
+            if(HiNibble == 0xFF || LoNibble == 0xFF)
+                return ERROR_INVALID_PARAMETER;
+
+            // Overflow check
+            if(pbBinary >= pbBinaryEnd)
+                return ERROR_INSUFFICIENT_BUFFER;
+
+            *pbBinary++ = (BYTE)((HiNibble << 0x04) | LoNibble);
+            szString += 2;
+        }
+
+        // Odd number of chars?
+        if(szString[0] != 0 && szString[1] == 0)
+            return ERROR_INVALID_PARAMETER;
+    }
+
+    // Give the length
+    if(PtrBinary != NULL)
+        PtrBinary[0] = (size_t)(pbBinary - pbSaveBinary);
+    return ERROR_SUCCESS;
+}
+
char * StringCopy(char * szTarget, size_t cchTarget, const char * szSource);
void StringCat(char * szTarget, size_t cchTargetMax, const char * szSource);
void StringCreatePseudoFileName(char * szBuffer, size_t cchMaxChars, unsigned int nIndex, const char * szExtension);
diff --git a/src/StormLib.h b/src/StormLib.h
index 4072a7d..bf884c5 100644
--- a/src/StormLib.h
+++ b/src/StormLib.h
@@ -1,7 +1,7 @@
/*****************************************************************************/
/* StormLib.h Copyright (c) Ladislav Zezula 1999-2017 */
/*---------------------------------------------------------------------------*/
-/* StormLib library v 9.22 */
+/* StormLib library v 9.30 */
/* */
/* Author : Ladislav Zezula */
/* E-mail : ladik@zezula.net */
@@ -74,6 +74,7 @@
/* 12.12.16 9.21 Lad Release 9.21 */
/* 10.11.17 9.22 Lad Release 9.22 */
/* 28.09.22 9.24 Lad lcLocale -> lcFileLocale, also contains platform */
+/* 01.11.24 9.30 Lad Added conversion from UTF-8 to file name and back */
/*****************************************************************************/
#ifndef __STORMLIB_H__
@@ -143,8 +144,8 @@ extern "C" {
//-----------------------------------------------------------------------------
// Defines
-#define STORMLIB_VERSION 0x091A // Current version of StormLib
-#define STORMLIB_VERSION_STRING "9.26" // Current version of StormLib as string
+#define STORMLIB_VERSION 0x091E // Current version of StormLib
+#define STORMLIB_VERSION_STRING "9.30" // Current version of StormLib as string
#define ID_MPQ 0x1A51504D // MPQ archive header ID ('MPQ\x1A')
#define ID_MPQ_USERDATA 0x1B51504D // MPQ userdata entry ('MPQ\x1B')
@@ -1132,6 +1133,30 @@ int WINAPI SCompDecompress (void * pvOutBuffer, int * pcbOutBuffer, void * pv
int WINAPI SCompDecompress2(void * pvOutBuffer, int * pcbOutBuffer, void * pvInBuffer, int cbInBuffer);
//-----------------------------------------------------------------------------
+// Conversion of UTF-8 (MPQ listfiles) into file name safe strings
+
+#define SFILE_UTF8_ALLOW_INVALID_CHARS 0x01 // If set, then the function will treat invalid chars like MultiByteToWideChar does
+#define SFILE_UTF8_INVALID_CHARACTER 0xFFFD // Marker of an invalid character (U+FFFD)
+#define SFILE_UNICODE_MAX 0x10FFFF // The highest valid UNICODE code point
+
+// Conversion of an MPQ file name (UTF-8) to a file-name-safe string
+DWORD WINAPI SMemUTF8ToFileName(
+ TCHAR * szBuffer, // Pointer to the output buffer. If NULL, the function will calculate the needed length
+ size_t ccBuffer, // Length of the output buffer (must include EOS)
+ const void * lpString, // Pointer to the beginning of the string
+ const void * lpStringEnd, // Pointer to the end of the string. If NULL, the string is assumed to be zero-terminated
+ DWORD dwFlags, // Additional flags
+ size_t * pOutLength); // Pointer to a variable that receives the needed length (optional)
+
+DWORD WINAPI SMemFileNameToUTF8( // Conversion of a file name back to UTF-8
+ void * lpBuffer, // Pointer to the output buffer. If NULL, the function will calculate the needed length
+ size_t ccBuffer, // Length of the output buffer (must include EOS)
+ const TCHAR * szString, // Pointer to the beginning of the string
+ const TCHAR * szStringEnd, // Pointer to the end of the string. If NULL, the string is assumed to be zero-terminated
+ DWORD dwFlags, // Reserved
+ size_t * pOutLength); // Pointer to a variable that receives the needed length in bytes (optional)
+
+//-----------------------------------------------------------------------------
// Non-Windows support for SetLastError/GetLastError
#ifndef STORMLIB_WINDOWS
diff --git a/src/StormPort.h b/src/StormPort.h
index aa309e1..f00afd6 100644
--- a/src/StormPort.h
+++ b/src/StormPort.h
@@ -44,6 +44,10 @@
#define _CRT_NON_CONFORMING_SWPRINTFS
#endif
+ #if defined(UNICODE) || defined(_UNICODE)
+ #define STORMLIB_WIDE_CHAR
+ #endif
+
#include <tchar.h>
#include <assert.h>
#include <ctype.h>
@@ -52,15 +56,9 @@
// Suppress definitions of `min` and `max` macros by <windows.h>:
#define NOMINMAX 1
#include <windows.h>
-
#include <wininet.h>
- #define STORMLIB_LITTLE_ENDIAN
- #ifdef _WIN64
- #define STORMLIB_64BIT
- #else
- #define STORMLIB_32BIT
- #endif
+ #define STORMLIB_LITTLE_ENDIAN
#define STORMLIB_CDECL __cdecl
@@ -74,7 +72,6 @@
#if !defined(STORMLIB_PLATFORM_DEFINED) && defined(__APPLE__) // Mac BSD API
- // Macintosh
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
@@ -320,13 +317,8 @@
// Definition of Windows-specific types for non-Windows platforms
#ifndef STORMLIB_WINDOWS
- #if __LP64__
- #define STORMLIB_64BIT
- #else
- #define STORMLIB_32BIT
- #endif
- // __cdecl meand nothing on non-Windows
+ // __cdecl means nothing on non-Windows
#define STORMLIB_CDECL /* */
// Typedefs for ANSI C
@@ -340,7 +332,7 @@
typedef long long LONGLONG;
typedef unsigned long long ULONGLONG;
typedef void * HANDLE;
- typedef void * LPOVERLAPPED; // Unsupported on Linux and Mac
+ typedef void * LPOVERLAPPED;
typedef char TCHAR;
typedef unsigned int LCID;
typedef LONG * PLONG;
@@ -351,7 +343,7 @@
typedef char * LPTSTR;
typedef char * LPSTR;
- #ifdef STORMLIB_32BIT
+ #ifndef __LP64__
#define _LZMA_UINT32_IS_ULONG
#endif
@@ -409,12 +401,14 @@
#define ERROR_DISK_FULL ENOSPC
#define ERROR_ALREADY_EXISTS EEXIST
#define ERROR_INSUFFICIENT_BUFFER ENOBUFS
- #define ERROR_BAD_FORMAT 1000 // No such error code under Linux
- #define ERROR_NO_MORE_FILES 1001 // No such error code under Linux
- #define ERROR_HANDLE_EOF 1002 // No such error code under Linux
- #define ERROR_CAN_NOT_COMPLETE 1003 // No such error code under Linux
- #define ERROR_FILE_CORRUPT 1004 // No such error code under Linux
- #define ERROR_BUFFER_OVERFLOW 1005 // No such error code under Linux
+ #define ERROR_BAD_FORMAT 1000 // No such error codes under Linux
+ #define ERROR_NO_MORE_FILES 1001
+ #define ERROR_HANDLE_EOF 1002
+ #define ERROR_CAN_NOT_COMPLETE 1003
+ #define ERROR_FILE_CORRUPT 1004
+ #define ERROR_BUFFER_OVERFLOW 1005
+ #define ERROR_INVALID_DATA 1006
+ #define ERROR_NO_UNICODE_TRANSLATION 1007
#endif
// Macros that can sometimes be missing
diff --git a/src/wdk/sources-cpp.cpp b/src/wdk/sources-cpp.cpp
index f289975..a9d7ba3 100644
--- a/src/wdk/sources-cpp.cpp
+++ b/src/wdk/sources-cpp.cpp
@@ -24,3 +24,4 @@
#include "src\SFilePatchArchives.cpp"
#include "src\SFileReadFile.cpp"
#include "src\SFileVerify.cpp"
+#include "src\SMemUtf8.cpp"
diff --git a/test/StormTest.cpp b/test/StormTest.cpp
index 0312abb..7cd885d 100755
--- a/test/StormTest.cpp
+++ b/test/StormTest.cpp
@@ -275,6 +275,21 @@ static SFILE_MARKERS MpqMarkers[] =
static TCHAR szMpqDirectory[MAX_PATH+1];
size_t cchMpqDirectory = 0;
+// Test helper: hands the condition back unchanged. When the condition is
+// false, it first stops in the debugger (Windows) or fires assert (elsewhere).
+inline bool AssertTrue(bool bCondition)
+{
+    // Nothing to report when the condition holds
+    if(bCondition)
+        return true;
+
+    // Make the failure visible at the point where it happened
+#ifdef STORMLIB_WINDOWS
+    __debugbreak();
+#else
+    assert(false);
+#endif
+    return false;
+}
+
+// Checks the condition through AssertTrue and makes the enclosing function
+// return false when it does not hold. Wrapped in do/while(0) so that the macro
+// followed by ';' is exactly one statement and stays safe inside if/else.
+#define ASSERT_TRUE(condition) do { if(!AssertTrue(condition)) { return false; } } while(0)
+
static EXTRA_TYPE GetExtraType(const void * pExtra)
{
if(pExtra != NULL)
@@ -328,30 +343,6 @@ LPCTSTR GetRelativePath(LPCTSTR szFullPath)
return _T("");
}
-// Converts binary array to string.
-// The caller must ensure that the buffer has at least ((cbBinary * 2) + 1) characters
-template <typename xchar>
-xchar * StringFromBinary(LPBYTE pbBinary, size_t cbBinary, xchar * szBuffer)
-{
- const char * IntToHexChar = "0123456789abcdef";
- xchar * szSaveBuffer = szBuffer;
-
- // Verify the binary pointer
- if(pbBinary && cbBinary)
- {
- // Convert the bytes to string array
- for(size_t i = 0; i < cbBinary; i++)
- {
- *szBuffer++ = IntToHexChar[pbBinary[i] >> 0x04];
- *szBuffer++ = IntToHexChar[pbBinary[i] & 0x0F];
- }
- }
-
- // Terminate the string
- *szBuffer = 0;
- return szSaveBuffer;
-}
-
const char * GetFileText(PFILE_DATA pFileData)
{
const char * szFileText = (const char *)(pFileData->FileData);
@@ -1764,7 +1755,7 @@ static DWORD VerifyDataChecksum(TLogHelper & Logger, HANDLE hMpq, DWORD dwSearch
// Check the MD5 hash, if given
if(IS_VALID_STRING(szNameHash))
{
- StringFromBinary(NameHash, MD5_DIGEST_SIZE, szNameHash);
+ SMemBinToStr(szNameHash, _countof(szNameHash), NameHash, MD5_DIGEST_SIZE);
if(_stricmp(szNameHash, szExpectedHash))
{
Logger.PrintMessage("Extracted files MD5 mismatch (expected: %s, obtained: %s)", szExpectedHash, szNameHash);
@@ -3785,23 +3776,91 @@ static DWORD TestReplaceFile(LPCTSTR szMpqPlainName, LPCTSTR szFilePlainName, LP
return dwErrCode;
}
-static void Test_PlayingSpace()
+static bool TestUtfConversion(const void * lpString) // Round trip: UTF-8 -> file name -> UTF-8; returns false at the first failed check
{
- HANDLE hFile = NULL;
- HANDLE hMpq = NULL;
+ LPTSTR szBuffer;
+ LPBYTE pbBuffer;
+ size_t nLength1 = 0;
+ size_t nLength2 = 0;
+ DWORD dwErrCode1;
+ DWORD dwErrCode2;
+ TCHAR szWideBuffer[1];
+ BYTE szByteBuffer[1];
+ int nResult;
+
+ // Query the needed output length without supplying an output buffer (NULL, 0)
+ dwErrCode1 = SMemUTF8ToFileName(NULL, 0, lpString, NULL, 0, &nLength1);
+
+ // The same query with a non-NULL, one-character buffer must give the same error code and length
+ dwErrCode2 = SMemUTF8ToFileName(szWideBuffer, _countof(szWideBuffer), lpString, NULL, 0, &nLength2);
+ ASSERT_TRUE(dwErrCode2 == dwErrCode1);
+ ASSERT_TRUE(nLength2 == nLength1);
+
+ // Convert into a buffer of exactly the reported length; error code and length must match again
+ if((szBuffer = STORM_ALLOC(TCHAR, nLength1)) != NULL)
+ {
+ dwErrCode2 = SMemUTF8ToFileName(szBuffer, nLength1, lpString, NULL, 0, &nLength2);
+ ASSERT_TRUE(dwErrCode2 == dwErrCode1); // NOTE(review): ASSERT_TRUE returns on failure, so szBuffer (and pbBuffer below) is not freed on that path
+ ASSERT_TRUE(nLength2 == nLength1);
+
+ // Reverse direction: query the needed UTF-8 length without an output buffer (nLength1 is reused)
+ dwErrCode1 = SMemFileNameToUTF8(NULL, 0, szBuffer, NULL, 0, &nLength1);
+
+ // The same query with a non-NULL, one-byte buffer must give the same error code and length
+ dwErrCode2 = SMemFileNameToUTF8(szByteBuffer, _countof(szByteBuffer), szBuffer, NULL, 0, &nLength2);
+ ASSERT_TRUE(dwErrCode2 == dwErrCode1);
+ ASSERT_TRUE(nLength2 == nLength1);
+
+ // Convert back into a buffer of exactly the reported length
+ if((pbBuffer = STORM_ALLOC(BYTE, nLength1)) != NULL)
+ {
+ dwErrCode2 = SMemFileNameToUTF8(pbBuffer, nLength1, szBuffer, NULL, 0, &nLength2);
+ ASSERT_TRUE(dwErrCode2 == dwErrCode1);
+ ASSERT_TRUE(nLength2 == nLength1);
- if(SFileOpenArchive(_T("c:\\RedHero vs 7Com22 (Final Stage GOD).scx"), 0, 0, &hMpq))
- {
- SFileSetLocale(0x409);
+ nResult = memcmp(pbBuffer, lpString, nLength1); // The round trip must reproduce the input bytes
+ ASSERT_TRUE(nResult == 0);
+
+ STORM_FREE(pbBuffer);
+ }
+
+ STORM_FREE(szBuffer);
+ }
+ return true; // Also reached when an allocation failed and the remaining checks were skipped
+}
- if(SFileOpenFileEx(hMpq, "staredit\\scenario.chk", 0, &hFile))
+static DWORD TestUtf8Conversions(const BYTE * szTestString, const TCHAR * szListFile) // Returns ERROR_INVALID_DATA when a listfile entry fails the round trip
+{
+ SFILE_FIND_DATA sf;
+ HANDLE hFind;
+ TCHAR szFullPath[MAX_PATH];
+
+ // Run the round-trip check on the caller's test string; its result is not checked here
+ TestUtfConversion(szTestString);
+
+ // Create full path of the listfile
+ CreateFullPathName(szFullPath, _countof(szFullPath), szListFileDir, szListFile);
+
+ // Test the file names in the given listfile
+ hFind = SListFileFindFirstFile(NULL, szFullPath, "*", &sf);
+ if(hFind != NULL)
+ {
+ while(SListFileFindNextFile(hFind, &sf)) // NOTE(review): the loop advances before the first test, so the entry presumably stored in sf by SListFileFindFirstFile is never checked - confirm
{
- SFileCloseFile(hFile);
+ if(!TestUtfConversion(sf.cFileName))
+ {
+ return ERROR_INVALID_DATA; // NOTE(review): returns without calling SListFileFindClose(hFind)
+ }
}
- SFileCloseArchive(hMpq);
+ SListFileFindClose(hFind);
}
+
+ return ERROR_SUCCESS;
}
+static void Test_PlayingSpace() // Placeholder for ad-hoc experiments; intentionally empty
+{}
+
//-----------------------------------------------------------------------------
// Tables
@@ -3814,13 +3873,59 @@ static LPCTSTR szDiabdatMPQ = _T("MPQ_1997_v1_Diablo1_DIABDAT.MPQ");
static const TEST_EXTRA_ONEFILE LfBliz = {ListFile, _T("ListFile_Blizzard.txt")};
static const TEST_EXTRA_ONEFILE LfWotI = {ListFile, _T("ListFile_WarOfTheImmortals.txt")};
+static const TEST_EXTRA_ONEFILE LfBad1 = {ListFile, _T("ListFile_UTF8_Bad.txt")};
static const BYTE szMpqFileNameUTF8[] = {0x4D, 0x50, 0x51, 0x5F, 0x32, 0x30, 0x32, 0x34, 0x5F, 0x76, 0x31, 0x5F, 0xE6, 0x9D, 0x82, 0xE9, 0xB1, 0xBC, 0xE5, 0x9C, 0xB0, 0xE7, 0x89, 0xA2, 0x5F, 0x30, 0x2E, 0x30, 0x38, 0x34, 0x62, 0x65, 0x74, 0x61, 0x34, 0x36, 0x2E, 0x77, 0x33, 0x78, 0x00};
static const BYTE szLstFileNameUTF8[] = {0x4C, 0x69, 0x73, 0x74, 0x46, 0x69, 0x6C, 0x65, 0x5F, 0xE6, 0x9D, 0x82, 0xE9, 0xB1, 0xBC, 0xE5, 0x9C, 0xB0, 0xE7, 0x89, 0xA2, 0x5F, 0x30, 0x2E, 0x30, 0x38, 0x34, 0x62, 0x65, 0x74, 0x61, 0x34, 0x36, 0x2E, 0x74, 0x78, 0x74, 0x00};
-static const TEST_EXTRA_UTF8 MpqUtf8 = {Utf8File, szMpqFileNameUTF8, szLstFileNameUTF8};
-static const TEST_EXTRA_TWOFILES TwoFilesD1 = {TwoFiles, "music\\dintro.wav", "File00000023.xxx"};
-static const TEST_EXTRA_TWOFILES TwoFilesD2 = {TwoFiles, "waitingroombkgd.dc6"};
+static const BYTE FileNameInvalidUTF8[] = // Zero-terminated file name mixing valid and invalid UTF-8 sequences
+{
+// Hexadecimal Binary UTF-16 String
+// ---- --------------------------------- ------ ------
+ 0x7c, // --> 01111100 --> 0x007c %u[7cb7]
+ 0xb7, // --> 10110111(bad) --> 0xfffd
+ 0xc9, 0xb7, // --> 11001001 10110111 --> 0x0277 \x0277
+ 0xc9, /* ca */ // --> 11001001 11001010(bad) --> 0xfffd %u[c9cac0bde7]
+ 0xca, /* c0 */ // --> 11001010 11000000(bad) --> 0xfffd
+ 0xc0, /* bd */ // --> 11000000 10111101(bad) --> 0x003d(bad)
+ 0xbd, // --> 10111101(bad) --> 0xfffd
+ 0xe7, /* c4 */ // --> 11100111 11000100(bad) --> 0xfffd
+ 0xc4, 0xa7, // --> 11000100 10100111 --> 0x0127 \x0127
+ 0xca, /* de */ // --> 11001010 11011110(bad) --> 0xfffd %ca
+ 0xde, 0xbb, // --> 11011110 10111011 --> 0x07bb \x07bb
+ 0xb6, // --> 10110110(bad) --> 0xfffd %b6
+ 0xd3, 0xad, // --> 11010011 10101101 --> 0x04ed \x04ed
+ 0xc4, /* fa */ // --> 11000100 11111010(bad) --> 0xfffd %u[c4fa]
+ 0xfa, /* 5f */ // --> 11111010 01011111(bad) --> 0xfffd
+ 0x5f, // --> 01011111 --> 0x005f _
+ 0xa1, // --> 10100001(bad) --> 0xfffd %u[a1eea1f0a1ef]
+ 0xee, /* a1 f0 */ // --> 11101110 10100001 11110000(bad) --> 0xfffd
+ 0xa1, // --> 10100001(bad) --> 0xfffd
+ 0xf0, /* a1 ef */ // --> 11110000 10100001 11101111(bad) --> 0xfffd
+ 0xa1, // --> 10100001(bad) --> 0xfffd
+ 0xef, /* 5f */ // --> 11101111 01011111(bad) --> 0xfffd
+ 0x5f, // --> 01011111 --> 0x005f _
+ 0xf0, /* 80 80 80 */ // --> 11110000 10000000 10000000 10000000 --> 0x0000(bad) %u[f0808080]
+ 0x80, // --> 10000000(bad) --> 0xfffd
+ 0x80, // --> 10000000(bad) --> 0xfffd
+ 0x80, // --> 10000000(bad) --> 0xfffd
+ 0xe9, 0xa3, 0x9e, // --> 11101001 10100011 10011110 --> 0x98de \x98de
+ 0xe4, 0xb8, 0x96, // --> 11100100 10111000 10010110 --> 0x4e16 \x4e16
+ 0xe7, 0x95, 0x8c, // --> 11100111 10010101 10001100 --> 0x754c \x754c
+ 0xe9, 0xad, 0x94, // --> 11101001 10101101 10010100 --> 0x9b54 \x9b54
+ 0xe5, 0x85, 0xbd, // --> 11100101 10000101 10111101 --> 0x517d \x517d
+ 0xe6, 0xac, 0xa2, // --> 11100110 10101100 10100010 --> 0x6b22 \x6b22
+ 0xe8, 0xbf, 0x8e, // --> 11101000 10111111 10001110 --> 0x8fce \x8fce
+ 0xe6, 0x82, 0xa8, // --> 11100110 10000010 10101000 --> 0x60a8 \x60a8
+ 0x2e, // --> 00101110 --> 0x002e \x002e
+ 0x6d, 0x64, 0x78, // --> 01101101 01100100 01111000 --> "mdx"
+ 0x00 // --> 00000000 --> EOS
+};
+
+static const TEST_EXTRA_UTF8 MpqUtf8 = {Utf8File, szMpqFileNameUTF8, szLstFileNameUTF8};
+
+static const TEST_EXTRA_TWOFILES TwoFilesD1 = {TwoFiles, "music\\dintro.wav", "File00000023.xxx"};
+static const TEST_EXTRA_TWOFILES TwoFilesD2 = {TwoFiles, "waitingroombkgd.dc6"};
static const TEST_EXTRA_TWOFILES TwoFilesW3M = {TwoFiles, "file00000002.blp"};
static const TEST_EXTRA_TWOFILES TwoFilesW3X = {TwoFiles, "BlueCrystal.mdx"};
@@ -4046,6 +4151,8 @@ static const TEST_INFO1 Test_OpenMpqs[] =
{_T("MPQ_2023_v1_GreenTD.w3x"), NULL, "a8d91fc4e52d7c21ff7feb498c74781a", 2004}, // Corrupt sector checksum table in file #A0
{_T("MPQ_2023_v4_1F644C5A.SC2Replay"), NULL, "b225828ffbf5037553e6a1290187caab", 17}, // Corrupt patch info of the "(attributes)" file
{_T("<Chinese MPQ name>"), NULL, "67faeffd0c0aece205ac8b7282d8ad8e", 4697, &MpqUtf8}, // Chinese name of the MPQ
+ {_T("MPQ_2024_v1_BadUtf8_5.0.2.w3x"), NULL, "be34f9862758f021a1c6c77df3cd4f05", 6393, &LfBad1}, // Bad UTF-8 sequences in file names
+
// Protected archives
{_T("MPQ_2002_v1_ProtectedMap_InvalidUserData.w3x"), NULL, "b900364cc134a51ddeca21a13697c3ca", 79},
@@ -4224,6 +4331,9 @@ int _tmain(int argc, TCHAR * argv[])
// Placeholder function for various testing purposes
Test_PlayingSpace();
+ // Test the UTF-8 conversions
+ TestUtf8Conversions(FileNameInvalidUTF8, LfBad1.szFile);
+
#ifdef TEST_COMMAND_LINE
// Test-open MPQs from the command line. They must be plain name
// and must be placed in the Test-MPQs folder
@@ -4235,10 +4345,9 @@ int _tmain(int argc, TCHAR * argv[])
#ifdef TEST_LOCAL_LISTFILE // Tests on a local listfile
if(dwErrCode == ERROR_SUCCESS)
- {
- TestOnLocalListFile(_T("FLAT-MAP:listfile-test.txt"));
+ dwErrCode = TestOnLocalListFile(_T("FLAT-MAP:listfile-test.txt"));
+ if(dwErrCode == ERROR_SUCCESS)
dwErrCode = TestOnLocalListFile(_T("listfile-test.txt"));
- }
#endif // TEST_LOCAL_LISTFILE
#ifdef TEST_STREAM_OPERATIONS // Test file stream operations
diff --git a/test/stormlib-test-001.txt b/test/stormlib-test-001.txt
index db8df0e..1225add 100644
--- a/test/stormlib-test-001.txt
+++ b/test/stormlib-test-001.txt
@@ -1,4 +1,4 @@
-==== Test Suite for StormLib version 9.26 ====
+==== Test Suite for StormLib version 9.30 ====
InitWorkFolder: Work directory \Multimedia\MPQs (default)
TestLiFiSearch (FLAT-MAP:listfile-test.txt) succeeded.
TestLiFiSearch (listfile-test.txt) succeeded.
@@ -61,6 +61,7 @@ TestReadingMpq (MPQ_2023_v4_UTF8.s2ma) succeeded.
TestReadingMpq (MPQ_2023_v1_GreenTD.w3x) succeeded.
TestReadingMpq (MPQ_2023_v4_1F644C5A.SC2Replay) succeeded.
TestReadingMpq (<Chinese MPQ name>) succeeded.
+TestReadingMpq (MPQ_2024_v1_BadUtf8_5.0.2.w3x) succeeded.
TestReadingMpq (MPQ_2002_v1_ProtectedMap_InvalidUserData.w3x) succeeded.
TestReadingMpq (MPQ_2002_v1_ProtectedMap_InvalidMpqFormat.w3x) succeeded.
TestReadingMpq (MPQ_2002_v1_ProtectedMap_Spazzler.w3x) succeeded.