aboutsummaryrefslogtreecommitdiff
path: root/src/SMemUtf8.cpp
blob: 583242243c9ba569c634b598efb04829afbca2d7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
/*****************************************************************************/
/* SFileVerify.cpp                        Copyright (c) Ladislav Zezula 2010 */
/*---------------------------------------------------------------------------*/
/* Support for conversion of UTF-8 <-> File name                             */
/*                                                                           */
/* File names in the MPQs are assumed to be UTF-8. However, bad sequences    */
/* or filename unsafe characters are allowed in the list files, but won't    */
/* work in unpacking files from MPQ to a local file.                         */
/*                                                                           */
/* This module contains cross-platform comparable conversion between UTF-8   */
/* and file names that will produce identical file names across platforms.   */
/*---------------------------------------------------------------------------*/
/*   Date    Ver   Who  Comment                                              */
/* --------  ----  ---  -------                                              */
/* 31.10.24  1.00  Lad  Created                                              */
/*****************************************************************************/

#define __STORMLIB_SELF__
#include "StormLib.h"
#include "StormCommon.h"

//-----------------------------------------------------------------------------
// Local defines

#define MAX_INVALID_CHARS               128         // Maximum number of invalid characters in a row

//-----------------------------------------------------------------------------
// Conversion tables

const unsigned char SMemCharToByte[0x80] =
{
    //   00    01    02    03    04    05    06    07    08    09    0A    0B    0C    0D    0E    0F
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0xFF
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x10
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x20
        0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x30
        0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x40
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x50
        0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x60
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF  // 0x70
};

//-----------------------------------------------------------------------------
// Local functions

// Bit mask of characters that are file name safe. We will maintain
// the same charset even on non-Windows in order to keep the file names equal
static unsigned int FileNameSafeChars[4] =
{
    0x00000000, 0x2BFF7BFB, 0xFFFFFFFF, 0xEFFFFFFF      // Windows: [0x20-0x7F], except 0x22, 0x2A, 0x2F, 0x3A, 0x3C, 0x3E, 0x3F, 0x7C
//  0xfffffffe, 0xffff7fff, 0xffffffff, 0xffffffff      // Linux:   [0x01-0x7F], except 0x2F
};

static bool UTF8_IsBadFileNameCharacter(DWORD ch)
{
    // It is guaranteed that the character is in range of 0x00 - 0x7F
    assert(ch < 0x80);

    // Use the bit from the table
    return (FileNameSafeChars[ch / 32] & (1 << (ch % 32))) ? false : true;
}

static DWORD UTF8_DecodeSequence(const BYTE * pbString, BYTE BitsMask, size_t ccFollowBytes, DWORD dwMinValue, DWORD dwMaxValue, DWORD & dwCodePoint, size_t & ccBytesEaten)
{
    const BYTE * pbSaveString = pbString;
    DWORD dwAccumulator;

    // Extract the low bits from the leading byte
    dwAccumulator = pbString[0] & BitsMask;
    ccBytesEaten = 1;
    pbString++;

    // Process the follow-up bytes
    for(size_t i = 0; i < ccFollowBytes; i++)
    {
        // Every follow-up byte in the UTF-8 sequence must start with 10xxxxxx
        if((pbString[0] & 0xC0) != 0x80)
            return ERROR_NO_UNICODE_TRANSLATION;

        // Add 6 bits to the accumulator
        dwAccumulator = (dwAccumulator << 6) | (*pbString++ & 0x3F);
    }

    // Check whether the code point is in the given range
    if(!(dwMinValue <= dwAccumulator && dwAccumulator <= dwMaxValue))
        return ERROR_INVALID_DATA;

    // Give the number of bytes eaten and the decoded code point
    ccBytesEaten = (pbString - pbSaveString);
    dwCodePoint = dwAccumulator;
    return ERROR_SUCCESS;
}

// https://en.wikipedia.org/wiki/UTF-8
static DWORD UTF8_DecodeCodePoint(const BYTE * pbString, const BYTE * pbStringEnd, DWORD & dwCodePoint, size_t & ccBytesEaten)
{
    // Reset the number of bytes eaten
    dwCodePoint = SFILE_UTF8_INVALID_CHARACTER;
    ccBytesEaten = 0;

    if(pbString < pbStringEnd)
    {
        // At least one byte will be eaten
        ccBytesEaten = 1;

        // 1st code point (0x00 - 0x7F, 1 byte)
        if(pbString[0] <= 0x7F)
        {
            // This is the perfect spot to check for filename-unsafe characters
            if(UTF8_IsBadFileNameCharacter(pbString[0]))
                return ERROR_NO_UNICODE_TRANSLATION;

            // Decode the 1-byte sequence
            dwCodePoint = pbString[0];
            return ERROR_SUCCESS;
        }

        // 2nd code point (0x80 - 0x7FF, 2 bytes)
        if((pbString[0] & 0xE0) == 0xC0 && (pbString + 2) <= pbStringEnd)
        {
            // Decode the 2-byte sequence
            return UTF8_DecodeSequence(pbString, 0x1F, 1, 0x80, 0x7FF, dwCodePoint, ccBytesEaten);
        }

        // 3rd code point (0x800 - 0xFFFF, 3 bytes)
        // Note: MultiByteToWideChar will not decode 0xE0 0xBF 0xBF (--> 0x0FFF),
        if((pbString[0] & 0xF0) == 0xE0 && (pbString + 3) <= pbStringEnd)
        {
            // Decode the 3-byte sequence
            return UTF8_DecodeSequence(pbString, 0x0F, 2, 0x800, 0xFFFF, dwCodePoint, ccBytesEaten);
        }

        // 4th code point (0x10000 - 0x10FFFF, 4 bytes)
        if((pbString[0] & 0xF8) == 0xF0 && (pbString + 4) <= pbStringEnd)
        {
            // Try to decode 4-byte sequence
            return UTF8_DecodeSequence(pbString, 0x07, 3, 0x10000, SFILE_UNICODE_MAX, dwCodePoint, ccBytesEaten);
        }

        // An invalid UTF-8 sequence encountered
        return ERROR_NO_UNICODE_TRANSLATION;
    }

    // No bytes available. Should never happen
    assert(false);
    return ERROR_BUFFER_OVERFLOW;
}

static size_t UTF8_EncodeSequence(DWORD dwCodePoint, BYTE LeadingByte, DWORD dwFollowByteCount, LPBYTE Utf8Buffer)
{
    DWORD dwByteShift = dwFollowByteCount * 6;

    // Encode the highest byte
    Utf8Buffer[0] = (BYTE)(LeadingByte | (dwCodePoint >> dwByteShift));
    dwByteShift -= 6;

    // Encode the follow bytes
    for(DWORD i = 0; i < dwFollowByteCount; i++)
    {
        // The follow byte must be 10xxxxxx
        Utf8Buffer[i + 1] = (BYTE)(0x80 | ((dwCodePoint >> dwByteShift) & 0x3F));
        dwByteShift -= 6;
    }

    return dwFollowByteCount + 1;
}

static size_t UTF8_EncodeCodePoint(DWORD dwCodePoint, LPBYTE Utf8Buffer)
{
    // 0x00 - 0x7F, 1 byte
    if(dwCodePoint < 0x80)
        return UTF8_EncodeSequence(dwCodePoint, 0x00, 0, Utf8Buffer);

    // 0x80 - 0x7FF
    if(dwCodePoint < 0x800)
        return UTF8_EncodeSequence(dwCodePoint, 0xC0, 1, Utf8Buffer);

    // 0x800 - 0xFFFF
    if(dwCodePoint < 0x10000)
        return UTF8_EncodeSequence(dwCodePoint, 0xE0, 2, Utf8Buffer);

    // 0x800 - 0xFFFF
    if(dwCodePoint < 0x110000)
        return UTF8_EncodeSequence(dwCodePoint, 0xF0, 3, Utf8Buffer);

    // Should never happen
    assert(false);
    return 0;
}

static size_t UTF8_FlushInvalidChars(LPTSTR szBuffer, size_t ccBuffer, size_t nOutLength, LPBYTE InvalidChars, size_t nInvalidChars)
{
    // Case 0: No invalid char -> do nothing
    if(nInvalidChars == 0)
    {
        return nOutLength;
    }

    // Case 1: One invalid char -> %xx (compatible with previous versions of MPQ Editor)
    if(nInvalidChars == 1)
    {
        // Space for 3 characters needed
        if(szBuffer != NULL && (nOutLength + 3) <= ccBuffer)
        {
            szBuffer[nOutLength] = '%';
            SMemBinToStr(szBuffer + nOutLength + 1, ccBuffer - 1, InvalidChars, 1);
        }
        return nOutLength + 3;
    }

    // Case 1: More than one invalid char -> %u[xxyyzz]
    else
    {
        // Enough space for %u[xxyyzz]
        size_t nLengthNeeded = nInvalidChars * 2 + 4;

        // Space for 4 characters needed
        if(szBuffer != NULL && (nOutLength + nLengthNeeded) <= ccBuffer)
        {
            memcpy(szBuffer + nOutLength, _T("%u["), 6);

            SMemBinToStr(szBuffer + nOutLength + 3, ccBuffer - 3, InvalidChars, nInvalidChars);

            szBuffer[nOutLength + nLengthNeeded - 1] = ']';
            szBuffer[nOutLength + nLengthNeeded] = 0;
        }
        return nOutLength + nLengthNeeded;
    }
}

size_t UTF8_FlushBinBuffer(LPBYTE pbBuffer, size_t ccBuffer, size_t nOutLength, LPBYTE BinBuffer, size_t nByteCount)
{
    if(pbBuffer != NULL && (nOutLength + nByteCount) < ccBuffer)
        memcpy(pbBuffer + nOutLength, BinBuffer, nByteCount);
    return nOutLength + nByteCount;
}

#ifdef STORMLIB_WIDE_CHAR
static size_t UTF16_EncodeCodePoint(DWORD dwCodePoint, unsigned short * Utf16Buffer)
{
    // https://en.wikipedia.org/wiki/UTF-16
    if(dwCodePoint <= 0xFFFF)
    {
        Utf16Buffer[0] = (unsigned short)(dwCodePoint);
        return 1;
    }

    if(dwCodePoint <= SFILE_UNICODE_MAX)
    {
        // Fix the code point
        dwCodePoint -= 0x10000;

        // Split the code point to two 10-bit values
        Utf16Buffer[0] = (unsigned short)(0xD800 + (dwCodePoint >> 10));    // High 6 bytes
        Utf16Buffer[1] = (unsigned short)(0xDC00 + (dwCodePoint & 0x3FF));  // Low 10 bytes
        return 2;
    }

    // Should never happen
    assert(false);
    return 0;
}

static DWORD UTF16_DecodeCodePoint(LPCTSTR szString, LPCTSTR szStringEnd, DWORD & dwCodePoint, size_t & ccCharsEaten)
{
    // Reset the number of bytes eaten
    dwCodePoint = SFILE_UTF8_INVALID_CHARACTER;
    ccCharsEaten = 0;

    if(szString < szStringEnd)
    {
        // At least one char will be eaten
        ccCharsEaten = 1;

        // Check for an invalid surrogate pair
        if(0xDC00 <= szString[0] && szString[0] <= 0xDFFF)
        {
            dwCodePoint = SFILE_UTF8_INVALID_CHARACTER;
            return ERROR_NO_UNICODE_TRANSLATION;
        }

        // Check for a valid surrogate pair
        if(0xD800 <= szString[0] && szString[0] <= 0xDBFF && (szString + 1) < szStringEnd)
        {
            dwCodePoint = ((szString[0] - 0xD800) << 10) | (szString[1] - 0xDC00) + 0x10000;
            ccCharsEaten = 2;
            return ERROR_SUCCESS;
        }

        // Direct encoding
        dwCodePoint = szString[0];
        ccCharsEaten = 1;
        return ERROR_SUCCESS;
    }

    // No bytes available. Should never happen
    assert(false);
    return ERROR_BUFFER_OVERFLOW;
}
#endif

size_t UTF16_IsEncodedCharSequence(LPCTSTR szString, LPCTSTR szStringEnd, LPBYTE BinBuffer)
{
    size_t nEncodedChars = 0;

    if((szString + 1) < szStringEnd && *szString++ == '%')
    {
        if((szString + 1) < szStringEnd && *szString++ == 'u')
        {
            if((szString + 1) < szStringEnd && *szString++ == '[')
            {
                // Keep going as long as we can convert
                for(size_t i = 0; i < MAX_INVALID_CHARS; i++)
                {
                    if(szString + (i * 2) >= szStringEnd)
                        break;
                    if(szString[i * 2] == ']')
                        break;
                    nEncodedChars++;
                }

                // Did we encounter the end of the string?
                if(szString + (nEncodedChars * 2) + 1 <= szStringEnd && szString[nEncodedChars * 2] == ']')
                {
                    TCHAR HexaString[MAX_INVALID_CHARS * 2 + 1];

                    // Copy the hexadecimal string
                    memcpy(HexaString, szString, (nEncodedChars * 2) * sizeof(TCHAR));
                    HexaString[nEncodedChars * 2] = 0;

                    // Try to decode the hexa string
                    if(SMemStrToBin(HexaString, BinBuffer, nEncodedChars) == ERROR_SUCCESS)
                    {
                        return nEncodedChars;
                    }
                }
            }
        }
    }
    return 0;
}

//-----------------------------------------------------------------------------
// Public (exported) functions

// Conversion of MPQ file name to file-name-safe string
DWORD WINAPI SMemUTF8ToFileName(
    LPTSTR szBuffer,                // Pointer to the output buffer. If NULL, the function will calulate the needed length
    size_t ccBuffer,                // Length of the output buffer (must include EOS)
    const void * lpString,          // Pointer to the begin of the string
    const void * lpStringEnd,       // Pointer to the end of string. If NULL, it's assumed to be zero-terminated
    DWORD dwFlags,                  // Additional flags
    size_t * pOutLength = NULL)     // Pointer to a variable that receives the needed length (optional)
{
    const BYTE * pbStringEnd = (const BYTE *)lpStringEnd;
    const BYTE * pbString = (const BYTE *)lpString;
    DWORD dwErrCode = ERROR_SUCCESS;
    size_t nInvalidChars = 0;
    size_t nOutLength = 0;
    BYTE InvalidChars[MAX_INVALID_CHARS];

    // Set the end of the input if not specified
    if(pbStringEnd == NULL)
        pbStringEnd = pbString + strlen((char *)pbString);

    // Keep conversion as long
    while(pbString < pbStringEnd)
    {
        size_t ccBytesEaten = 0;
        size_t nCharLength;
        DWORD dwCodePoint = 0;

        // Decode the single UTF-8 char
        if((dwErrCode = UTF8_DecodeCodePoint(pbString, pbStringEnd, dwCodePoint, ccBytesEaten)) != ERROR_SUCCESS)
        {
            // Exactly one byte should be eaten on error
            assert(ccBytesEaten == 1);

            // If invalid chars are allowed, we replace the result with 0xFFFD
            if(dwFlags & SFILE_UTF8_ALLOW_INVALID_CHARS)
            {
                // Replace the code point with invalid marker and continue on the next character
                dwCodePoint = SFILE_UTF8_INVALID_CHARACTER;
                dwErrCode = ERROR_SUCCESS;
            }

            // If the invalid chars are not allowed, we put the invalid char to the stack
            else
            {
                // Flush the invalid characters, if full
                if(nInvalidChars >= _countof(InvalidChars))
                {
                    nOutLength = UTF8_FlushInvalidChars(szBuffer, ccBuffer, nOutLength, InvalidChars, nInvalidChars);
                    nInvalidChars = 0;
                }

                // Put the invalid char to the stack
                InvalidChars[nInvalidChars++] = pbString[0];
                pbString++;
                continue;
            }
        }

        // Check whether the unicode char is not out of range
        assert(dwCodePoint <= SFILE_UNICODE_MAX);

        // Move the source pointer by the number of bytes eaten
        pbString = pbString + ccBytesEaten;

        // Flush the invalid characters, if any
        nOutLength = UTF8_FlushInvalidChars(szBuffer, ccBuffer, nOutLength, InvalidChars, nInvalidChars);
        nInvalidChars = 0;

#ifdef STORMLIB_WIDE_CHAR
        {
            unsigned short Utf16Buffer[2];

            // Encode the code point into UTF-16
            nCharLength = UTF16_EncodeCodePoint(dwCodePoint, Utf16Buffer);

            // Write the encoded UTF-16 to the output buffer, if present
            if(szBuffer != NULL && (nOutLength + nCharLength) < ccBuffer)
            {
                memcpy(szBuffer + nOutLength, Utf16Buffer, nCharLength * sizeof(unsigned short));
            }
        }
#else
        {
            BYTE Utf8Buffer[4];

            // Encode the code point into UTF-8
            nCharLength = UTF8_EncodeCodePoint(dwCodePoint, Utf8Buffer);

            // Write the encoded UTF-16 to the output buffer, if present
            if(szBuffer != NULL && (nOutLength + nCharLength) < ccBuffer)
            {
                memcpy(szBuffer + nOutLength, Utf8Buffer, nCharLength);
            }
        }
#endif

        // Increment the output length
        nOutLength = nOutLength + nCharLength;
    }

    // Flush the invalid characters, if any
    nOutLength = UTF8_FlushInvalidChars(szBuffer, ccBuffer, nOutLength, InvalidChars, nInvalidChars);
    nInvalidChars = 0;

    // Terminate the string with zero, if we still have space
    if(szBuffer != NULL && nOutLength < ccBuffer)
        szBuffer[nOutLength] = 0;
    nOutLength++;

    // Give the output length, if required
    if(pOutLength != NULL)
        pOutLength[0] = nOutLength;
    return dwErrCode;
}

DWORD WINAPI SMemFileNameToUTF8(
    void * lpBuffer,                // Pointer to the output buffer. If NULL, the function will calulate the needed length
    size_t ccBuffer,                // Length of the output buffer (must include EOS)
    const TCHAR * szString,         // Pointer to the begin of the string
    const TCHAR * szStringEnd,      // Pointer to the end of string. If NULL, it's assumed to be zero-terminated
    DWORD /* dwFlags */,            // Additional flags
    size_t * pOutLength = NULL)     // Pointer to a variable that receives the needed length in bytes (optional)
{
    LPBYTE pbBuffer = (LPBYTE)lpBuffer;
    size_t nOutLength = 0;
    DWORD dwErrCode = ERROR_SUCCESS;

    // Set the end of the input if not specified
    if(szStringEnd == NULL)
        szStringEnd = szString + _tcslen(szString);

    // Keep conversion as long
    while(szString < szStringEnd)
    {
        size_t ccCharsEaten = 0;
        size_t nUtf8Length;
        DWORD dwCodePoint = 0;
        BYTE Utf8Buffer[MAX_INVALID_CHARS];

        // Check for encoded sequence of bytes
        if(szString[0] == '%')
        {
            // If there is a single hexa number ("%c7"), decode that number
            if((szString + 3) <= szStringEnd)
            {
                TCHAR HexaString[3] = {0};

                HexaString[0] = szString[1];
                HexaString[1] = szString[2];
                if(SMemStrToBin(HexaString, Utf8Buffer, 1) == ERROR_SUCCESS)
                {
                    nOutLength = UTF8_FlushBinBuffer(pbBuffer, ccBuffer, nOutLength, Utf8Buffer, 1);
                    szString += 3;
                    continue;
                }
            }

            // If there is an escaped sequence ("%u[aabbcc]"), decode that sequence
            if((nUtf8Length = UTF16_IsEncodedCharSequence(szString, szStringEnd, Utf8Buffer)) != 0)
            {
                nOutLength = UTF8_FlushBinBuffer(pbBuffer, ccBuffer, nOutLength, Utf8Buffer, nUtf8Length);
                szString += (nUtf8Length * 2) + 4;
                continue;
            }
        }

#ifdef STORMLIB_WIDE_CHAR
        // Try to decode the code point from UTF-16
        if((dwErrCode = UTF16_DecodeCodePoint(szString, szStringEnd, dwCodePoint, ccCharsEaten)) != ERROR_SUCCESS)
            return dwErrCode;
#else
        // Try to decode the code point from UTF-16
        if((dwErrCode = UTF8_DecodeCodePoint((const BYTE *)szString, (const BYTE *)szStringEnd, dwCodePoint, ccCharsEaten)) != ERROR_SUCCESS)
            return dwErrCode;
#endif

        // Check whether the unicode char is not out of range
        assert(dwCodePoint <= SFILE_UNICODE_MAX);

        // Move the source pointer by the number of bytes eaten
        szString = szString + ccCharsEaten;

        // Encode the UNICODE char
        nUtf8Length = UTF8_EncodeCodePoint(dwCodePoint, Utf8Buffer);

        // Do we have enough space in the buffer?
        if(pbBuffer != NULL && (nOutLength + nUtf8Length) < ccBuffer)
        {
            // Write the encoded UTF-16 to the output
            memcpy(pbBuffer + nOutLength, Utf8Buffer, nUtf8Length);
        }

        // Increment the output length
        nOutLength = nOutLength + nUtf8Length;
    }

    // Terminate the string with zero, if we still have space
    if(pbBuffer != NULL && nOutLength < ccBuffer)
        pbBuffer[nOutLength] = 0;
    nOutLength++;

    // Give the output length, if required
    if(pOutLength != NULL)
        pOutLength[0] = nOutLength;
    return dwErrCode;
}