From 85fc9d93b5e89d09b61a271e05e7de8a3dca2359 Mon Sep 17 00:00:00 2001 From: Shauren Date: Tue, 31 Jan 2023 18:17:34 +0100 Subject: Dep: Upgrade utfcpp to 3.2.3 (cherry picked from commit 74dc88eb54751d5cd3eb8c379cdf9b2bdbbaa1ec) --- dep/utf8cpp/utf8/unchecked.h | 93 +++++++++++++++++++++++++++++--------------- 1 file changed, 61 insertions(+), 32 deletions(-) (limited to 'dep/utf8cpp/utf8/unchecked.h') diff --git a/dep/utf8cpp/utf8/unchecked.h b/dep/utf8cpp/utf8/unchecked.h index cb2427166b1..8fe83c9ecbc 100644 --- a/dep/utf8cpp/utf8/unchecked.h +++ b/dep/utf8cpp/utf8/unchecked.h @@ -32,29 +32,52 @@ DEALINGS IN THE SOFTWARE. namespace utf8 { - namespace unchecked + namespace unchecked { template octet_iterator append(uint32_t cp, octet_iterator result) { - if (cp < 0x80) // one octet - *(result++) = static_cast(cp); - else if (cp < 0x800) { // two octets - *(result++) = static_cast((cp >> 6) | 0xc0); - *(result++) = static_cast((cp & 0x3f) | 0x80); - } - else if (cp < 0x10000) { // three octets - *(result++) = static_cast((cp >> 12) | 0xe0); - *(result++) = static_cast(((cp >> 6) & 0x3f) | 0x80); - *(result++) = static_cast((cp & 0x3f) | 0x80); - } - else { // four octets - *(result++) = static_cast((cp >> 18) | 0xf0); - *(result++) = static_cast(((cp >> 12) & 0x3f)| 0x80); - *(result++) = static_cast(((cp >> 6) & 0x3f) | 0x80); - *(result++) = static_cast((cp & 0x3f) | 0x80); + return internal::append(cp, result); + } + + template + output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, uint32_t replacement) + { + while (start != end) { + octet_iterator sequence_start = start; + internal::utf_error err_code = utf8::internal::validate_next(start, end); + switch (err_code) { + case internal::UTF8_OK : + for (octet_iterator it = sequence_start; it != start; ++it) + *out++ = *it; + break; + case internal::NOT_ENOUGH_ROOM: + out = utf8::unchecked::append (replacement, out); + start = end; + break; + case internal::INVALID_LEAD: + out = utf8::unchecked::append (replacement, out); + ++start; + break; + case internal::INCOMPLETE_SEQUENCE: + case internal::OVERLONG_SEQUENCE: + case internal::INVALID_CODE_POINT: + out = utf8::unchecked::append (replacement, out); + ++start; + // just one replacement mark for the sequence + while (start != end && utf8::internal::is_trail(*start)) + ++start; + break; + } } - return result; + return out; + } + + template + inline output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out) + { + static const uint32_t replacement_marker = utf8::internal::mask16(0xfffd); + return utf8::unchecked::replace_invalid(start, end, out, replacement_marker); } template @@ -85,13 +108,13 @@ namespace utf8 break; } ++it; - return cp; + return cp; } template uint32_t peek_next(octet_iterator it) { - return utf8::unchecked::next(it); + return utf8::unchecked::next(it); } template @@ -102,18 +125,19 @@ namespace utf8 return utf8::unchecked::next(temp); } - // Deprecated in versions that include prior, but only for the sake of consistency (see utf8::previous) - template - inline uint32_t previous(octet_iterator& it) - { - return utf8::unchecked::prior(it); - } - template void advance (octet_iterator& it, distance_type n) { - for (distance_type i = 0; i < n; ++i) - utf8::unchecked::next(it); + const distance_type zero(0); + if (n < zero) { + // backward + for (distance_type i = n; i < zero; ++i) + utf8::unchecked::prior(it); + } else { + // forward + for (distance_type i = zero; i < n; ++i) + utf8::unchecked::next(it); + } } template @@ -128,7 +152,7 @@ namespace utf8 template octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result) - { + { while (start != end) { uint32_t cp = utf8::internal::mask16(*start++); // Take care of surrogate pairs first @@ -138,7 +162,7 @@ namespace utf8 } result = utf8::unchecked::append(cp, result); } - return result; + return result; } template @@ -176,9 +200,14 @@ namespace utf8 // The iterator class template - class iterator : public std::iterator { + class iterator { octet_iterator it; public: + typedef uint32_t value_type; + typedef uint32_t* pointer; + typedef uint32_t& reference; + typedef std::ptrdiff_t difference_type; + typedef std::bidirectional_iterator_tag iterator_category; iterator () {} explicit iterator (const octet_iterator& octet_it): it(octet_it) {} // the default "big three" are OK -- cgit v1.2.3