diff --git a/source/utf8/checked.h b/source/utf8/checked.h index 2aef583..d4b5469 100644 --- a/source/utf8/checked.h +++ b/source/utf8/checked.h @@ -193,6 +193,13 @@ namespace utf8 utf8::next(it, end); } + template + void retreat (octet_iterator& it, distance_type n, octet_iterator end) + { + for (distance_type i = 0; i < n; ++i) + utf8::prior(it, end); + } + template typename std::iterator_traits::difference_type distance (octet_iterator first, octet_iterator last) diff --git a/source/utf8/core.h b/source/utf8/core.h index ae0f367..927942d 100644 --- a/source/utf8/core.h +++ b/source/utf8/core.h @@ -116,15 +116,15 @@ namespace internal inline bool is_overlong_sequence(uint32_t cp, octet_difference_type length) { if (cp < 0x80) { - if (length != 1) + if (length != 1) return true; } else if (cp < 0x800) { - if (length != 2) + if (length != 2) return true; } else if (cp < 0x10000) { - if (length != 3) + if (length != 3) return true; } @@ -142,11 +142,11 @@ namespace internal if (!utf8::internal::is_trail(*it)) return INCOMPLETE_SEQUENCE; - + return UTF8_OK; } - #define UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(IT, END) {utf_error ret = increase_safely(IT, END); if (ret != UTF8_OK) return ret;} + #define UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(IT, END) {utf_error ret = increase_safely(IT, END); if (ret != UTF8_OK) return ret;} /// get_sequence_x functions decode utf-8 sequences of the length x template @@ -163,9 +163,9 @@ namespace internal template utf_error get_sequence_2(octet_iterator& it, octet_iterator end, uint32_t& code_point) { - if (it == end) + if (it == end) return NOT_ENOUGH_ROOM; - + code_point = utf8::internal::mask8(*it); UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) @@ -180,7 +180,7 @@ namespace internal { if (it == end) return NOT_ENOUGH_ROOM; - + code_point = utf8::internal::mask8(*it); UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) @@ -222,7 +222,7 @@ namespace internal template utf_error validate_next(octet_iterator& it, octet_iterator end, uint32_t& code_point) { - if (it == end) + if (it == end) return NOT_ENOUGH_ROOM; // Save the original value of it so we can go back in case of failure @@ -237,7 +237,7 @@ namespace internal // Get trail octets and calculate the code point utf_error err = UTF8_OK; switch (length) { - case 0: + case 0: return INVALID_LEAD; case 1: err = utf8::internal::get_sequence_1(it, end, cp); @@ -265,7 +265,7 @@ namespace internal else err = OVERLONG_SEQUENCE; } - else + else err = INVALID_CODE_POINT; } @@ -314,8 +314,8 @@ namespace internal ((it != end) && (utf8::internal::mask8(*it)) == bom[2]) ); } - - //Deprecated in release 2.3 + + //Deprecated in release 2.3 template inline bool is_bom (octet_iterator it) { diff --git a/source/utf8/unchecked.h b/source/utf8/unchecked.h index cb24271..01bdd07 100644 --- a/source/utf8/unchecked.h +++ b/source/utf8/unchecked.h @@ -32,13 +32,13 @@ DEALINGS IN THE SOFTWARE. namespace utf8 { - namespace unchecked + namespace unchecked { template octet_iterator append(uint32_t cp, octet_iterator result) { if (cp < 0x80) // one octet - *(result++) = static_cast(cp); + *(result++) = static_cast(cp); else if (cp < 0x800) { // two octets *(result++) = static_cast((cp >> 6) | 0xc0); *(result++) = static_cast((cp & 0x3f) | 0x80); @@ -70,28 +70,28 @@ namespace utf8 cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f); break; case 3: - ++it; + ++it; cp = ((cp << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff); ++it; cp += (*it) & 0x3f; break; case 4: ++it; - cp = ((cp << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff); + cp = ((cp << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff); ++it; cp += (utf8::internal::mask8(*it) << 6) & 0xfff; ++it; - cp += (*it) & 0x3f; + cp += (*it) & 0x3f; break; } ++it; - return cp; + return cp; } template uint32_t peek_next(octet_iterator it) { - return utf8::unchecked::next(it); + return utf8::unchecked::next(it); } template @@ -116,19 +116,26 @@ namespace utf8 utf8::unchecked::next(it); } + template + void retreat (octet_iterator& it, distance_type n) + { + for (distance_type i = 0; i < n; ++i) + utf8::unchecked::prior(it); + } + template typename std::iterator_traits::difference_type distance (octet_iterator first, octet_iterator last) { typename std::iterator_traits::difference_type dist; - for (dist = 0; first < last; ++dist) + for (dist = 0; first < last; ++dist) utf8::unchecked::next(first); return dist; } template octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result) - { + { while (start != end) { uint32_t cp = utf8::internal::mask16(*start++); // Take care of surrogate pairs first @@ -138,7 +145,7 @@ namespace utf8 } result = utf8::unchecked::append(cp, result); } - return result; + return result; } template @@ -176,7 +183,7 @@ namespace utf8 // The iterator class template - class iterator : public std::iterator { + class iterator : public std::iterator { octet_iterator it; public: iterator () {} @@ -188,15 +195,15 @@ namespace utf8 octet_iterator temp = it; return utf8::unchecked::next(temp); } - bool operator == (const iterator& rhs) const - { + bool operator == (const iterator& rhs) const + { return (it == rhs.it); } bool operator != (const iterator& rhs) const { return !(operator == (rhs)); } - iterator& operator ++ () + iterator& operator ++ () { ::std::advance(it, utf8::internal::sequence_length(it)); return *this; @@ -206,7 +213,7 @@ namespace utf8 iterator temp = *this; ::std::advance(it, utf8::internal::sequence_length(it)); return temp; - } + } iterator& operator -- () { utf8::unchecked::prior(it); @@ -221,7 +228,7 @@ namespace utf8 }; // class iterator } // namespace utf8::unchecked -} // namespace utf8 +} // namespace utf8 #endif // header guard