Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add retreat counterpart of advance #20

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions source/utf8/checked.h
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,13 @@ namespace utf8
utf8::next(it, end);
}

// Steps `it` backwards by invoking utf8::prior(it, end) once per step.
// A counter starting at 0 is compared against `n` with operator<, so no call
// is made at all when `0 < n` is false.
// NOTE(review): `end` is handed unchanged to utf8::prior as its second
// argument; presumably it is the boundary the backward scan must not cross --
// confirm against utf8::prior's declaration.
template <typename octet_iterator, typename distance_type>
void retreat (octet_iterator& it, distance_type n, octet_iterator end)
{
    distance_type steps_taken = 0;
    while (steps_taken < n) {
        utf8::prior(it, end);
        ++steps_taken;
    }
}

template <typename octet_iterator>
typename std::iterator_traits<octet_iterator>::difference_type
distance (octet_iterator first, octet_iterator last)
Expand Down
26 changes: 13 additions & 13 deletions source/utf8/core.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,15 +116,15 @@ namespace internal
inline bool is_overlong_sequence(uint32_t cp, octet_difference_type length)
{
if (cp < 0x80) {
if (length != 1)
if (length != 1)
return true;
}
else if (cp < 0x800) {
if (length != 2)
if (length != 2)
return true;
}
else if (cp < 0x10000) {
if (length != 3)
if (length != 3)
return true;
}

Expand All @@ -142,11 +142,11 @@ namespace internal

if (!utf8::internal::is_trail(*it))
return INCOMPLETE_SEQUENCE;

return UTF8_OK;
}

#define UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(IT, END) {utf_error ret = increase_safely(IT, END); if (ret != UTF8_OK) return ret;}
#define UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(IT, END) {utf_error ret = increase_safely(IT, END); if (ret != UTF8_OK) return ret;}

/// get_sequence_x functions decode utf-8 sequences of the length x
template <typename octet_iterator>
Expand All @@ -163,9 +163,9 @@ namespace internal
template <typename octet_iterator>
utf_error get_sequence_2(octet_iterator& it, octet_iterator end, uint32_t& code_point)
{
if (it == end)
if (it == end)
return NOT_ENOUGH_ROOM;

code_point = utf8::internal::mask8(*it);

UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
Expand All @@ -180,7 +180,7 @@ namespace internal
{
if (it == end)
return NOT_ENOUGH_ROOM;

code_point = utf8::internal::mask8(*it);

UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
Expand Down Expand Up @@ -222,7 +222,7 @@ namespace internal
template <typename octet_iterator>
utf_error validate_next(octet_iterator& it, octet_iterator end, uint32_t& code_point)
{
if (it == end)
if (it == end)
return NOT_ENOUGH_ROOM;

// Save the original value of it so we can go back in case of failure
Expand All @@ -237,7 +237,7 @@ namespace internal
// Get trail octets and calculate the code point
utf_error err = UTF8_OK;
switch (length) {
case 0:
case 0:
return INVALID_LEAD;
case 1:
err = utf8::internal::get_sequence_1(it, end, cp);
Expand Down Expand Up @@ -265,7 +265,7 @@ namespace internal
else
err = OVERLONG_SEQUENCE;
}
else
else
err = INVALID_CODE_POINT;
}

Expand Down Expand Up @@ -314,8 +314,8 @@ namespace internal
((it != end) && (utf8::internal::mask8(*it)) == bom[2])
);
}
//Deprecated in release 2.3

//Deprecated in release 2.3
template <typename octet_iterator>
inline bool is_bom (octet_iterator it)
{
Expand Down
39 changes: 23 additions & 16 deletions source/utf8/unchecked.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,13 @@ DEALINGS IN THE SOFTWARE.

namespace utf8
{
namespace unchecked
namespace unchecked
{
template <typename octet_iterator>
octet_iterator append(uint32_t cp, octet_iterator result)
{
if (cp < 0x80) // one octet
*(result++) = static_cast<uint8_t>(cp);
*(result++) = static_cast<uint8_t>(cp);
else if (cp < 0x800) { // two octets
*(result++) = static_cast<uint8_t>((cp >> 6) | 0xc0);
*(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
Expand Down Expand Up @@ -70,28 +70,28 @@ namespace utf8
cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f);
break;
case 3:
++it;
++it;
cp = ((cp << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff);
++it;
cp += (*it) & 0x3f;
break;
case 4:
++it;
cp = ((cp << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff);
cp = ((cp << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff);
++it;
cp += (utf8::internal::mask8(*it) << 6) & 0xfff;
++it;
cp += (*it) & 0x3f;
cp += (*it) & 0x3f;
break;
}
++it;
return cp;
return cp;
}

// Returns whatever utf8::unchecked::next yields for the sequence at `it`.
// `it` is received by value, so the caller's own iterator is left untouched.
// NOTE(review): the return statement is present twice below. This looks like
// the removed and added sides of a whitespace-only diff line rather than
// intentional code (the second statement can never execute) -- confirm
// against the actual header.
template <typename octet_iterator>
uint32_t peek_next(octet_iterator it)
{
return utf8::unchecked::next(it);
return utf8::unchecked::next(it);
}

template <typename octet_iterator>
Expand All @@ -116,19 +116,26 @@ namespace utf8
utf8::unchecked::next(it);
}

// Steps `it` backwards by invoking utf8::unchecked::prior(it) once per step.
// A counter starting at 0 is compared against `n` with operator<, so no call
// is made at all when `0 < n` is false. No range boundary is passed along;
// this function itself performs no checks on `it`.
template <typename octet_iterator, typename distance_type>
void retreat (octet_iterator& it, distance_type n)
{
    distance_type steps_taken = 0;
    while (steps_taken < n) {
        utf8::unchecked::prior(it);
        ++steps_taken;
    }
}

// Repeatedly applies utf8::unchecked::next to the local copy `first` while
// `first < last` holds, and returns the counter `dist`.
// The return type is the iterator's difference_type from std::iterator_traits.
// NOTE(review): the `for` header is present twice below. This looks like the
// removed and added sides of a whitespace-only diff line; as literally written
// the two headers nest and share `dist`, so confirm against the actual header
// before relying on the exact value returned.
template <typename octet_iterator>
typename std::iterator_traits<octet_iterator>::difference_type
distance (octet_iterator first, octet_iterator last)
{
typename std::iterator_traits<octet_iterator>::difference_type dist;
for (dist = 0; first < last; ++dist)
for (dist = 0; first < last; ++dist)
utf8::unchecked::next(first);
return dist;
}

template <typename u16bit_iterator, typename octet_iterator>
octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
{
{
while (start != end) {
uint32_t cp = utf8::internal::mask16(*start++);
// Take care of surrogate pairs first
Expand All @@ -138,7 +145,7 @@ namespace utf8
}
result = utf8::unchecked::append(cp, result);
}
return result;
return result;
}

template <typename u16bit_iterator, typename octet_iterator>
Expand Down Expand Up @@ -176,7 +183,7 @@ namespace utf8

// The iterator class
template <typename octet_iterator>
class iterator : public std::iterator <std::bidirectional_iterator_tag, uint32_t> {
class iterator : public std::iterator <std::bidirectional_iterator_tag, uint32_t> {
octet_iterator it;
public:
iterator () {}
Expand All @@ -188,15 +195,15 @@ namespace utf8
octet_iterator temp = it;
return utf8::unchecked::next(temp);
}
bool operator == (const iterator& rhs) const
{
bool operator == (const iterator& rhs) const
{
return (it == rhs.it);
}
bool operator != (const iterator& rhs) const
{
return !(operator == (rhs));
}
iterator& operator ++ ()
iterator& operator ++ ()
{
::std::advance(it, utf8::internal::sequence_length(it));
return *this;
Expand All @@ -206,7 +213,7 @@ namespace utf8
iterator temp = *this;
::std::advance(it, utf8::internal::sequence_length(it));
return temp;
}
}
iterator& operator -- ()
{
utf8::unchecked::prior(it);
Expand All @@ -221,7 +228,7 @@ namespace utf8
}; // class iterator

} // namespace utf8::unchecked
} // namespace utf8
} // namespace utf8


#endif // header guard
Expand Down