blockchain/src/common/utf8/unchecked.h

// Copyright (c) 2014-2018 Zano Project
// Copyright (c) 2014-2018 The Louisdor Project
// Copyright (c) 2012-2013 The Boolberry developers
// Copyright (c) 2017-2025 Lethean (https://lt.hn)
//
// Licensed under the European Union Public Licence (EUPL) version 1.2.
// You may obtain a copy of the licence at:
//
//     https://joinup.ec.europa.eu/software/page/eupl/licence-eupl
//
// The EUPL is a copyleft licence that is compatible with the MIT/X11
// licence used by the original projects; the MIT terms are therefore
// considered “grandfathered” under the EUPL for this code.
//
// SPDX‑License‑Identifier: EUPL-1.2
//

#ifndef UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
#define UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731

#include "core.h"

namespace utf8
{
    namespace unchecked
    {
        template <typename octet_iterator>
        octet_iterator append(utfchar32_t cp, octet_iterator result)
        {
            return internal::append(cp, result);
        }

        template <typename word_iterator>
        word_iterator append16(utfchar32_t cp, word_iterator result)
        {
            return internal::append16(cp, result);
        }

        template <typename octet_iterator, typename output_iterator>
        output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, utfchar32_t replacement)
        {
            while (start != end) {
                octet_iterator sequence_start = start;
                internal::utf_error err_code = utf8::internal::validate_next(start, end);
                switch (err_code) {
                    case internal::UTF8_OK :
                        for (octet_iterator it = sequence_start; it != start; ++it)
                            *out++ = *it;
                        break;
                    case internal::NOT_ENOUGH_ROOM:
                        out = utf8::unchecked::append(replacement, out);
                        start = end;
                        break;
                    case internal::INVALID_LEAD:
                        out = utf8::unchecked::append(replacement, out);
                        ++start;
                        break;
                    case internal::INCOMPLETE_SEQUENCE:
                    case internal::OVERLONG_SEQUENCE:
                    case internal::INVALID_CODE_POINT:
                        out = utf8::unchecked::append(replacement, out);
                        ++start;
                        // just one replacement mark for the sequence
                        while (start != end && utf8::internal::is_trail(*start))
                            ++start;
                        break;
                }
            }
            return out;
        }

        template <typename octet_iterator, typename output_iterator>
        inline output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out)
        {
            static const utfchar32_t replacement_marker = utf8::internal::mask16(0xfffd);
            return utf8::unchecked::replace_invalid(start, end, out, replacement_marker);
        }

        inline std::string replace_invalid(const std::string& s, utfchar32_t replacement)
        {
            std::string result;
            replace_invalid(s.begin(), s.end(), std::back_inserter(result), replacement);
            return result;
        }

        inline std::string replace_invalid(const std::string& s)
        {
            std::string result;
            replace_invalid(s.begin(), s.end(), std::back_inserter(result));
            return result;
        }

        template <typename octet_iterator>
        utfchar32_t next(octet_iterator& it)
        {
            utfchar32_t cp = utf8::internal::mask8(*it);
            switch (utf8::internal::sequence_length(it)) {
                case 1:
                    break;
                case 2:
                    it++;
                    cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f);
                    break;
                case 3:
                    ++it; 
                    cp = ((cp << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff);
                    ++it;
                    cp = static_cast<utfchar32_t>(cp + ((*it) & 0x3f));
                    break;
                case 4:
                    ++it;
                    cp = ((cp << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff);                
                    ++it;
                    cp = static_cast<utfchar32_t>(cp + ((utf8::internal::mask8(*it) << 6) & 0xfff));
                    ++it;
                    cp = static_cast<utfchar32_t>(cp + ((*it) & 0x3f)); 
                    break;
            }
            ++it;
            return cp;
        }

        template <typename octet_iterator>
        utfchar32_t peek_next(octet_iterator it)
        {
            return utf8::unchecked::next(it);
        }

        template <typename word_iterator>
        utfchar32_t next16(word_iterator& it)
        {
            utfchar32_t cp = utf8::internal::mask16(*it++);
            if (utf8::internal::is_lead_surrogate(cp))
                return (cp << 10) + *it++ + utf8::internal::SURROGATE_OFFSET;
            return cp;
        }

        template <typename octet_iterator>
        utfchar32_t prior(octet_iterator& it)
        {
            while (utf8::internal::is_trail(*(--it))) ;
            octet_iterator temp = it;
            return utf8::unchecked::next(temp);
        }

        template <typename octet_iterator, typename distance_type>
        void advance(octet_iterator& it, distance_type n)
        {
            const distance_type zero(0);
            if (n < zero) {
                // backward
                for (distance_type i = n; i < zero; ++i)
                    utf8::unchecked::prior(it);
            } else {
                // forward
                for (distance_type i = zero; i < n; ++i)
                    utf8::unchecked::next(it);
            }
        }

        template <typename octet_iterator>
        typename std::iterator_traits<octet_iterator>::difference_type
        distance(octet_iterator first, octet_iterator last)
        {
            typename std::iterator_traits<octet_iterator>::difference_type dist;
            for (dist = 0; first < last; ++dist) 
                utf8::unchecked::next(first);
            return dist;
        }

        template <typename u16bit_iterator, typename octet_iterator>
        octet_iterator utf16to8(u16bit_iterator start, u16bit_iterator end, octet_iterator result)
        {
            while (start != end) {
                utfchar32_t cp = utf8::internal::mask16(*start++);
                // Take care of surrogate pairs first
                if (utf8::internal::is_lead_surrogate(cp)) {
                    if (start == end)
                        return result;
                    utfchar32_t trail_surrogate = utf8::internal::mask16(*start++);
                    cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
                }
                result = utf8::unchecked::append(cp, result);
            }
            return result;
        }

        template <typename u16bit_iterator, typename octet_iterator>
        u16bit_iterator utf8to16(octet_iterator start, octet_iterator end, u16bit_iterator result)
        {
            while (start < end) {
                utfchar32_t cp = utf8::unchecked::next(start);
                if (cp > 0xffff) { //make a surrogate pair
                    *result++ = static_cast<utfchar16_t>((cp >> 10)   + internal::LEAD_OFFSET);
                    *result++ = static_cast<utfchar16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
                }
                else
                    *result++ = static_cast<utfchar16_t>(cp);
            }
            return result;
        }

        template <typename octet_iterator, typename u32bit_iterator>
        octet_iterator utf32to8(u32bit_iterator start, u32bit_iterator end, octet_iterator result)
        {
            while (start != end)
                result = utf8::unchecked::append(*(start++), result);

            return result;
        }

        template <typename octet_iterator, typename u32bit_iterator>
        u32bit_iterator utf8to32(octet_iterator start, octet_iterator end, u32bit_iterator result)
        {
            while (start < end)
                (*result++) = utf8::unchecked::next(start);

            return result;
        }

        // The iterator class
        template <typename octet_iterator>
          class iterator {
            octet_iterator it;
            public:
            typedef utfchar32_t value_type;
            typedef utfchar32_t* pointer;
            typedef utfchar32_t& reference;
            typedef std::ptrdiff_t difference_type;
            typedef std::bidirectional_iterator_tag iterator_category;
            iterator () {}
            explicit iterator (const octet_iterator& octet_it): it(octet_it) {}
            // the default "big three" are OK
            octet_iterator base () const { return it; }
            utfchar32_t operator * () const
            {
                octet_iterator temp = it;
                return utf8::unchecked::next(temp);
            }
            bool operator == (const iterator& rhs) const 
            { 
                return (it == rhs.it);
            }
            bool operator != (const iterator& rhs) const
            {
                return !(operator == (rhs));
            }
            iterator& operator ++ () 
            {
                ::std::advance(it, utf8::internal::sequence_length(it));
                return *this;
            }
            iterator operator ++ (int)
            {
                iterator temp = *this;
                ::std::advance(it, utf8::internal::sequence_length(it));
                return temp;
            }  
            iterator& operator -- ()
            {
                utf8::unchecked::prior(it);
                return *this;
            }
            iterator operator -- (int)
            {
                iterator temp = *this;
                utf8::unchecked::prior(it);
                return temp;
            }
          }; // class iterator

    } // namespace utf8::unchecked
} // namespace utf8 


#endif // header guard