// <text_encoding> -*- C++ -*-

// Copyright The GNU Toolchain Authors.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

/** @file include/text_encoding
 *  This is a Standard C++ Library header.
 */

#ifndef _GLIBCXX_TEXT_ENCODING
#define _GLIBCXX_TEXT_ENCODING

#ifdef _GLIBCXX_SYSHDR
#pragma GCC system_header
#endif

#include <bits/requires_hosted.h>

#define __glibcxx_want_text_encoding
#include <bits/version.h>

#ifdef __cpp_lib_text_encoding
#include <compare>
#include <string_view>
#include <bits/functional_hash.h> // hash
#include <bits/ranges_util.h>     // view_interface
#include <bits/unicode.h>         // __charset_alias_match
#include <ext/numeric_traits.h>   // __int_traits

namespace std _GLIBCXX_VISIBILITY(default)
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION

  /**
   * @brief An interface for accessing the IANA Character Sets registry.
   * @ingroup locales
   * @since C++23
   */
  struct text_encoding
  {
  private:
    // One row of the encoding table: a numeric ID paired with one of the
    // names recorded for that ID.
    struct _Rep
    {
      using id = __INT_LEAST32_TYPE__;
      id _M_id;
      const char* _M_name;

      // Order a row against a bare numeric ID, so that the table can be
      // searched by ID with std::lower_bound.
      friend constexpr bool
      operator<(const _Rep& __row, id __mib) noexcept
      { return __mib > __row._M_id; }

      // A row equals a name only if the characters match exactly.
      friend constexpr bool
      operator==(const _Rep& __row, string_view __name) noexcept
      { return string_view(__row._M_name) == __name; }
    };

  public:
    // Maximum number of characters of an encoding name that an object
    // stores (the internal buffer has one extra byte for the terminator).
    static constexpr size_t max_name_length = 63;

    // Identifiers for the known character encodings; this is the type
    // returned by mib(). The underlying type is the table's ID type.
    // `other` and `unknown` are the two values that have no name in the
    // table.
    // NOTE(review): the enumerators are not all declared in ascending
    // numeric order (see PC8CodePage437 = 2011 and PCp852 = 2010 further
    // down). Presumably this mirrors the order used by the C++ standard;
    // confirm before "tidying" it. The numeric values are presumably the
    // IANA MIBenum numbers; verify against the registry when updating.
    enum class id : _Rep::id
    {
      other = 1,
      unknown = 2,
      ASCII = 3,
      ISOLatin1 = 4,
      ISOLatin2 = 5,
      ISOLatin3 = 6,
      ISOLatin4 = 7,
      ISOLatinCyrillic = 8,
      ISOLatinArabic = 9,
      ISOLatinGreek = 10,
      ISOLatinHebrew = 11,
      ISOLatin5 = 12,
      ISOLatin6 = 13,
      ISOTextComm = 14,
      HalfWidthKatakana = 15,
      JISEncoding = 16,
      ShiftJIS = 17,
      EUCPkdFmtJapanese = 18,
      EUCFixWidJapanese = 19,
      ISO4UnitedKingdom = 20,
      ISO11SwedishForNames = 21,
      ISO15Italian = 22,
      ISO17Spanish = 23,
      ISO21German = 24,
      ISO60DanishNorwegian = 25,
      ISO69French = 26,
      ISO10646UTF1 = 27,
      ISO646basic1983 = 28,
      INVARIANT = 29,
      ISO2IntlRefVersion = 30,
      NATSSEFI = 31,
      NATSSEFIADD = 32,
      ISO10Swedish = 35,
      KSC56011987 = 36,
      ISO2022KR = 37,
      EUCKR = 38,
      ISO2022JP = 39,
      ISO2022JP2 = 40,
      ISO13JISC6220jp = 41,
      ISO14JISC6220ro = 42,
      ISO16Portuguese = 43,
      ISO18Greek7Old = 44,
      ISO19LatinGreek = 45,
      ISO25French = 46,
      ISO27LatinGreek1 = 47,
      ISO5427Cyrillic = 48,
      ISO42JISC62261978 = 49,
      ISO47BSViewdata = 50,
      ISO49INIS = 51,
      ISO50INIS8 = 52,
      ISO51INISCyrillic = 53,
      ISO54271981 = 54,
      ISO5428Greek = 55,
      ISO57GB1988 = 56,
      ISO58GB231280 = 57,
      ISO61Norwegian2 = 58,
      ISO70VideotexSupp1 = 59,
      ISO84Portuguese2 = 60,
      ISO85Spanish2 = 61,
      ISO86Hungarian = 62,
      ISO87JISX0208 = 63,
      ISO88Greek7 = 64,
      ISO89ASMO449 = 65,
      ISO90 = 66,
      ISO91JISC62291984a = 67,
      ISO92JISC62991984b = 68,
      ISO93JIS62291984badd = 69,
      ISO94JIS62291984hand = 70,
      ISO95JIS62291984handadd = 71,
      ISO96JISC62291984kana = 72,
      ISO2033 = 73,
      ISO99NAPLPS = 74,
      ISO102T617bit = 75,
      ISO103T618bit = 76,
      ISO111ECMACyrillic = 77,
      ISO121Canadian1 = 78,
      ISO122Canadian2 = 79,
      ISO123CSAZ24341985gr = 80,
      ISO88596E = 81,
      ISO88596I = 82,
      ISO128T101G2 = 83,
      ISO88598E = 84,
      ISO88598I = 85,
      ISO139CSN369103 = 86,
      ISO141JUSIB1002 = 87,
      ISO143IECP271 = 88,
      ISO146Serbian = 89,
      ISO147Macedonian = 90,
      ISO150 = 91,
      ISO151Cuba = 92,
      ISO6937Add = 93,
      ISO153GOST1976874 = 94,
      ISO8859Supp = 95,
      ISO10367Box = 96,
      ISO158Lap = 97,
      ISO159JISX02121990 = 98,
      ISO646Danish = 99,
      USDK = 100,
      DKUS = 101,
      KSC5636 = 102,
      Unicode11UTF7 = 103,
      ISO2022CN = 104,
      ISO2022CNEXT = 105,
      UTF8 = 106,
      ISO885913 = 109,
      ISO885914 = 110,
      ISO885915 = 111,
      ISO885916 = 112,
      GBK = 113,
      GB18030 = 114,
      OSDEBCDICDF0415 = 115,
      OSDEBCDICDF03IRV = 116,
      OSDEBCDICDF041 = 117,
      ISO115481 = 118,
      KZ1048 = 119,
      UCS2 = 1000,
      UCS4 = 1001,
      UnicodeASCII = 1002,
      UnicodeLatin1 = 1003,
      UnicodeJapanese = 1004,
      UnicodeIBM1261 = 1005,
      UnicodeIBM1268 = 1006,
      UnicodeIBM1276 = 1007,
      UnicodeIBM1264 = 1008,
      UnicodeIBM1265 = 1009,
      Unicode11 = 1010,
      SCSU = 1011,
      UTF7 = 1012,
      UTF16BE = 1013,
      UTF16LE = 1014,
      UTF16 = 1015,
      CESU8 = 1016,
      UTF32 = 1017,
      UTF32BE = 1018,
      UTF32LE = 1019,
      BOCU1 = 1020,
      UTF7IMAP = 1021,
      Windows30Latin1 = 2000,
      Windows31Latin1 = 2001,
      Windows31Latin2 = 2002,
      Windows31Latin5 = 2003,
      HPRoman8 = 2004,
      AdobeStandardEncoding = 2005,
      VenturaUS = 2006,
      VenturaInternational = 2007,
      DECMCS = 2008,
      PC850Multilingual = 2009,
      PC8DanishNorwegian = 2012,
      PC862LatinHebrew = 2013,
      PC8Turkish = 2014,
      IBMSymbols = 2015,
      IBMThai = 2016,
      HPLegal = 2017,
      HPPiFont = 2018,
      HPMath8 = 2019,
      HPPSMath = 2020,
      HPDesktop = 2021,
      VenturaMath = 2022,
      MicrosoftPublishing = 2023,
      Windows31J = 2024,
      GB2312 = 2025,
      Big5 = 2026,
      Macintosh = 2027,
      IBM037 = 2028,
      IBM038 = 2029,
      IBM273 = 2030,
      IBM274 = 2031,
      IBM275 = 2032,
      IBM277 = 2033,
      IBM278 = 2034,
      IBM280 = 2035,
      IBM281 = 2036,
      IBM284 = 2037,
      IBM285 = 2038,
      IBM290 = 2039,
      IBM297 = 2040,
      IBM420 = 2041,
      IBM423 = 2042,
      IBM424 = 2043,
      PC8CodePage437 = 2011,
      IBM500 = 2044,
      IBM851 = 2045,
      PCp852 = 2010,
      IBM855 = 2046,
      IBM857 = 2047,
      IBM860 = 2048,
      IBM861 = 2049,
      IBM863 = 2050,
      IBM864 = 2051,
      IBM865 = 2052,
      IBM868 = 2053,
      IBM869 = 2054,
      IBM870 = 2055,
      IBM871 = 2056,
      IBM880 = 2057,
      IBM891 = 2058,
      IBM903 = 2059,
      IBM904 = 2060,
      IBM905 = 2061,
      IBM918 = 2062,
      IBM1026 = 2063,
      IBMEBCDICATDE = 2064,
      EBCDICATDEA = 2065,
      EBCDICCAFR = 2066,
      EBCDICDKNO = 2067,
      EBCDICDKNOA = 2068,
      EBCDICFISE = 2069,
      EBCDICFISEA = 2070,
      EBCDICFR = 2071,
      EBCDICIT = 2072,
      EBCDICPT = 2073,
      EBCDICES = 2074,
      EBCDICESA = 2075,
      EBCDICESS = 2076,
      EBCDICUK = 2077,
      EBCDICUS = 2078,
      Unknown8BiT = 2079,
      Mnemonic = 2080,
      Mnem = 2081,
      VISCII = 2082,
      VIQR = 2083,
      KOI8R = 2084,
      HZGB2312 = 2085,
      IBM866 = 2086,
      PC775Baltic = 2087,
      KOI8U = 2088,
      IBM00858 = 2089,
      IBM00924 = 2090,
      IBM01140 = 2091,
      IBM01141 = 2092,
      IBM01142 = 2093,
      IBM01143 = 2094,
      IBM01144 = 2095,
      IBM01145 = 2096,
      IBM01146 = 2097,
      IBM01147 = 2098,
      IBM01148 = 2099,
      IBM01149 = 2100,
      Big5HKSCS = 2101,
      IBM1047 = 2102,
      PTCP154 = 2103,
      Amiga1251 = 2104,
      KOI7switched = 2105,
      BRF = 2106,
      TSCII = 2107,
      CP51932 = 2108,
      windows874 = 2109,
      windows1250 = 2250,
      windows1251 = 2251,
      windows1252 = 2252,
      windows1253 = 2253,
      windows1254 = 2254,
      windows1255 = 2255,
      windows1256 = 2256,
      windows1257 = 2257,
      windows1258 = 2258,
      TIS620 = 2259,
      CP50220 = 2260
    };
    // Make every enumerator nameable directly as a member of text_encoding
    // (e.g. text_encoding::UTF8) in addition to text_encoding::id::UTF8.
    using enum id;

    // Default construction relies on the default member initializers: the
    // object refers to the id::unknown table entry and has an empty name.
    constexpr text_encoding() = default;

    // Construct from an encoding name: look the name up in the table and
    // keep a copy of the characters that were passed in.
    constexpr explicit
    text_encoding(string_view __enc) noexcept
    : _M_rep(_S_find_name(__enc))
    {
      // _M_name is zero-initialized and one byte longer than
      // max_name_length, so copying at most max_name_length characters
      // always leaves it null-terminated.
      const size_t __len
	= __enc.size() < max_name_length ? __enc.size() : max_name_length;
      char_traits<char>::copy(_M_name, __enc.data(), __len);
    }

    // Construct from an ID, adopting the name of the first table entry
    // for that ID (if that entry has a name).
    // @pre i has the value of one of the enumerators of id.
    constexpr
    text_encoding(id __i) noexcept
    : _M_rep(_S_find_id(__i))
    {
      const string_view __primary(_M_rep->_M_name);
      if (__primary.empty())
	return; // Nameless entry: _M_name keeps its all-zero initial value.
      __primary.copy(_M_name, max_name_length);
    }

    // The ID of the table entry this object refers to.
    constexpr id
    mib() const noexcept
    { return static_cast<id>(_M_rep->_M_id); }

    // The stored encoding name as a null-terminated string; empty if no
    // name was stored.
    constexpr const char*
    name() const noexcept
    { return _M_name; }

    // A view over the table entries that share one ID. It stores only a
    // pointer to the first entry; the end is marked by an empty sentinel
    // type rather than by a second pointer.
    struct aliases_view : ranges::view_interface<aliases_view>
    {
    private:
      class _Iterator;
      struct _Sentinel { };

    public:
      // Declared here, defined after _Iterator.
      constexpr _Iterator begin() const noexcept;

      constexpr _Sentinel
      end() const noexcept
      { return _Sentinel{}; }

    private:
      friend struct text_encoding;

      // Private: text_encoding (a friend) uses this to create the view.
      // A null pointer gives a view whose begin() equals end().
      constexpr explicit
      aliases_view(const _Rep* __first)
      : _M_begin(__first)
      { }

      const _Rep* _M_begin = nullptr;
    };

    // The names recorded in the table for this encoding's ID. The view is
    // empty when the current table entry has an empty name.
    constexpr aliases_view
    aliases() const noexcept
    {
      if (_M_rep->_M_name[0] == '\0')
	return aliases_view{nullptr};
      return aliases_view(_M_rep);
    }

    // Two encodings are equal if their IDs are equal, except that two
    // objects which are both id::other are compared by name via _S_comp.
    friend constexpr bool
    operator==(const text_encoding& __a,
	       const text_encoding& __b) noexcept
    {
      const id __lhs = __a.mib();
      const id __rhs = __b.mib();
      if (__lhs != id::other || __rhs != id::other) [[likely]]
	return __lhs == __rhs;
      return _S_comp(__a._M_name, __b._M_name);
    }

    // Compare an encoding with a bare ID; only mib() is consulted.
    friend constexpr bool
    operator==(const text_encoding& __encoding, id __i) noexcept
    {
      const id __mib = __encoding.mib();
      return __mib == __i;
    }

#if __CHAR_BIT__ == 8
    // The three functions below are usable only when __CHAR_BIT__ is 8;
    // otherwise they are declared as deleted (see the #else branch).

    // Build a text_encoding from the encoding name the compiler provides
    // in __GNUC_EXECUTION_CHARSET_NAME or __clang_literal_encoding__.
    // If neither macro is defined, the result is a default-constructed
    // object. Being consteval, this is always evaluated at compile time.
    static consteval text_encoding
    literal() noexcept
    {
#ifdef __GNUC_EXECUTION_CHARSET_NAME
      return text_encoding(__GNUC_EXECUTION_CHARSET_NAME);
#elif defined __clang_literal_encoding__
      return text_encoding(__clang_literal_encoding__);
#else
      return text_encoding();
#endif
    }

    // Only declared here (no inline definition, not constexpr).
    // NOTE(review): presumably reports the encoding of the execution
    // environment; confirm against the out-of-line definition.
    static text_encoding
    environment();

    // Constructs a text_encoding from _Id and returns the result of its
    // _M_is_environment() check.
    template<id _Id>
      static bool
      environment_is()
      { return text_encoding(_Id)._M_is_environment(); }
#else
    // char is not 8 bits wide: the functions exist but cannot be called.
    static text_encoding literal() = delete;
    static text_encoding environment() = delete;
    template<id> static bool environment_is() = delete;
#endif

  private:
    // Pointer into _S_reps identifying this encoding. The default is the
    // second table entry, which is the one for id::unknown.
    const _Rep* _M_rep = _S_reps + 1; // id::unknown
    // Null-terminated copy of the encoding name; all zeros means "no name".
    char _M_name[max_name_length + 1] = {0};

    // Only declared here (no inline definition); used by environment_is().
    bool
    _M_is_environment() const;

    // The encoding table. Layout relied on by the lookup functions:
    //  - entry 0 is id::other and entry 1 is id::unknown, both nameless;
    //  - then come the rows supplied by <bits/text_encoding-data.h>;
    //  - the final entry is a sentinel with ID 9999 and a null name.
    // _S_find_id searches this array with std::lower_bound, so the rows
    // must be ordered by ID.
    static inline constexpr _Rep _S_reps[] = {
      { 1, "" }, { 2, "" },
      // The data header is expected to #undef the macro defined on the
      // next line after emitting its rows; if the macro is still defined
      // afterwards the build fails with the #error below.
#define _GLIBCXX_GET_ENCODING_DATA
#include <bits/text_encoding-data.h>
#ifdef _GLIBCXX_GET_ENCODING_DATA
# error "Invalid text_encoding data"
#endif
      { 9999, nullptr }, // sentinel
    };

    // Compare two encoding names for equivalence by delegating to
    // __unicode::__charset_alias_match.
    static constexpr bool
    _S_comp(string_view __a, string_view __b)
    {
      const bool __same = __unicode::__charset_alias_match(__a, __b);
      return __same;
    }

    // Map an encoding name to the first table entry of the ID whose names
    // include a match for it. Returns the id::other entry if nothing
    // matches.
    static constexpr const _Rep*
    _S_find_name(string_view __name) noexcept
    {
#ifdef _GLIBCXX_TEXT_ENCODING_UTF8_OFFSET
      // Fast path for the common case: a name spelled exactly "UTF-8"
      // skips the linear scan that calls _S_comp on every table string.
      if (__name == "UTF-8")
	return _S_reps + 2 + _GLIBCXX_TEXT_ENCODING_UTF8_OFFSET;
#endif

      // Searchable rows: skip the two nameless entries at the front (other
      // and unknown) and the sentinel at the back, which never matches.
      const _Rep* const __named_begin = _S_reps + 2;
      const _Rep* const __named_end = std::end(_S_reps) - 1;

      const _Rep* __hit = __named_begin;
      while (__hit != __named_end && !_S_comp(__hit->_M_name, __name))
	++__hit;

      if (__hit == __named_end)
	return _S_reps; // id::other

      // The match may be an alias. Rewind to the first entry for this ID.
      const auto __id = __hit->_M_id;
      while (__hit[-1]._M_id == __id)
	--__hit;
      return __hit;
    }

    // Map an ID to the first table entry carrying it. If the value is not
    // present in the table the assertion fires (when assertions are
    // enabled); otherwise the id::unknown entry is returned.
    static constexpr const _Rep*
    _S_find_id(id __id) noexcept
    {
      const auto __i = static_cast<_Rep::id>(__id);
      // The sentinel is excluded from the search range, but lower_bound
      // may return a pointer to it, which is still safe to read.
      const _Rep* const __r
	= std::lower_bound(_S_reps, std::end(_S_reps) - 1, __i);
      if (__r->_M_id != __i) [[unlikely]]
	{
	  // Preconditions: i has the value of one of the enumerators of id.
	  __glibcxx_assert(__r->_M_id == __i);
	  return _S_reps + 1; // id::unknown
	}
      return __r;
    }
  };

  template<>
    struct hash<text_encoding>
    {
      size_t
      operator()(const text_encoding& __enc) const noexcept
      { return std::hash<text_encoding::id>()(__enc.mib()); }
    };

  class text_encoding::aliases_view::_Iterator
  {
  public:
    // Dereferencing yields a pointer to a name stored in the table, by
    // value: value_type and reference are the same non-reference type.
    using value_type = const char*;
    using reference = const char*;
    using difference_type = int;

    // A value-initialized iterator has a null table pointer. It is not
    // dereferenceable and compares equal to the sentinel.
    constexpr _Iterator() = default;

    // Return the name the iterator currently refers to. On an iterator
    // that is not dereferenceable the assertion fires (when enabled);
    // otherwise an empty string is returned.
    constexpr value_type
    operator*() const
    {
      if (!_M_dereferenceable()) [[unlikely]]
	{
	  __glibcxx_assert(_M_dereferenceable());
	  return "";
	}
      return _M_rep->_M_name;
    }

    // Step to the next table entry. Incrementing an iterator that is not
    // dereferenceable asserts (when enabled) and otherwise resets it to
    // the value-initialized state.
    constexpr _Iterator&
    operator++()
    {
      if (!_M_dereferenceable()) [[unlikely]]
	{
	  __glibcxx_assert(_M_dereferenceable());
	  *this = _Iterator{};
	  return *this;
	}
      ++_M_rep;
      return *this;
    }

    // Step to the previous table entry, which is only valid if that entry
    // has the same ID as this iterator. Otherwise assert (when enabled)
    // and reset to the value-initialized state.
    constexpr _Iterator&
    operator--()
    {
      // The null test comes first so that _M_rep[-1] is never read
      // through a null pointer.
      const bool __decrementable
	= !(_M_rep == nullptr || _M_rep[-1]._M_id != _M_id);
      if (!__decrementable) [[unlikely]]
	{
	  __glibcxx_assert(__decrementable);
	  *this = _Iterator{};
	  return *this;
	}
      --_M_rep;
      return *this;
    }

    // Postfix increment: advance, but return the position held before.
    constexpr _Iterator
    operator++(int)
    {
      const _Iterator __prev = *this;
      operator++();
      return __prev;
    }

    // Postfix decrement: step back, but return the position held before.
    constexpr _Iterator
    operator--(int)
    {
      const _Iterator __prev = *this;
      operator--();
      return __prev;
    }

    // Name found __n positions away from this iterator: advance a copy,
    // then dereference it.
    constexpr value_type
    operator[](difference_type __n) const
    {
      _Iterator __tmp = *this;
      __tmp += __n;
      return *__tmp;
    }

    // Move the iterator by __n table entries (backwards if __n is
    // negative) and return *this.
    //
    // A move is valid only if it stays within the table and within the
    // run of entries that share this iterator's ID; a forward move may
    // also land one past the end of that run. An invalid move resets the
    // iterator to the value-initialized state, which is not dereferenceable
    // and compares equal to the sentinel, so the assertion below fires
    // when assertions are enabled. A non-zero move of an iterator that
    // already has a null table pointer fails the same assertion.
    constexpr _Iterator&
    operator+=(difference_type __n)
    {
      if (_M_rep != nullptr)
	{
	  if (__n > 0)
	    {
	      // The destination must lie inside the array, and the entry
	      // just before it must still belong to this ID. Checking
	      // [__n - 1] rather than [__n] is what permits landing one
	      // past the last entry of the run.
	      if (__n < (std::end(_S_reps) - _M_rep)
		    && _M_rep[__n - 1]._M_id == _M_id) [[likely]]
		_M_rep += __n;
	      else
		*this = _Iterator{}; // Assign; `==` here was a discarded no-op.
	    }
	  else if (__n < 0)
	    {
	      // _S_reps - _M_rep is the (non-positive) offset of the first
	      // array element; the destination must come after that element
	      // and must itself have this ID.
	      if (__n > (_S_reps - _M_rep)
		    && _M_rep[__n]._M_id == _M_id) [[likely]]
		_M_rep += __n;
	      else
		*this = _Iterator{}; // Assign; `==` here was a discarded no-op.
	    }
	}
      if (__n != 0)
	__glibcxx_assert(_M_rep != nullptr);
      return *this;
    }

    // Move by -__n, implemented in terms of operator+=.
    constexpr _Iterator&
    operator-=(difference_type __n)
    {
      using _Traits = __gnu_cxx::__int_traits<difference_type>;
      // The most negative value cannot be negated without overflow, so it
      // is mapped to the largest positive step instead.
      const difference_type __step
	= (__n == _Traits::__min) ? _Traits::__max : -__n;
      return operator+=(__step);
    }

    // Distance between two iterators with the same ID. For iterators with
    // different IDs the assertion fires (when enabled); otherwise the
    // largest difference_type value is returned.
    constexpr difference_type
    operator-(const _Iterator& __i) const
    {
      if (_M_id != __i._M_id)
	{
	  __glibcxx_assert(_M_id == __i._M_id);
	  return __gnu_cxx::__int_traits<difference_type>::__max;
	}
      return _M_rep - __i._M_rep;
    }

    // Iterators are equal when both the table pointer and the ID match.
    constexpr bool
    operator==(const _Iterator&) const = default;

    // An iterator has reached the end once it is no longer dereferenceable.
    constexpr bool
    operator==(_Sentinel) const noexcept
    {
      const bool __at_end = !_M_dereferenceable();
      return __at_end;
    }

    // Order two iterators by table position. Comparing iterators with
    // different IDs trips the assertion (when enabled).
    constexpr strong_ordering
    operator<=>(const _Iterator& __i) const
    {
      __glibcxx_assert(_M_id == __i._M_id);
      const strong_ordering __order = _M_rep <=> __i._M_rep;
      return __order;
    }

    // iterator + n: the by-value parameter is the working copy.
    friend constexpr _Iterator
    operator+(_Iterator __i, difference_type __n)
    { return __i += __n; }

    // n + iterator: same result as iterator + n.
    friend constexpr _Iterator
    operator+(difference_type __n, _Iterator __i)
    { return __i += __n; }

    // iterator - n: the by-value parameter is the working copy.
    friend constexpr _Iterator
    operator-(_Iterator __i, difference_type __n)
    { return __i -= __n; }

  private:
    friend class text_encoding;

    // Start at table entry __r and remember its ID, so that the end of
    // the run of entries with that ID can be detected later. A null
    // pointer gives the same state as value-initialization.
    constexpr explicit
    _Iterator(const _Rep* __r) noexcept
    : _M_rep(__r), _M_id(__r != nullptr ? __r->_M_id : 0)
    { }

    // True while the iterator points at an entry that still has the ID
    // it was created with.
    constexpr bool
    _M_dereferenceable() const noexcept
    {
      if (_M_rep == nullptr)
	return false;
      return _M_rep->_M_id == _M_id;
    }

    const _Rep* _M_rep = nullptr;
    _Rep::id _M_id = 0;
  };

  // Out-of-line definition, placed after _Iterator so that the iterator
  // type is complete here.
  constexpr auto
  text_encoding::aliases_view::begin() const noexcept
  -> _Iterator
  {
    _Iterator __first(_M_begin);
    return __first;
  }

namespace ranges
{
  // Opt-in to borrowed_range concept
  // The view's iterators hold only a pointer into the static table and
  // an ID, never a pointer to the aliases_view object itself.
  template<>
    inline constexpr bool
    enable_borrowed_range<std::text_encoding::aliases_view> = true;
}

_GLIBCXX_END_NAMESPACE_VERSION
} // namespace std

#endif // __cpp_lib_text_encoding
#endif // _GLIBCXX_TEXT_ENCODING
