/* GNU moe - My Own Editor
   Copyright (C) 2005-2025 Antonio Diaz Diaz.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#include <cctype>
#include <string>

#include "iso_8859.h"


namespace ISO_8859 {

/* Symbolic names for the character codes 0xA0 to 0xFF, one enumerator per
   code, in ascending order and without gaps.
   For letters, a leading 'C' denotes the capital form and a leading 'S' the
   small form (for example CAGRAVE / SAGRAVE).
   NOTE(review): the assignments (euro sign at 0xA4, s and z with caron,
   oe ligatures, capital y with diaeresis at 0xBE) appear to correspond to
   ISO-8859-15 (Latin-9) -- confirm against the character set in use.
*/
enum Code {
     NBSP    = 0xA0,	// no-break space
     IEXCLAM = 0xA1,	// inverted exclamation mark
     CENT    = 0xA2,	// cent sign
     POUND   = 0xA3,	// pound sign
     EURO    = 0xA4,	// euro sign
     YEN     = 0xA5,	// yen sign
     CSCARON = 0xA6,	// latin capital letter s with caron
     SECTION = 0xA7,	// section sign
     SSCARON = 0xA8,	// latin small letter s with caron
     COPY    = 0xA9,	// copyright sign
     FEMIORD = 0xAA,	// feminine ordinal indicator
     LDANGLE = 0xAB,	// left-pointing double angle quotation mark
     NOT     = 0xAC,	// not sign
     SHY     = 0xAD,	// soft hyphen
     REG     = 0xAE,	// registered sign
     MACRON  = 0xAF,	// macron
     DEG     = 0xB0,	// degree sign
     PLUSMIN = 0xB1,	// plus-minus sign
     POW2    = 0xB2,	// superscript two
     POW3    = 0xB3,	// superscript three
     CZCARON = 0xB4,	// latin capital letter z with caron
     MICRO   = 0xB5,	// micro sign
     PILCROW = 0xB6,	// pilcrow sign
     MIDDOT  = 0xB7,	// middle dot
     SZCARON = 0xB8,	// latin small letter z with caron
     POW1    = 0xB9,	// superscript one
     MASCORD = 0xBA,	// masculine ordinal indicator
     RDANGLE = 0xBB,	// right-pointing double angle quotation mark
     CLIGOE  = 0xBC,	// latin capital ligature oe
     SLIGOE  = 0xBD,	// latin small ligature oe
     CYDIAER = 0xBE,	// latin capital letter y with diaeresis
     IQUEST  = 0xBF,	// inverted question mark
     CAGRAVE = 0xC0,	// latin capital letter a with grave
     CAACUTE = 0xC1,	// latin capital letter a with acute
     CACIRCU = 0xC2,	// latin capital letter a with circumflex
     CATILDE = 0xC3,	// latin capital letter a with tilde
     CADIAER = 0xC4,	// latin capital letter a with diaeresis
     CARING  = 0xC5,	// latin capital letter a with ring above
     CLIGAE  = 0xC6,	// latin capital ligature ae
     CCCEDI  = 0xC7,	// latin capital letter c with cedilla
     CEGRAVE = 0xC8,	// latin capital letter e with grave
     CEACUTE = 0xC9,	// latin capital letter e with acute
     CECIRCU = 0xCA,	// latin capital letter e with circumflex
     CEDIAER = 0xCB,	// latin capital letter e with diaeresis
     CIGRAVE = 0xCC,	// latin capital letter i with grave
     CIACUTE = 0xCD,	// latin capital letter i with acute
     CICIRCU = 0xCE,	// latin capital letter i with circumflex
     CIDIAER = 0xCF,	// latin capital letter i with diaeresis
     CETH    = 0xD0,	// latin capital letter eth (icelandic)
     CNTILDE = 0xD1,	// latin capital letter n with tilde
     COGRAVE = 0xD2,	// latin capital letter o with grave
     COACUTE = 0xD3,	// latin capital letter o with acute
     COCIRCU = 0xD4,	// latin capital letter o with circumflex
     COTILDE = 0xD5,	// latin capital letter o with tilde
     CODIAER = 0xD6,	// latin capital letter o with diaeresis
     MULT    = 0xD7,	// multiplication sign
     COSTROK = 0xD8,	// latin capital letter o with stroke
     CUGRAVE = 0xD9,	// latin capital letter u with grave
     CUACUTE = 0xDA,	// latin capital letter u with acute
     CUCIRCU = 0xDB,	// latin capital letter u with circumflex
     CUDIAER = 0xDC,	// latin capital letter u with diaeresis
     CYACUTE = 0xDD,	// latin capital letter y with acute
     CTHORN  = 0xDE,	// latin capital letter thorn (icelandic)
     SSSHARP = 0xDF,	// latin small letter sharp s (german)
     SAGRAVE = 0xE0,	// latin small letter a with grave
     SAACUTE = 0xE1,	// latin small letter a with acute
     SACIRCU = 0xE2,	// latin small letter a with circumflex
     SATILDE = 0xE3,	// latin small letter a with tilde
     SADIAER = 0xE4,	// latin small letter a with diaeresis
     SARING  = 0xE5,	// latin small letter a with ring above
     SLIGAE  = 0xE6,	// latin small ligature ae
     SCCEDI  = 0xE7,	// latin small letter c with cedilla
     SEGRAVE = 0xE8,	// latin small letter e with grave
     SEACUTE = 0xE9,	// latin small letter e with acute
     SECIRCU = 0xEA,	// latin small letter e with circumflex
     SEDIAER = 0xEB,	// latin small letter e with diaeresis
     SIGRAVE = 0xEC,	// latin small letter i with grave
     SIACUTE = 0xED,	// latin small letter i with acute
     SICIRCU = 0xEE,	// latin small letter i with circumflex
     SIDIAER = 0xEF,	// latin small letter i with diaeresis
     SETH    = 0xF0,	// latin small letter eth (icelandic)
     SNTILDE = 0xF1,	// latin small letter n with tilde
     SOGRAVE = 0xF2,	// latin small letter o with grave
     SOACUTE = 0xF3,	// latin small letter o with acute
     SOCIRCU = 0xF4,	// latin small letter o with circumflex
     SOTILDE = 0xF5,	// latin small letter o with tilde
     SODIAER = 0xF6,	// latin small letter o with diaeresis
     DIV     = 0xF7,	// division sign
     SOSTROK = 0xF8,	// latin small letter o with stroke
     SUGRAVE = 0xF9,	// latin small letter u with grave
     SUACUTE = 0xFA,	// latin small letter u with acute
     SUCIRCU = 0xFB,	// latin small letter u with circumflex
     SUDIAER = 0xFC,	// latin small letter u with diaeresis
     SYACUTE = 0xFD,	// latin small letter y with acute
     STHORN  = 0xFE,	// latin small letter thorn (icelandic)
     SYDIAER = 0xFF	// latin small letter y with diaeresis
     };


/* Parse exactly 'len' hexadecimal digits of 's', the first one being s[i].
   Return the value of the digits, or -1 if 'len' is not in the range 1 to 8,
   if the digits would extend past the end of 's', if any character is not
   a hexadecimal digit, or if the value is larger than 0x7FFFFFFF.
*/
int parse_hex( const std::string & s, const unsigned long i, const int len )
  {
  if( len < 1 || len > 8 ) return -1;
  const unsigned long end = i + len;		// one past the last digit
  if( end > s.size() ) return -1;

  unsigned int value = 0;
  unsigned long pos = i;
  while( pos < end )
    {
    const int digit = ISO_8859::xvalue( s[pos++] );
    if( digit < 0 ) return -1;			// not a hex digit
    value = ( value << 4 ) + digit;
    }
  if( value > 0x7FFFFFFF ) return -1;		// would not fit as non-negative int
  return value;
  }


/* Return the uppercase hexadecimal digit ('0'-'9', 'A'-'F') representing
   'value', or 0 if 'value' is larger than 15.
*/
unsigned char xdigit( const unsigned value )
  {
  static const char digits[] = "0123456789ABCDEF";
  if( value < 16 ) return digits[value];
  return 0;
  }

} // end namespace ISO_8859


/* Return 'num' formatted as uppercase hexadecimal. Numbers of 5 or more
   digits are split into groups of 4 digits (counted from the right)
   separated by '_'.
   The result points into one of 8 static buffers used in rotation, so up
   to 8 results may be in use at the same time (for example as arguments of
   a single printf call) before the oldest one is overwritten.
*/
const char * format_hex4( unsigned long long num )
  {
  enum { buffers = 8, bufsize = 3 * sizeof num };
  static char buffer[buffers][bufsize];	// circle of buffers for printf
  static int current = 0;

  char * p = buffer[current] + bufsize - 1;	// the buffer is filled backwards
  current = ( current + 1 ) % buffers;
  *p = 0;					// terminator

  int group_digits = 0;			// digits written in the current group
  do {
    // a separator is written only if at least one more digit follows it
    if( group_digits == 4 ) { *(--p) = '_'; group_digits = 0; }
    *(--p) = ISO_8859::xdigit( num & 0x0F );
    ++group_digits;
    num >>= 4;
    }
  while( num != 0 );
  return p;
  }


/* 'seq[i]' begins an escape sequence (the characters following a '\').
   Return the corresponding code and, in *lenp (if 'lenp' is not null), the
   number of bytes read. '*lenp' is written only on success.
   Return -1 if unknown escape sequence, -2 if syntax error (including 'i'
   being past the end of 'seq').
   Recognized sequences:
     \ a b e f n r t v   single character escapes ('e' is code 27)
     0-7                 one to three octal digits, value <= 255
     dDDD                exactly three decimal digits, value <= 255
     uXXXX, UXXXXXXXX    four or eight hex digits, only if 'allow_u'
     xXX, XXX            exactly two hex digits
*/
int ISO_8859::escape( const std::string & seq, const unsigned long i,
                      int * const lenp, const bool allow_u )
  {
  const unsigned long size = seq.size();
  if( i >= size ) return -2;
  const unsigned char first = seq[i];
  unsigned value = first;		// '\\' stands for itself
  int used = 1;				// bytes of 'seq' consumed so far

  switch( first )
    {
    case '\\': break;
    case 'a': value = '\a'; break;
    case 'b': value = '\b'; break;
    case 'e': value = 27; break;
    case 'f': value = '\f'; break;
    case 'n': value = '\n'; break;
    case 'r': value = '\r'; break;
    case 't': value = '\t'; break;
    case 'v': value = '\v'; break;
    case '0': case '1': case '2': case '3':
    case '4': case '5': case '6': case '7':
      value = first - '0';
      for( ; used < 3 && i + used < size; ++used )
        {
        const char c = seq[i+used];
        if( !ISO_8859::isodigit( c ) ) break;
        value = ( value << 3 ) + ( c - '0' );
        }
      if( value > 255 ) return -2;
      break;
    case 'd':
      value = 0;
      for( ; used < 4 && i + used < size; ++used )
        {
        const unsigned char c = seq[i+used];
        if( !std::isdigit( c ) ) break;
        value = value * 10 + ( c - '0' );
        }
      // fewer than three digits is a syntax error
      if( used != 4 || value > 255 ) return -2;
      break;
    case 'u':
    case 'U':
      {
      if( !allow_u ) return -2;		// syntax error
      used = ( first == 'u' ) ? 5 : 9;
      const int parsed = parse_hex( seq, i + 1, used - 1 );
      if( parsed < 0 ) return -2;
      value = parsed;
      break;
      }
    case 'x':
    case 'X':
      {
      const int parsed = parse_hex( seq, i + 1, 2 );
      if( parsed < 0 ) return -2;
      value = parsed;
      used = 3;
      break;
      }
    default: return -1;
    }
  if( lenp ) *lenp = used;
  return value;
  }


/* Append 'ch' to 'out', escaped if needed.
   A backslash and any character for which ISO_8859::iscntrl is true are
   written as a backslash followed by a mnemonic letter (\\ \a \b \f \n \r
   \t \v) or, if there is no mnemonic, as '\x' plus two uppercase hex digits.
   Any other character is appended unchanged.
*/
void ISO_8859::escapize( const unsigned char ch, std::string & out )
  {
  if( ISO_8859::iscntrl( ch ) || ch == '\\' )
    {
    char mnemonic = 0;			// 0 means "use the \xHH form"
    switch( ch )
      {
      case '\\': mnemonic = '\\'; break;
      case '\a': mnemonic = 'a'; break;
      case '\b': mnemonic = 'b'; break;
      case '\f': mnemonic = 'f'; break;
      case '\n': mnemonic = 'n'; break;
      case '\r': mnemonic = 'r'; break;
      case '\t': mnemonic = 't'; break;
      case '\v': mnemonic = 'v'; break;
      }
    out += '\\';
    if( mnemonic ) out += mnemonic;
    else
      {
      out += 'x';
      out += xdigit( ch >> 4 );
      out += xdigit( ch & 0x0F );
      }
    }
  else out += ch;
  }


bool ISO_8859::islower( const unsigned char ch )
  {
  if( ch < 128 ) return std::islower( ch );
  switch( ch )
    {
    case SSCARON:
    case SZCARON:
    case SLIGOE:
//  SSSHARP commented out because ISO_8859::toupper( SSSHARP ) does nothing
//  case SSSHARP:
    case SAGRAVE:
    case SAACUTE:
    case SACIRCU:
    case SATILDE:
    case SADIAER:
    case SARING :
    case SLIGAE:
    case SCCEDI :
    case SEGRAVE:
    case SEACUTE:
    case SECIRCU:
    case SEDIAER:
    case SIGRAVE:
    case SIACUTE:
    case SICIRCU:
    case SIDIAER:
    case SETH   :
    case SNTILDE:
    case SOGRAVE:
    case SOACUTE:
    case SOCIRCU:
    case SOTILDE:
    case SODIAER:
    case SOSTROK:
    case SUGRAVE:
    case SUACUTE:
    case SUCIRCU:
    case SUDIAER:
    case SYACUTE:
    case STHORN :
    case SYDIAER: return true;
    default:      return false;
    }
  }


bool ISO_8859::isupper( const unsigned char ch )
  {
  if( ch < 128 ) return std::isupper( ch );
  switch( ch )
    {
    case CSCARON:
    case CZCARON:
    case CLIGOE:
    case CYDIAER:
    case CAGRAVE:
    case CAACUTE:
    case CACIRCU:
    case CATILDE:
    case CADIAER:
    case CARING :
    case CLIGAE:
    case CCCEDI :
    case CEGRAVE:
    case CEACUTE:
    case CECIRCU:
    case CEDIAER:
    case CIGRAVE:
    case CIACUTE:
    case CICIRCU:
    case CIDIAER:
    case CETH   :
    case CNTILDE:
    case COGRAVE:
    case COACUTE:
    case COCIRCU:
    case COTILDE:
    case CODIAER:
    case COSTROK:
    case CUGRAVE:
    case CUACUTE:
    case CUCIRCU:
    case CUDIAER:
    case CYACUTE:
    case CTHORN : return true;
    default:      return false;
    }
  }


unsigned char ISO_8859::base_char( const unsigned char ch )
  {
  switch( ch )
    {
    case CAGRAVE: case CAACUTE: case CACIRCU: case CATILDE: case CADIAER:
    case CARING : return 'A';
    case CCCEDI : return 'C';
    case CEGRAVE: case CEACUTE: case CECIRCU: case CEDIAER: return 'E';
    case CIGRAVE: case CIACUTE: case CICIRCU: case CIDIAER: return 'I';
    case CNTILDE: return 'N';
    case COGRAVE: case COACUTE: case COCIRCU: case COTILDE: case CODIAER:
    case COSTROK: return 'O';
    case CSCARON: return 'S';
    case CUGRAVE: case CUACUTE: case CUCIRCU: case CUDIAER: return 'U';
    case CYACUTE: case CYDIAER: return 'Y';
    case CZCARON: return 'Z';
    case SAGRAVE: case SAACUTE: case SACIRCU: case SATILDE: case SADIAER:
    case SARING : return 'a';
    case SCCEDI : return 'c';
    case SEGRAVE: case SEACUTE: case SECIRCU: case SEDIAER: return 'e';
    case SIGRAVE: case SIACUTE: case SICIRCU: case SIDIAER: return 'i';
    case SNTILDE: return 'n';
    case SOGRAVE: case SOACUTE: case SOCIRCU: case SOTILDE: case SODIAER:
    case SOSTROK: return 'o';
    case SSCARON: return 's';
    case SUGRAVE: case SUACUTE: case SUCIRCU: case SUDIAER: return 'u';
    case SYACUTE: case SYDIAER: return 'y';
    case SZCARON: return 'z';
    default:      return ch;
    }
  }


/* Return the caret notation of the control character 'ch' ('^A' --> "^A"),
   or an empty string if 'ch' is 32 or larger.
   The result is stored in a static buffer that is overwritten by the next
   call.
*/
const char * ISO_8859::control_name( const unsigned char ch )
  {
  static char buf[3];
  if( ch >= 32 ) { buf[0] = 0; return buf; }	// not a control code
  buf[0] = '^';
  buf[1] = ch + '@';				// 0 --> '@', 1 --> 'A', ...
  buf[2] = 0;
  return buf;
  }


/* Convert the ASCII letter 'code' ('A' or 'a') into the corresponding
   control code ('^A', value 1). Any other value is returned unchanged.
*/
int ISO_8859::controlize( int code )
  {
  if( code < 0 || code >= 128 ) return code;	// only ASCII is converted
  if( std::isupper( code ) ) return code - 'A' + 1;
  if( std::islower( code ) ) return code - 'a' + 1;
  return code;
  }


/* Convert the control code '^A' or the letter 'a' into 'A'.
   Codes below 32 are shifted up by '@'; other codes below 128 are passed
   through std::toupper. Values outside 0 to 127 are returned unchanged.
*/
int ISO_8859::decontrolize( int code )
  {
  if( code < 0 || code >= 128 ) return code;
  return ( code < 32 ) ? code + '@' : std::toupper( code );
  }


unsigned char ISO_8859::tolower( const unsigned char ch )
  {
  if( ch < 128 ) return std::tolower( ch );
  switch( ch )
    {
    case CSCARON: return SSCARON;
    case CZCARON: return SZCARON;
    case CLIGOE : return SLIGOE;
    case CYDIAER: return SYDIAER;
    case CAGRAVE: return SAGRAVE;
    case CAACUTE: return SAACUTE;
    case CACIRCU: return SACIRCU;
    case CATILDE: return SATILDE;
    case CADIAER: return SADIAER;
    case CARING : return SARING;
    case CLIGAE : return SLIGAE;
    case CCCEDI : return SCCEDI;
    case CEGRAVE: return SEGRAVE;
    case CEACUTE: return SEACUTE;
    case CECIRCU: return SECIRCU;
    case CEDIAER: return SEDIAER;
    case CIGRAVE: return SIGRAVE;
    case CIACUTE: return SIACUTE;
    case CICIRCU: return SICIRCU;
    case CIDIAER: return SIDIAER;
    case CETH   : return SETH;
    case CNTILDE: return SNTILDE;
    case COGRAVE: return SOGRAVE;
    case COACUTE: return SOACUTE;
    case COCIRCU: return SOCIRCU;
    case COTILDE: return SOTILDE;
    case CODIAER: return SODIAER;
    case COSTROK: return SOSTROK;
    case CUGRAVE: return SUGRAVE;
    case CUACUTE: return SUACUTE;
    case CUCIRCU: return SUCIRCU;
    case CUDIAER: return SUDIAER;
    case CYACUTE: return SYACUTE;
    case CTHORN : return STHORN;
    default     : return ch;
    }
  }


unsigned char ISO_8859::toupper( const unsigned char ch )
  {
  if( ch < 128 ) return std::toupper( ch );
  switch( ch )
    {
    case SSCARON: return CSCARON;
    case SZCARON: return CZCARON;
    case SLIGOE : return CLIGOE;
    case SYDIAER: return CYDIAER;
    case SAGRAVE: return CAGRAVE;
    case SAACUTE: return CAACUTE;
    case SACIRCU: return CACIRCU;
    case SATILDE: return CATILDE;
    case SADIAER: return CADIAER;
    case SARING : return CARING;
    case SLIGAE : return CLIGAE;
    case SCCEDI : return CCCEDI;
    case SEGRAVE: return CEGRAVE;
    case SEACUTE: return CEACUTE;
    case SECIRCU: return CECIRCU;
    case SEDIAER: return CEDIAER;
    case SIGRAVE: return CIGRAVE;
    case SIACUTE: return CIACUTE;
    case SICIRCU: return CICIRCU;
    case SIDIAER: return CIDIAER;
    case SETH   : return CETH;
    case SNTILDE: return CNTILDE;
    case SOGRAVE: return COGRAVE;
    case SOACUTE: return COACUTE;
    case SOCIRCU: return COCIRCU;
    case SOTILDE: return COTILDE;
    case SODIAER: return CODIAER;
    case SOSTROK: return COSTROK;
    case SUGRAVE: return CUGRAVE;
    case SUACUTE: return CUACUTE;
    case SUCIRCU: return CUCIRCU;
    case SUDIAER: return CUDIAER;
    case SYACUTE: return CYACUTE;
    case STHORN : return CTHORN;
    default     : return ch;
    }
  }


/* Append to 'out' a representation of 'ch' made only of 7-bit codes.
   Codes below 128 are appended unchanged.
   Codes 0x80 to 0x9F are appended as ESC (0x1B) followed by 'ch - 0x40'
   (presumably the 7-bit escape form of the C1 control codes).
   Codes 0xA0 to 0xFF are replaced by an approximation of one to four
   ASCII characters (for example EURO --> "euro", CAGRAVE --> 'A').
*/
void ISO_8859::map_to_ascii( const unsigned char ch, std::string & out )
  {
  if( ch < 128 ) { out += ch; return; }
  if( ch < 0xA0 ) { out += '\x1B'; out += ch - 0x40; return; }

  const char * text = 0;	// replacement made of several characters
  char single = 0;		// replacement made of one character
  // every code from 0xA0 to 0xFF has its own label; there is no default
  switch( Code( ch ) )
    {
    case NBSP   : single = ' '; break;
    case IEXCLAM: single = '!'; break;
    case CENT   : single = 'c'; break;
    case POUND  : single = 'L'; break;
    case EURO   : text = "euro"; break;
    case YEN    : single = 'Y'; break;
    case CSCARON: single = 'S'; break;
    case SECTION: text = "SS"; break;
    case SSCARON: single = 's'; break;
    case COPY   : text = "(C)"; break;
    case FEMIORD: text = ".a"; break;
    case LDANGLE: text = "<<"; break;
    case NOT    : single = '!'; break;
    case SHY    : single = '-'; break;
    case REG    : single = 'R'; break;
    case MACRON : single = '~'; break;
    case DEG    : single = 'o'; break;
    case PLUSMIN: text = "+-"; break;
    case POW2   : text = "^2"; break;
    case POW3   : text = "^3"; break;
    case CZCARON: single = 'Z'; break;
    case MICRO  : single = 'u'; break;
    case PILCROW: single = 'P'; break;
    case MIDDOT : single = '.'; break;
    case SZCARON: single = 'z'; break;
    case POW1   : text = "^1"; break;
    case MASCORD: text = ".o"; break;
    case RDANGLE: text = ">>"; break;
    case CLIGOE : single = 'E'; break;
    case SLIGOE : single = 'e'; break;
    case CYDIAER: single = 'Y'; break;
    case IQUEST : single = '?'; break;
    case CAGRAVE: case CAACUTE: case CACIRCU: case CATILDE: case CADIAER:
    case CARING : single = 'A'; break;
    case CLIGAE : single = 'E'; break;
    case CCCEDI : single = 'C'; break;
    case CEGRAVE: case CEACUTE: case CECIRCU:
    case CEDIAER: single = 'E'; break;
    case CIGRAVE: case CIACUTE: case CICIRCU:
    case CIDIAER: single = 'I'; break;
    case CETH   : single = 'D'; break;
    case CNTILDE: text = "N~"; break;
    case COGRAVE: case COACUTE: case COCIRCU: case COTILDE:
    case CODIAER: single = 'O'; break;
    case MULT   : single = 'x'; break;
    case COSTROK: text = "Oe"; break;
    case CUGRAVE: case CUACUTE: case CUCIRCU:
    case CUDIAER: single = 'U'; break;
    case CYACUTE: single = 'Y'; break;
    case CTHORN : text = "Th"; break;
    case SSSHARP: text = "ss"; break;
    case SAGRAVE: case SAACUTE: case SACIRCU: case SATILDE: case SADIAER:
    case SARING : single = 'a'; break;
    case SLIGAE : single = 'e'; break;
    case SCCEDI : single = 'c'; break;
    case SEGRAVE: case SEACUTE: case SECIRCU:
    case SEDIAER: single = 'e'; break;
    case SIGRAVE: case SIACUTE: case SICIRCU:
    case SIDIAER: single = 'i'; break;
    case SETH   : single = 'd'; break;
    case SNTILDE: text = "n~"; break;
    case SOGRAVE: case SOACUTE: case SOCIRCU: case SOTILDE:
    case SODIAER: single = 'o'; break;
    case DIV    : single = '/'; break;
    case SOSTROK: text = "oe"; break;
    case SUGRAVE: case SUACUTE: case SUCIRCU:
    case SUDIAER: single = 'u'; break;
    case SYACUTE: single = 'y'; break;
    case STHORN : text = "th"; break;
    case SYDIAER: single = 'y'; break;
    }
  if( text ) out += text; else out += single;
  }


/* Return the value (0 to 15) of the hexadecimal digit 'ch', which may be
   written in either case, or -1 if 'ch' is not a hexadecimal digit.
*/
int ISO_8859::xvalue( const unsigned char ch )
  {
  int value = -1;			// result for non-hex characters
  if( ch >= 'a' && ch <= 'f' ) value = ch - 'a' + 10;
  else if( ch >= 'A' && ch <= 'F' ) value = ch - 'A' + 10;
  else if( ch >= '0' && ch <= '9' ) value = ch - '0';
  return value;
  }
