/* GNU moe - My Own Editor
   Copyright (C) 2005-2025 Antonio Diaz Diaz.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#include <cctype>
#include <string>

#include "encoding.h"
#include "iso_8859.h"


namespace Encoding {

// Return the value (0 to 63) that 'ch' has in the Base64 alphabet of
// RFC 4648, or -1 if 'ch' does not belong to that alphabet.
// The lookup compares 'ch' against character literals only, so the result
// does not depend on the execution character set.
//
int base64_value( const unsigned char ch )
  {
  // The position of each character in this table is its Base64 value.
  static const char alphabet[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789"
    "+/";

  // The scan stops at the terminating 0, which is never compared to 'ch'.
  for( int value = 0; alphabet[value] != 0; ++value )
    if( ch == static_cast< unsigned char >( alphabet[value] ) ) return value;
  return -1;
  }


/* Decode UCS to ISO-8859-15.
   Return -1 if not found, 256 if code should be ignored.
*/
int map_to_byte( const int code )
  {
  if( code < 0 ) return -1;
  switch( code )
    {
    case 0x00A6: return '|';	// broken bar
    case 0x00A8: return '"';	// diaeresis
    case 0x00B4: return '\'';	// acute accent
    case 0x00B8: return ',';	// cedilla
    case 0x00BC:		// vulgar fraction one quarter
    case 0x00BD:		// vulgar fraction one half
    case 0x00BE: return -1;	// vulgar fraction three quarters
    case 0x0100: return 'A';	// latin capital letter a with macron
    case 0x0101: return 'a';	// latin small letter a with macron
    case 0x0102: return 'A';	// latin capital letter a with breve
    case 0x0103: return 'a';	// latin small letter a with breve
    case 0x0104: return 'A';	// latin capital letter a with ogonek
    case 0x0105: return 'a';	// latin small letter a with ogonek
    case 0x0106: return 'C';	// latin capital letter c with acute
    case 0x0107: return 'c';	// latin small letter c with acute
    case 0x0108: return 'C';	// latin capital letter c with circumflex
    case 0x0109: return 'c';	// latin small letter c with circumflex
    case 0x010A: return 'C';	// latin capital letter c with dot above
    case 0x010B: return 'c';	// latin small letter c with dot above
    case 0x010C: return 'C';	// latin capital letter c with caron
    case 0x010D: return 'c';	// latin small letter c with caron
    case 0x010E: return 'D';	// latin capital letter d with caron
    case 0x010F: return 'd';	// latin small letter d with caron
    case 0x0110: return 'D';	// latin capital letter d with stroke
    case 0x0111: return 'd';	// latin small letter d with stroke
    case 0x0112: return 'E';	// latin capital letter e with macron
    case 0x0113: return 'e';	// latin small letter e with macron
    case 0x0114: return 'E';	// latin capital letter e with breve
    case 0x0115: return 'e';	// latin small letter e with breve
    case 0x0116: return 'E';	// latin capital letter e with dot above
    case 0x0117: return 'e';	// latin small letter e with dot above
    case 0x0118: return 'E';	// latin capital letter e with ogonek
    case 0x0119: return 'e';	// latin small letter e with ogonek
    case 0x011A: return 'E';	// latin capital letter e with caron
    case 0x011B: return 'e';	// latin small letter e with caron
    case 0x011C: return 'G';	// latin capital letter g with circumflex
    case 0x011D: return 'g';	// latin small letter g with circumflex
    case 0x011E: return 'G';	// latin capital letter g with breve
    case 0x011F: return 'g';	// latin small letter g with breve
    case 0x0120: return 'G';	// latin capital letter g with dot above
    case 0x0121: return 'g';	// latin small letter g with dot above
    case 0x0122: return 'G';	// latin capital letter g with cedilla
    case 0x0123: return 'g';	// latin small letter g with cedilla
    case 0x0124: return 'H';	// latin capital letter h with circumflex
    case 0x0125: return 'h';	// latin small letter h with circumflex
    case 0x0126: return 'H';	// latin capital letter h with stroke
    case 0x0127: return 'h';	// latin small letter h with stroke
    case 0x0128: return 'I';	// latin capital letter i with tilde
    case 0x0129: return 'i';	// latin small letter i with tilde
    case 0x012A: return 'I';	// latin capital letter i with macron
    case 0x012B: return 'i';	// latin small letter i with macron
    case 0x012C: return 'I';	// latin capital letter i with breve
    case 0x012D: return 'i';	// latin small letter i with breve
    case 0x012E: return 'I';	// latin capital letter i with ogonek
    case 0x012F: return 'i';	// latin small letter i with ogonek
    case 0x0130: return 'I';	// latin capital letter i with dot above
    case 0x0131: return 'i';	// latin small letter dotless i
    case 0x0134: return 'J';	// latin capital letter j with circumflex
    case 0x0135: return 'j';	// latin small letter j with circumflex
    case 0x0136: return 'K';	// latin capital letter k with cedilla
    case 0x0137: return 'k';	// latin small letter k with cedilla
    case 0x0138: return 'k';	// latin small letter kra
    case 0x0139: return 'L';	// latin capital letter l with acute
    case 0x013A: return 'l';	// latin small letter l with acute
    case 0x013B: return 'L';	// latin capital letter l with cedilla
    case 0x013C: return 'l';	// latin small letter l with cedilla
    case 0x013D: return 'L';	// latin capital letter l with caron
    case 0x013E: return 'l';	// latin small letter l with caron
    case 0x013F: return 'L';	// latin capital letter l with middle dot
    case 0x0140: return 'l';	// latin small letter l with middle dot
    case 0x0141: return 'L';	// latin capital letter l with stroke
    case 0x0142: return 'l';	// latin small letter l with stroke
    case 0x0143: return 'N';	// latin capital letter n with acute
    case 0x0144: return 'n';	// latin small letter n with acute
    case 0x0145: return 'N';	// latin capital letter n with cedilla
    case 0x0146: return 'n';	// latin small letter n with cedilla
    case 0x0147: return 'N';	// latin capital letter n with caron
    case 0x0148: return 'n';	// latin small letter n with caron
    case 0x0149: return 'n';	// latin small letter n preceded by apostrophe
    case 0x014A: return 'N';	// latin capital letter eng
    case 0x014B: return 'n';	// latin small letter eng
    case 0x014C: return 'O';	// latin capital letter o with macron
    case 0x014D: return 'o';	// latin small letter o with macron
    case 0x014E: return 'O';	// latin capital letter o with breve
    case 0x014F: return 'o';	// latin small letter o with breve
    case 0x0150: return 0xD6;	// latin capital letter o with double acute
    case 0x0151: return 0xF6;	// latin small letter o with double acute
    case 0x0152: return 0xBC;	// latin capital ligature oe
    case 0x0153: return 0xBD;	// latin small ligature oe
    case 0x0154: return 'R';	// latin capital letter r with acute
    case 0x0155: return 'r';	// latin small letter r with acute
    case 0x0156: return 'R';	// latin capital letter r with cedilla
    case 0x0157: return 'r';	// latin small letter r with cedilla
    case 0x0158: return 'R';	// latin capital letter r with caron
    case 0x0159: return 'r';	// latin small letter r with caron
    case 0x015A: return 'S';	// latin capital letter s with acute
    case 0x015B: return 's';	// latin small letter s with acute
    case 0x015C: return 'S';	// latin capital letter s with circumflex
    case 0x015D: return 's';	// latin small letter s with circumflex
    case 0x015E: return 'S';	// latin capital letter s with cedilla
    case 0x015F: return 's';	// latin small letter s with cedilla
    case 0x0160: return 0xA6;	// latin capital letter s with caron
    case 0x0161: return 0xA8;	// latin small letter s with caron
    case 0x0162: return 'T';	// latin capital letter t with cedilla
    case 0x0163: return 't';	// latin small letter t with cedilla
    case 0x0164: return 'T';	// latin capital letter t with caron
    case 0x0165: return 't';	// latin small letter t with caron
    case 0x0166: return 'T';	// latin capital letter t with stroke
    case 0x0167: return 't';	// latin small letter t with stroke
    case 0x0168: return 'U';	// latin capital letter u with tilde
    case 0x0169: return 'u';	// latin small letter u with tilde
    case 0x016A: return 'U';	// latin capital letter u with macron
    case 0x016B: return 'u';	// latin small letter u with macron
    case 0x016C: return 'U';	// latin capital letter u with breve
    case 0x016D: return 'u';	// latin small letter u with breve
    case 0x016E: return 'U';	// latin capital letter u with ring above
    case 0x016F: return 'u';	// latin small letter u with ring above
    case 0x0170: return 0xDC;	// latin capital letter u with double acute
    case 0x0171: return 0xFC;	// latin small letter u with double acute
    case 0x0172: return 'U';	// latin capital letter u with ogonek
    case 0x0173: return 'u';	// latin small letter u with ogonek
    case 0x0174: return 'W';	// latin capital letter w with circumflex
    case 0x0175: return 'w';	// latin small letter w with circumflex
    case 0x0176: return 'Y';	// latin capital letter y with circumflex
    case 0x0177: return 'y';	// latin small letter y with circumflex
    case 0x0178: return 0xBE;	// latin capital letter y with diaeresis
    case 0x0179: return 'Z';	// latin capital letter z with acute
    case 0x017A: return 'z';	// latin small letter z with acute
    case 0x017B: return 'Z';	// latin capital letter z with dot above
    case 0x017C: return 'z';	// latin small letter z with dot above
    case 0x017D: return 0xB4;	// latin capital letter z with caron
    case 0x017E: return 0xB8;	// latin small letter z with caron
    case 0x017F: return 'f';	// latin small letter long s
    case 0x01CD: return 'A';	// latin capital letter a with caron
    case 0x01CE: return 'a';	// latin small letter a with caron
    case 0x01CF: return 'I';	// latin capital letter i with caron
    case 0x01D0: return 'i';	// latin small letter i with caron
    case 0x01D1: return 'O';	// latin capital letter o with caron
    case 0x01D2: return 'o';	// latin small letter o with caron
    case 0x01D3: return 'U';	// latin capital letter u with caron
    case 0x01D4: return 'u';	// latin small letter u with caron
    case 0x0232: return 'Y';	// latin capital letter y with macron
    case 0x0233: return 'y';	// latin small letter y with macron
    case 0x02C6: return '^';	// modifier letter circumflex accent
    case 0x02DC: return '~';	// small tilde
    // combining diacritical marks
    case 0x0300: return '`';	// combining grave accent
    case 0x0301: return '\'';	// combining acute accent
    case 0x0302: return '^';	// combining circumflex accent
    case 0x0303: return '~';	// combining tilde
    case 0x0304:		// combining macron
    case 0x0305: return 0xAF;	// combining overline
    case 0x0308: return '"';	// combining diaeresis
    case 0x030A: return 0xB0;	// combining ring above
    case 0x030B:		// combining double acute accent
    case 0x030E: return '"';	// combining double vertical line above
    case 0x0331:		// combining macron below
    case 0x0332: return '_';	// combining low line
    case 0x0374: return '\'';	// greek numeral sign
    case 0x0375: return ',';	// greek lower numeral sign
    case 0x037E: return ';';	// greek question mark
    case 0x0384: return '\'';	// greek tonos
    case 0x0385: return '"';	// greek dialytika tonos
    case 0x0386: return 0xC1;	// greek capital letter alpha with tonos
    case 0x0387: return 0xB7;	// greek ano teleia
    case 0x0388: return 0xC9;	// greek capital letter epsilon with tonos
    case 0x0389: return 'H';	// greek capital letter eta with tonos
    case 0x038A: return 0xCD;	// greek capital letter iota with tonos
    case 0x038C: return 0xD3;	// greek capital letter omicron with tonos
    case 0x038E: return 0xDD;	// greek capital letter upsilon with tonos
    case 0x038F: return 'W';	// greek capital letter omega with tonos
    case 0x0390: return 0xEF;	// greek small letter iota with dialytika tonos
    case 0x0391: return 'A';	// greek capital letter alpha
    case 0x0392: return 'B';	// greek capital letter beta
    case 0x0393: return 'G';	// greek capital letter gamma
    case 0x0394: return 'D';	// greek capital letter delta
    case 0x0395: return 'E';	// greek capital letter epsilon
    case 0x0396: return 'Z';	// greek capital letter zeta
    case 0x0397: return 'H';	// greek capital letter eta
    case 0x0399: return 'I';	// greek capital letter iota
    case 0x039A: return 'K';	// greek capital letter kappa
    case 0x039B: return 'L';	// greek capital letter lambda
    case 0x039C: return 'M';	// greek capital letter mu
    case 0x039D: return 'N';	// greek capital letter nu
    case 0x039E: return 'X';	// greek capital letter xi
    case 0x039F: return 'O';	// greek capital letter omicron
    case 0x03A0: return 'P';	// greek capital letter pi
    case 0x03A1: return 'R';	// greek capital letter rho
    case 0x03A3: return 'S';	// greek capital letter sigma
    case 0x03A4: return 'T';	// greek capital letter tau
    case 0x03A5: return 'Y';	// greek capital letter upsilon
    case 0x03A6: return 'F';	// greek capital letter phi
    case 0x03A9: return 'W';	// greek capital letter omega
    case 0x03AA: return 0xCF;	// greek capital letter iota with dialytika
    case 0x03AB: return 0xBE;	// greek capital letter upsilon with dialytika
    case 0x03AC: return 0xE1;	// greek small letter alpha with tonos
    case 0x03AD: return 0xE9;	// greek small letter epsilon with tonos
    case 0x03AE: return 'h';	// greek small letter eta with tonos
    case 0x03AF: return 0xED;	// greek small letter iota with tonos
    case 0x03B1: return 'a';	// greek small letter alpha
    case 0x03B2: return 'b';	// greek small letter beta
    case 0x03B3: return 'g';	// greek small letter gamma
    case 0x03B4: return 'd';	// greek small letter delta
    case 0x03B5: return 'e';	// greek small letter epsilon
    case 0x03B6: return 'z';	// greek small letter zeta
    case 0x03B7: return 'h';	// greek small letter eta
    case 0x03B9: return 'i';	// greek small letter iota
    case 0x03BA: return 'k';	// greek small letter kappa
    case 0x03BB: return 'l';	// greek small letter lambda
    case 0x03BC: return 'm';	// greek small letter mu
    case 0x03BD: return 'n';	// greek small letter nu
    case 0x03BE: return 'x';	// greek small letter xi
    case 0x03BF: return 'o';	// greek small letter omicron
    case 0x03C0: return 'p';	// greek small letter pi
    case 0x03C1: return 'r';	// greek small letter rho
    case 0x03C2: return 's';	// greek small letter final sigma
    case 0x03C3: return 's';	// greek small letter sigma
    case 0x03C4: return 't';	// greek small letter tau
    case 0x03C5: return 'y';	// greek small letter upsilon
    case 0x03C6: return 'f';	// greek small letter phi
    case 0x03C9: return 'w';	// greek small letter omega
    case 0x03CA: return 0xEF;	// greek small letter iota with dialytika
    case 0x03CB: return 0xFF;	// greek small letter upsilon with dialytika
    case 0x03CC: return 0xF3;	// greek small letter omicron with tonos
    case 0x03CD: return 0xFD;	// greek small letter upsilon with tonos
    case 0x03CE: return 'w';	// greek small letter omega with tonos
    case 0x03D0: return 'b';	// greek beta symbol
    case 0x03D2: return 'Y';	// greek upsilon with hook symbol
    case 0x03D3: return 0xDD;	// greek upsilon with acute and hook symbol
    case 0x03D4: return 0xBE;	// greek upsilon with diaeresis and hook symbol
    case 0x03D5: return 'f';	// greek phi symbol
    case 0x03D6: return 'p';	// greek pi symbol
    case 0x03D7: return '&';	// greek kai symbol
    case 0x03F0: return 'k';	// greek kappa symbol
    case 0x03F1: return 'r';	// greek rho symbol
    case 0x03F2: return 's';	// greek lunate sigma symbol
    case 0x03F3: return 'j';	// greek letter yot
    case 0x03F5: return 'e';	// greek lunate epsilon symbol
    case 0x03F9: return 'S';	// greek capital lunate sigma symbol
    case 0x0400: return 0xC8;	// cyrillic capital letter ie with grave
    case 0x0401: return 0xCB;	// cyrillic capital letter io
    case 0x0402: return 'D';	// cyrillic capital letter dje
    case 0x0403: return 'G';	// cyrillic capital letter gje
    case 0x0404: return 0xCA;	// cyrillic capital letter ukrainian ie
    case 0x0405: return 'Z';	// cyrillic capital letter dze
    case 0x0406: return 0xCD;	// cyrillic capital letter byelorussian-ukrainian i
    case 0x0407: return 0xCF;	// cyrillic capital letter yi
    case 0x0408: return 'J';	// cyrillic capital letter je
    case 0x0409: return 'L';	// cyrillic capital letter lje
    case 0x040A: return 'N';	// cyrillic capital letter nje
    case 0x040B: return 'C';	// cyrillic capital letter tshe
    case 0x040C: return 'K';	// cyrillic capital letter kje
    case 0x040D: return 0xCC;	// cyrillic capital letter i with grave
    case 0x040E: return 'U';	// cyrillic capital letter short u
    case 0x040F: return 'D';	// cyrillic capital letter dzhe
    case 0x0410: return 'A';	// cyrillic capital letter a
    case 0x0411: return 'B';	// cyrillic capital letter be
    case 0x0412: return 'V';	// cyrillic capital letter ve
    case 0x0413: return 'G';	// cyrillic capital letter ghe
    case 0x0414: return 'D';	// cyrillic capital letter de
    case 0x0415: return 'E';	// cyrillic capital letter ie
    case 0x0417: return 'Z';	// cyrillic capital letter ze
    case 0x0418: return 'I';	// cyrillic capital letter i
    case 0x0419: return 'I';	// cyrillic capital letter short i
    case 0x041A: return 'K';	// cyrillic capital letter ka
    case 0x041B: return 'L';	// cyrillic capital letter el
    case 0x041C: return 'M';	// cyrillic capital letter em
    case 0x041D: return 'N';	// cyrillic capital letter en
    case 0x041E: return 'O';	// cyrillic capital letter o
    case 0x041F: return 'P';	// cyrillic capital letter pe
    case 0x0420: return 'R';	// cyrillic capital letter er
    case 0x0421: return 'S';	// cyrillic capital letter es
    case 0x0422: return 'T';	// cyrillic capital letter te
    case 0x0423: return 'U';	// cyrillic capital letter u
    case 0x0424: return 'F';	// cyrillic capital letter ef
    case 0x0425: return 'H';	// cyrillic capital letter ha
    case 0x0426: return 'C';	// cyrillic capital letter tse
    case 0x0429: return 'S';	// cyrillic capital letter shcha
    case 0x042A: return '"';	// cyrillic capital letter hard sign
    case 0x042B: return 'Y';	// cyrillic capital letter yeru
    case 0x042C: return '\'';	// cyrillic capital letter soft sign
    case 0x042D: return 'E';	// cyrillic capital letter e
//    case 0x042E: return 0xDB;	// cyrillic capital letter yu
//    case 0x042F: return 0xC2;	// cyrillic capital letter ya
    case 0x0430: return 'a';	// cyrillic small letter a
    case 0x0431: return 'b';	// cyrillic small letter be
    case 0x0432: return 'v';	// cyrillic small letter ve
    case 0x0433: return 'g';	// cyrillic small letter ghe
    case 0x0434: return 'd';	// cyrillic small letter de
    case 0x0435: return 'e';	// cyrillic small letter ie
    case 0x0437: return 'z';	// cyrillic small letter ze
    case 0x0438: return 'i';	// cyrillic small letter i
    case 0x0439: return 'i';	// cyrillic small letter short i
    case 0x043A: return 'k';	// cyrillic small letter ka
    case 0x043B: return 'l';	// cyrillic small letter el
    case 0x043C: return 'm';	// cyrillic small letter em
    case 0x043D: return 'n';	// cyrillic small letter en
    case 0x043E: return 'o';	// cyrillic small letter o
    case 0x043F: return 'p';	// cyrillic small letter pe
    case 0x0440: return 'r';	// cyrillic small letter er
    case 0x0441: return 's';	// cyrillic small letter es
    case 0x0442: return 't';	// cyrillic small letter te
    case 0x0443: return 'u';	// cyrillic small letter u
    case 0x0444: return 'f';	// cyrillic small letter ef
    case 0x0445: return 'h';	// cyrillic small letter ha
    case 0x0446: return 'c';	// cyrillic small letter tse
    case 0x0449: return 's';	// cyrillic small letter shcha
    case 0x044A: return '"';	// cyrillic small letter hard sign
    case 0x044B: return 'y';	// cyrillic small letter yeru
    case 0x044C: return '\'';	// cyrillic small letter soft sign
    case 0x044D: return 'e';	// cyrillic small letter e
//    case 0x044E: return 0xFB;	// cyrillic small letter yu
//    case 0x044F: return 0xE2;	// cyrillic small letter ya
    case 0x0450: return 0xE8;	// cyrillic small letter ie with grave
    case 0x0451: return 0xEB;	// cyrillic small letter io
    case 0x0452: return 'd';	// cyrillic small letter dje
    case 0x0453: return 'g';	// cyrillic small letter gje
    case 0x0454: return 0xEA;	// cyrillic small letter ukrainian ie
    case 0x0455: return 'z';	// cyrillic small letter dze
    case 0x0456: return 0xED;	// cyrillic small letter byelorussian-ukrainian i
    case 0x0457: return 0xEF;	// cyrillic small letter yi
    case 0x0458: return 'j';	// cyrillic small letter je
    case 0x0459: return 'l';	// cyrillic small letter lje
    case 0x045A: return 'n';	// cyrillic small letter nje
    case 0x045B: return 'c';	// cyrillic small letter tshe
    case 0x045C: return 'k';	// cyrillic small letter kje
    case 0x045D: return 0xEC;	// cyrillic small letter i with grave
    case 0x045E: return 'u';	// cyrillic small letter short u
    case 0x045F: return 'd';	// cyrillic small letter dzhe
    case 0x0490: return 'G';	// cyrillic capital letter ghe with upturn
    case 0x0491: return 'g';	// cyrillic small letter ghe with upturn
    case 0x0492: return 'G';	// cyrillic capital letter ghe with stroke
    case 0x0493: return 'g';	// cyrillic small letter ghe with stroke
    case 0x0494: return 'G';	// cyrillic capital letter ghe with middle hook
    case 0x0495: return 'g';	// cyrillic small letter ghe with middle hook
    case 0x0498: return 'Z';	// cyrillic capital letter ze with descender
    case 0x0499: return 'z';	// cyrillic small letter ze with descender
    case 0x049A: return 'K';	// cyrillic capital letter ka with descender
    case 0x049B: return 'k';	// cyrillic small letter ka with descender
    case 0x04C1: return 'Z';	// cyrillic capital letter zhe with breve
    case 0x04C2: return 'z';	// cyrillic small letter zhe with breve
    case 0x04D0: return 'A';	// cyrillic capital letter a with breve
    case 0x04D1: return 'a';	// cyrillic small letter a with breve
    case 0x04D2: return 0xC4;	// cyrillic capital letter a with diaeresis
    case 0x04D3: return 0xE4;	// cyrillic small letter a with diaeresis
    case 0x04D6: return 'E';	// cyrillic capital letter ie with breve
    case 0x04D7: return 'e';	// cyrillic small letter ie with breve
    case 0x04D8: return 0xC4;	// cyrillic capital letter schwa
    case 0x04D9: return 0xE4;	// cyrillic small letter schwa
    case 0x04DA: return 0xC4;	// cyrillic capital letter schwa with diaeresis
    case 0x04DB: return 0xE4;	// cyrillic small letter schwa with diaeresis
    case 0x04DC: return 'Z';	// cyrillic capital letter zhe with diaeresis
    case 0x04DD: return 'z';	// cyrillic small letter zhe with diaeresis
    case 0x04DE: return 'Z';	// cyrillic capital letter ze with diaeresis
    case 0x04DF: return 'z';	// cyrillic small letter ze with diaeresis
    case 0x04E2: return 'I';	// cyrillic capital letter i with macron
    case 0x04E3: return 'i';	// cyrillic small letter i with macron
    case 0x04E4: return 0xCF;	// cyrillic capital letter i with diaeresis
    case 0x04E5: return 0xEF;	// cyrillic small letter i with diaeresis
    case 0x04E6: return 0xD6;	// cyrillic capital letter o with diaeresis
    case 0x04E7: return 0xF6;	// cyrillic small letter o with diaeresis
    case 0x04EC: return 0xCB;	// cyrillic capital letter e with diaeresis
    case 0x04ED: return 0xEB;	// cyrillic small letter e with diaeresis
    case 0x04EE: return 'U';	// cyrillic capital letter u with macron
    case 0x04EF: return 'u';	// cyrillic small letter u with macron
    case 0x04F0: return 0xDC;	// cyrillic capital letter u with diaeresis
    case 0x04F1: return 0xFC;	// cyrillic small letter u with diaeresis
    case 0x04F2: return 0xDC;	// cyrillic capital letter u with double acute
    case 0x04F3: return 0xFC;	// cyrillic small letter u with double acute
    case 0x04F4: return 'C';	// cyrillic capital letter che with diaeresis
    case 0x04F5: return 'c';	// cyrillic small letter che with diaeresis
    case 0x04F8: return 'Y';	// cyrillic capital letter yeru with diaeresis
    case 0x04F9: return 0xFF;	// cyrillic small letter yeru with diaeresis
    case 0x0531: return 'A';	// armenian capital letter ayb
    case 0x0532: return 'B';	// armenian capital letter ben
    case 0x0533: return 'G';	// armenian capital letter gim
    case 0x0534: return 'D';	// armenian capital letter da
    case 0x0535: return 'E';	// armenian capital letter ech
    case 0x0536: return 'Z';	// armenian capital letter za
    case 0x0537: return 'E';	// armenian capital letter eh
    case 0x0538: return 'Y';	// armenian capital letter et
    case 0x053B: return 'I';	// armenian capital letter ini
    case 0x053C: return 'L';	// armenian capital letter liwn
    case 0x053D: return 'X';	// armenian capital letter xeh
    case 0x053E: return 'C';	// armenian capital letter ca
    case 0x053F: return 'K';	// armenian capital letter ken
    case 0x0540: return 'H';	// armenian capital letter ho
    case 0x0541: return 'J';	// armenian capital letter ja
    case 0x0544: return 'M';	// armenian capital letter men
    case 0x0545: return 'Y';	// armenian capital letter yi
    case 0x0546: return 'N';	// armenian capital letter now
    case 0x0548: return 'O';	// armenian capital letter vo
    case 0x054A: return 'P';	// armenian capital letter peh
    case 0x054B: return 'J';	// armenian capital letter jheh
    case 0x054D: return 'S';	// armenian capital letter seh
    case 0x054E: return 'V';	// armenian capital letter vew
    case 0x054F: return 'T';	// armenian capital letter tiwn
    case 0x0550: return 'R';	// armenian capital letter reh
    case 0x0551: return 'C';	// armenian capital letter co
    case 0x0552: return 'W';	// armenian capital letter yiwn
    case 0x0555: return 'O';	// armenian capital letter oh
    case 0x0556: return 'F';	// armenian capital letter feh
    case 0x0561: return 'a';	// armenian small letter ayb
    case 0x0562: return 'b';	// armenian small letter ben
    case 0x0563: return 'g';	// armenian small letter gim
    case 0x0564: return 'd';	// armenian small letter da
    case 0x0565: return 'e';	// armenian small letter ech
    case 0x0566: return 'z';	// armenian small letter za
    case 0x0567: return 'e';	// armenian small letter eh
    case 0x0568: return 'y';	// armenian small letter et
    case 0x056B: return 'i';	// armenian small letter ini
    case 0x056C: return 'l';	// armenian small letter liwn
    case 0x056D: return 'x';	// armenian small letter xeh
    case 0x056E: return 'c';	// armenian small letter ca
    case 0x056F: return 'k';	// armenian small letter ken
    case 0x0570: return 'h';	// armenian small letter ho
    case 0x0571: return 'j';	// armenian small letter ja
    case 0x0574: return 'm';	// armenian small letter men
    case 0x0575: return 'y';	// armenian small letter yi
    case 0x0576: return 'n';	// armenian small letter now
    case 0x0578: return 'o';	// armenian small letter vo
    case 0x057A: return 'p';	// armenian small letter peh
    case 0x057B: return 'j';	// armenian small letter jheh
    case 0x057D: return 's';	// armenian small letter seh
    case 0x057E: return 'v';	// armenian small letter vew
    case 0x057F: return 't';	// armenian small letter tiwn
    case 0x0580: return 'r';	// armenian small letter reh
    case 0x0581: return 'c';	// armenian small letter co
    case 0x0582: return 'w';	// armenian small letter yiwn
    case 0x0585: return 'o';	// armenian small letter oh
    case 0x0586: return 'f';	// armenian small letter feh
    case 0x060C: return ',';	// arabic comma
    case 0x066A: return '%';	// arabic percent sign
    case 0x066B: return '.';	// arabic decimal separator
    case 0x066C: return '\'';	// arabic thousands separator
    case 0x066D: return '*';	// arabic five pointed star
    case 0x10D0: return 'a';	// georgian letter an
    case 0x10D1: return 'b';	// georgian letter ban
    case 0x10D2: return 'g';	// georgian letter gan
    case 0x10D3: return 'd';	// georgian letter don
    case 0x10D4: return 'e';	// georgian letter en
    case 0x10D5: return 'v';	// georgian letter vin
    case 0x10D6: return 'z';	// georgian letter zen
    case 0x10D7: return 't';	// georgian letter tan
    case 0x10D8: return 'i';	// georgian letter in
    case 0x10D9: return 'k';	// georgian letter kan
    case 0x10DA: return 'l';	// georgian letter las
    case 0x10DB: return 'm';	// georgian letter man
    case 0x10DC: return 'n';	// georgian letter nar
    case 0x10DD: return 'o';	// georgian letter on
    case 0x10DE: return 'p';	// georgian letter par
    case 0x10E0: return 'r';	// georgian letter rae
    case 0x10E1: return 's';	// georgian letter san
    case 0x10E2: return 't';	// georgian letter tar
    case 0x10E3: return 'u';	// georgian letter un
    case 0x10E7: return 'q';	// georgian letter qar
    case 0x10EE: return 'x';	// georgian letter xan
    case 0x10EF: return 'j';	// georgian letter jhan
    case 0x10F0: return 'h';	// georgian letter hae
    // psili = oxia = acute, dasia = varia = grave, perispomeni = tilde
    case 0x1F00: return 0xE1;	// greek small letter alpha with psili
    case 0x1F01: return 0xE0;	// greek small letter alpha with dasia
    case 0x1F02: return 0xE2;	// greek small letter alpha with psili and varia
    case 0x1F03:		// greek small letter alpha with dasia and varia
    case 0x1F04:		// greek small letter alpha with psili and oxia
    case 0x1F05: return 0xE4;	// greek small letter alpha with dasia and oxia
    case 0x1F06:		// greek small letter alpha with psili and perispomeni
    case 0x1F07: return 0xE3;	// greek small letter alpha with dasia and perispomeni
    case 0x1F08: return 0xC1;	// greek capital letter alpha with psili
    case 0x1F09: return 0xC0;	// greek capital letter alpha with dasia
    case 0x1F0A: return 0xC2;	// greek capital letter alpha with psili and varia
    case 0x1F0B:		// greek capital letter alpha with dasia and varia
    case 0x1F0C:		// greek capital letter alpha with psili and oxia
    case 0x1F0D: return 0xC4;	// greek capital letter alpha with dasia and oxia
    case 0x1F0E:		// greek capital letter alpha with psili and perispomeni
    case 0x1F0F: return 0xC3;	// greek capital letter alpha with dasia and perispomeni
    case 0x1F10: return 0xE9;	// greek small letter epsilon with psili
    case 0x1F11: return 0xE8;	// greek small letter epsilon with dasia
    case 0x1F12: return 0xEA;	// greek small letter epsilon with psili and varia
    case 0x1F13:		// greek small letter epsilon with dasia and varia
    case 0x1F14:		// greek small letter epsilon with psili and oxia
    case 0x1F15: return 0xEB;	// greek small letter epsilon with dasia and oxia
    case 0x1F18: return 0xC9;	// greek capital letter epsilon with psili
    case 0x1F19: return 0xC8;	// greek capital letter epsilon with dasia
    case 0x1F1A: return 0xCA;	// greek capital letter epsilon with psili and varia
    case 0x1F1B:		// greek capital letter epsilon with dasia and varia
    case 0x1F1C:		// greek capital letter epsilon with psili and oxia
    case 0x1F1D: return 0xCB;	// greek capital letter epsilon with dasia and oxia
    case 0x1F20:		// greek small letter eta with psili
    case 0x1F21:		// greek small letter eta with dasia
    case 0x1F22:		// greek small letter eta with psili and varia
    case 0x1F23:		// greek small letter eta with dasia and varia
    case 0x1F24:		// greek small letter eta with psili and oxia
    case 0x1F25:		// greek small letter eta with dasia and oxia
    case 0x1F26:		// greek small letter eta with psili and perispomeni
    case 0x1F27: return 'h';	// greek small letter eta with dasia and perispomeni
    case 0x1F28:		// greek capital letter eta with psili
    case 0x1F29:		// greek capital letter eta with dasia
    case 0x1F2A:		// greek capital letter eta with psili and varia
    case 0x1F2B:		// greek capital letter eta with dasia and varia
    case 0x1F2C:		// greek capital letter eta with psili and oxia
    case 0x1F2D:		// greek capital letter eta with dasia and oxia
    case 0x1F2E:		// greek capital letter eta with psili and perispomeni
    case 0x1F2F: return 'H';	// greek capital letter eta with dasia and perispomeni
    case 0x1F30: return 0xED;	// greek small letter iota with psili
    case 0x1F31: return 0xEC;	// greek small letter iota with dasia
    case 0x1F32: return 0xEE;	// greek small letter iota with psili and varia
    case 0x1F33:		// greek small letter iota with dasia and varia
    case 0x1F34:		// greek small letter iota with psili and oxia
    case 0x1F35: return 0xEF;	// greek small letter iota with dasia and oxia
    case 0x1F36:		// greek small letter iota with psili and perispomeni
    case 0x1F37: return 0xEE;	// greek small letter iota with dasia and perispomeni
    case 0x1F38: return 0xCD;	// greek capital letter iota with psili
    case 0x1F39: return 0xCC;	// greek capital letter iota with dasia
    case 0x1F3A: return 0xCE;	// greek capital letter iota with psili and varia
    case 0x1F3B:		// greek capital letter iota with dasia and varia
    case 0x1F3C:		// greek capital letter iota with psili and oxia
    case 0x1F3D: return 0xCF;	// greek capital letter iota with dasia and oxia
    case 0x1F3E:		// greek capital letter iota with psili and perispomeni
    case 0x1F3F: return 0xCE;	// greek capital letter iota with dasia and perispomeni
    case 0x1F40: return 0xF3;	// greek small letter omicron with psili
    case 0x1F41: return 0xF2;	// greek small letter omicron with dasia
    case 0x1F42: return 0xF4;	// greek small letter omicron with psili and varia
    case 0x1F43:		// greek small letter omicron with dasia and varia
    case 0x1F44:		// greek small letter omicron with psili and oxia
    case 0x1F45: return 0xF6;	// greek small letter omicron with dasia and oxia
    case 0x1F48: return 0xD3;	// greek capital letter omicron with psili
    case 0x1F49: return 0xD2;	// greek capital letter omicron with dasia
    case 0x1F4A: return 0xD4;	// greek capital letter omicron with psili and varia
    case 0x1F4B:		// greek capital letter omicron with dasia and varia
    case 0x1F4C:		// greek capital letter omicron with psili and oxia
    case 0x1F4D: return 0xD6;	// greek capital letter omicron with dasia and oxia
    case 0x1F50:		// greek small letter upsilon with psili
    case 0x1F51: return 0xFD;	// greek small letter upsilon with dasia
    case 0x1F52:		// greek small letter upsilon with psili and varia
    case 0x1F53:		// greek small letter upsilon with dasia and varia
    case 0x1F54:		// greek small letter upsilon with psili and oxia
    case 0x1F55:		// greek small letter upsilon with dasia and oxia
    case 0x1F56:		// greek small letter upsilon with psili and perispomeni
    case 0x1F57: return 0xFF;	// greek small letter upsilon with dasia and perispomeni
    case 0x1F59: return 0xDD;	// greek capital letter upsilon with dasia
    case 0x1F5B:		// greek capital letter upsilon with dasia and varia
    case 0x1F5D:		// greek capital letter upsilon with dasia and oxia
    case 0x1F5F: return 0xBE;	// greek capital letter upsilon with dasia and perispomeni
    case 0x1F60:		// greek small letter omega with psili
    case 0x1F61:		// greek small letter omega with dasia
    case 0x1F62:		// greek small letter omega with psili and varia
    case 0x1F63:		// greek small letter omega with dasia and varia
    case 0x1F64:		// greek small letter omega with psili and oxia
    case 0x1F65:		// greek small letter omega with dasia and oxia
    case 0x1F66:		// greek small letter omega with psili and perispomeni
    case 0x1F67: return 'w';	// greek small letter omega with dasia and perispomeni
    case 0x1F68:		// greek capital letter omega with psili
    case 0x1F69:		// greek capital letter omega with dasia
    case 0x1F6A:		// greek capital letter omega with psili and varia
    case 0x1F6B:		// greek capital letter omega with dasia and varia
    case 0x1F6C:		// greek capital letter omega with psili and oxia
    case 0x1F6D:		// greek capital letter omega with dasia and oxia
    case 0x1F6E:		// greek capital letter omega with psili and perispomeni
    case 0x1F6F: return 'W';	// greek capital letter omega with dasia and perispomeni
    case 0x1F70: return 0xE0;	// greek small letter alpha with varia
    case 0x1F71: return 0xE1;	// greek small letter alpha with oxia
    case 0x1F72: return 0xE8;	// greek small letter epsilon with varia
    case 0x1F73: return 0xE9;	// greek small letter epsilon with oxia
    case 0x1F74:		// greek small letter eta with varia
    case 0x1F75: return 'h';	// greek small letter eta with oxia
    case 0x1F76: return 0xEC;	// greek small letter iota with varia
    case 0x1F77: return 0xED;	// greek small letter iota with oxia
    case 0x1F78: return 0xF2;	// greek small letter omicron with varia
    case 0x1F79: return 0xF3;	// greek small letter omicron with oxia
    case 0x1F7A:		// greek small letter upsilon with varia
    case 0x1F7B: return 0xFD;	// greek small letter upsilon with oxia
    case 0x1F7C:		// greek small letter omega with varia
    case 0x1F7D: return 'w';	// greek small letter omega with oxia
    case 0x2020: return '*';	// dagger
    case 0x2022: return 0xB7;	// bullet
    case 0x2024: return '.';
    case 0x2027: return 0xB7;	// hyphenation point
    case 0x2035: return '`';
    case 0x2039: return '<';
    case 0x203A: return '>';
    case 0x2044: return '/';
    case 0x204A: return '&';
    case 0x204B: return 0xB6;	// reversed pilcrow sign
    case 0x204E: return '*';
    case 0x204F: return ';';
    case 0x2052: return '%';
    case 0x2053: return '~';
    case 0x2070: return 0xB0;	// superscript zero
    case 0x2071: return 'i';	// superscript latin small letter i
    case 0x2072: return 0xB2;	// superscript two
    case 0x2073: return 0xB3;	// superscript three
    case 0x2074: return '4';	// superscript four
    case 0x2075: return '5';	// superscript five
    case 0x2076: return '6';	// superscript six
    case 0x2077: return '7';	// superscript seven
    case 0x2078: return '8';	// superscript eight
    case 0x2079: return '9';	// superscript nine
    case 0x207A: return '+';	// superscript plus sign
    case 0x207B: return '-';	// superscript minus
    case 0x207C: return '=';	// superscript equals sign
    case 0x207D: return '(';	// superscript left parenthesis
    case 0x207E: return ')';	// superscript right parenthesis
    case 0x207F: return 'n';	// superscript latin small letter n
    case 0x2080: return '0';	// subscript zero
    case 0x2081: return '1';	// subscript one
    case 0x2082: return '2';	// subscript two
    case 0x2083: return '3';	// subscript three
    case 0x2084: return '4';	// subscript four
    case 0x2085: return '5';	// subscript five
    case 0x2086: return '6';	// subscript six
    case 0x2087: return '7';	// subscript seven
    case 0x2088: return '8';	// subscript eight
    case 0x2089: return '9';	// subscript nine
    case 0x208A: return '+';	// subscript plus sign
    case 0x208B: return '-';	// subscript minus
    case 0x208C: return '=';	// subscript equals sign
    case 0x208D: return '(';	// subscript left parenthesis
    case 0x208E: return ')';	// subscript right parenthesis
    case 0x2090: return 'a';	// latin subscript small letter a
    case 0x2091: return 'e';	// latin subscript small letter e
    case 0x2092: return 'o';	// latin subscript small letter o
    case 0x2093: return 'x';	// latin subscript small letter x
    case 0x2094: return 'a';	// latin subscript small letter schwa
    case 0x2095: return 'h';	// latin subscript small letter h
    case 0x2096: return 'k';	// latin subscript small letter k
    case 0x2097: return 'l';	// latin subscript small letter l
    case 0x2098: return 'm';	// latin subscript small letter m
    case 0x2099: return 'n';	// latin subscript small letter n
    case 0x209A: return 'p';	// latin subscript small letter p
    case 0x209B: return 's';	// latin subscript small letter s
    case 0x209C: return 't';	// latin subscript small letter t
    case 0x20AC: return 0xA4;	// euro sign
    case 0x2112: return 'L';	// script capital l
    case 0x2113: return 'l';	// script small l
    case 0x212A: return 'K';	// Kelvin sign
    case 0x212B: return 0xC5;	// Angstrom sign
    case 0x2191: return '^';	// upwards arrow
    case 0x2193: return 'V';	// downwards arrow
    case 0x21D1: return '^';	// upwards double arrow
    case 0x21D3: return 'V';	// downwards double arrow
    case 0x2208: return 'E';	// element of
    case 0x220A: return 'e';	// small element of
    case 0x220F: return 'P';	// N-ary product
    case 0x2211: return 'S';	// N-ary summation
    case 0x2212: return '-';	// minus sign
    case 0x2213: return 0xB1;	// minus-or-plus sign
    case 0x2215: return '/';	// division slash
    case 0x2216: return '\\';	// set minus
    case 0x2217: return '*';	// asterisk operator
    case 0x2218: return 0xB0;	// ring operator
    case 0x2219: return 0xB7;	// bullet operator
    case 0x2223: return '|';	// divides
    case 0x2236: return ':';	// ratio
    case 0x223C: return '~';	// tilde operator
    case 0x2243:		// asymptotically equal to
    case 0x2245:		// approximately equal to
    case 0x2248:		// almost equal to
    case 0x2261: return '=';	// identical to
    case 0x227A: return '<';	// precedes
    case 0x227B: return '>';	// succeeds
    case 0x22B0: return '<';	// precedes under relation
    case 0x22B1: return '>';	// succeeds under relation
    case 0x222A:		// union
    case 0x22C3: return 'U';	// n-ary union
    case 0x22C5: return 0xB7;	// dot operator
    case 0x23AF: return '-';	// horizontal line extension
    case 0x2500:		// box drawings light horizontal
    case 0x2501: return '-';	// box drawings heavy horizontal
    case 0x2502:		// box drawings light vertical
    case 0x2503: return '|';	// box drawings heavy vertical
    case 0x2514: return '`';	// box drawings light up and right
    case 0x2571: return '/';	// box drawings light diagonal upper right
    case 0x2572: return '\\';	// box drawings light diagonal upper left
    case 0x27E8: return '<';	// mathematical left angle bracket
    case 0x27E9: return '>';	// mathematical right angle bracket
    case 0x3001: return ',';	// ideographic comma
    case 0x3002: return '.';	// ideographic full stop
    case 0xFFFD: return '?';	// replacement character (unknown character)
    }
  if( code < 256 ) return code;
//  if( code >= 0x02B0 && code <= 0x02FF ) return 256;	// spacing modifier letters
//  if( code >= 0x0300 && code <= 0x036F ) return 256;	// combining diacritical marks
  if( code >= 0x0660 && code <= 0x0669 ) return code - 0x0660 + '0';	// arabic-indic digits
  if( ( code >= 0x2000 && code <= 0x200B ) ||
      code == 0x202F || code == 0x205F || code == 0x3000 ) return ' ';
  if( code == 0x200C || code == 0x200D ) return 256;	// zero width (non-)joiner
  if( ( code >= 0x2010 && code <= 0x2013 ) || code == 0x2043 ) return '-';
  if( ( code >= 0x2018 && code <= 0x201B ) || code == 0x2032 ) return '\'';
  if( ( code >= 0x201C && code <= 0x201F ) || code == 0x2033 || code == 0x2036 )
    return '"';
  if( code >= 0x239B && code <= 0x23AD ) return '|';
  return -1;
  }


/* Transliterate the UCS code 'code' to a string of ISO-8859-15 chars.
   Return a pointer to a string literal holding the replacement text, or 0
   if there is no entry for 'code'.
   No entry maps to an empty string: callers treat any non-null result as
   "character translated", so an empty replacement would silently delete
   the character from the text.
*/
const char * map_to_string( const int code )
  {
  switch( code )
    {
    case 0x00BC: return "1/4";	// vulgar fraction one quarter
    case 0x00BD: return "1/2";	// vulgar fraction one half
    case 0x00BE: return "3/4";	// vulgar fraction three quarters
    case 0x0132: return "IJ";	// latin capital ligature ij
    case 0x0133: return "ij";	// latin small ligature ij
    case 0x0398: return "TH";	// greek capital letter theta
    case 0x03A7: return "CH";	// greek capital letter chi
    case 0x03A8: return "PS";	// greek capital letter psi
    case 0x03B8: return "th";	// greek small letter theta
    case 0x03C7: return "ch";	// greek small letter chi
    case 0x03C8: return "ps";	// greek small letter psi
    case 0x03D1: return "th";	// greek theta symbol
    case 0x03F4: return "TH";	// greek capital theta symbol
    case 0x0416: return "Zh";	// cyrillic capital letter zhe
    case 0x0427: return "Ch";	// cyrillic capital letter che
    case 0x0428: return "Sh";	// cyrillic capital letter sha
    case 0x042E: return "Yu";	// cyrillic capital letter yu
    case 0x042F: return "Ya";	// cyrillic capital letter ya
    case 0x0436: return "zh";	// cyrillic small letter zhe
    case 0x0447: return "ch";	// cyrillic small letter che
    case 0x0448: return "sh";	// cyrillic small letter sha
    case 0x044E: return "yu";	// cyrillic small letter yu
    case 0x044F: return "ya";	// cyrillic small letter ya
    case 0x04D4: return "AE";	// cyrillic capital ligature a ie
    case 0x04D5: return "ae";	// cyrillic small ligature a ie
    case 0x0539: return "T'";	// armenian capital letter to
    case 0x053A: return "ZH";	// armenian capital letter zhe
    case 0x0542: return "GH";	// armenian capital letter ghad
    case 0x0543: return "CH";	// armenian capital letter cheh
    case 0x0547: return "SH";	// armenian capital letter sha
    case 0x0549: return "CH'";	// armenian capital letter cha
    case 0x054C: return "RR";	// armenian capital letter ra
    case 0x0553: return "P'";	// armenian capital letter piwr
    case 0x0554: return "K'";	// armenian capital letter keh
    case 0x0569: return "t'";	// armenian small letter to
    case 0x056A: return "zh";	// armenian small letter zhe
    case 0x0572: return "gh";	// armenian small letter ghad
    case 0x0573: return "ch";	// armenian small letter cheh
    case 0x0577: return "sh";	// armenian small letter sha
    case 0x0579: return "ch'";	// armenian small letter cha
    case 0x057C: return "rr";	// armenian small letter ra
    case 0x0583: return "p'";	// armenian small letter piwr
    case 0x0584: return "k'";	// armenian small letter keh
    case 0x10DF: return "zh";	// georgian letter zhar
    case 0x10E4: return "p'";	// georgian letter phar
    case 0x10E5: return "k'";	// georgian letter khar
    case 0x10E6: return "gh";	// georgian letter ghan
    case 0x10E8: return "sh";	// georgian letter shin
    case 0x10E9: return "ch";	// georgian letter chin
    case 0x10EA: return "ts";	// georgian letter can
    case 0x10EB: return "dz";	// georgian letter jil
    case 0x10EC: return "ts'";	// georgian letter cil
    case 0x10ED: return "ch'";	// georgian letter char
    case 0x1E9E: return "SS";	// latin capital letter sharp s (german)
    case 0x2014:		// em dash
    case 0x2015: return "--";	// horizontal bar
    case 0x2016: return "||";	// double vertical line (norm of matrix)
    case 0x2017: return "__";	// double low line
    case 0x2025: return "..";
    case 0x2026: return "...";
    case 0x2034: return "'''";
    case 0x2037: return "```";
    case 0x203C: return "!!";
    case 0x2047: return "??";
    case 0x2048: return "?!";
    case 0x2049: return "!?";
    case 0x2057: return "''''";
    case 0x2116: return "N";	// numero sign
    case 0x2122: return "TM";	// trade mark sign
    case 0x2190: return "<-";	// leftwards arrow
    case 0x2192: return "->";	// rightwards arrow
    case 0x2194: return "<->";	// left right arrow
    case 0x21A6: return "|->";	// rightwards arrow from bar
    case 0x21B2: return "<-'";	// downwards arrow with tip leftwards
    case 0x21B3: return "`->";	// downwards arrow with tip rightwards
    case 0x21D0: return "<=";	// leftwards double arrow
    case 0x21D2: return "=>";	// rightwards double arrow
    case 0x21D4: return "<=>";	// left right double arrow
    case 0x221A: return "\\/";	// square root
    case 0x221E: return "oo";	// infinity
    case 0x2237: return "::";	// proportion
    case 0x2262: return "!=";	// not identical to
    case 0x2264: return "<=";	// less-than or equal to
    case 0x2265: return ">=";	// greater-than or equal to
    case 0x227C: return "<=";	// precedes or equal to
    case 0x227D: return ">=";	// succeeds or equal to
    case 0x22A3: return "-|";	// left tack
    // three middle dots (0xB7); written as escapes so that the replacement
    // does not depend on the encoding of this source file
    case 0x22EE:		// vertical ellipsis
    case 0x22EF: return "\xB7\xB7\xB7";	// midline horizontal ellipsis
    case 0x230A: return "|_";	// left floor
    case 0x230B: return "_|";	// right floor
    case 0x2639: return ":-(";	// white frowning face
    case 0x263A:		// white smiling face
    case 0x263B: return ":-)";	// black smiling face
    // alphabetic presentation forms
    case 0xFB00: return "ff";	// latin small ligature ff
    case 0xFB01: return "fi";	// latin small ligature fi
    case 0xFB02: return "fl";	// latin small ligature fl
    case 0xFB03: return "ffi";	// latin small ligature ffi
    case 0xFB04: return "ffl";	// latin small ligature ffl
    case 0xFB05: return "ft";	// latin small ligature long s t
    case 0xFB06: return "st";	// latin small ligature st
    }
  return 0;				// no transliteration known
  }


/* Return the UCS code of the ISO-8859-15 byte 'ch'.
   Only the eight positions where ISO-8859-15 differs from ISO-8859-1 need
   translation; any other byte value is its own UCS code.
*/
int map_to_ucs( const unsigned char ch )
  {
  if( ch == 0xA4 ) return 0x20AC;	// euro sign
  if( ch == 0xA6 ) return 0x0160;	// latin capital letter s with caron
  if( ch == 0xA8 ) return 0x0161;	// latin small letter s with caron
  if( ch == 0xB4 ) return 0x017D;	// latin capital letter z with caron
  if( ch == 0xB8 ) return 0x017E;	// latin small letter z with caron
  if( ch == 0xBC ) return 0x0152;	// latin capital ligature oe
  if( ch == 0xBD ) return 0x0153;	// latin small ligature oe
  if( ch == 0xBE ) return 0x0178;	// latin capital letter y with diaeresis
  return ch;
  }

} // end namespace Encoding


/* Encode the bytes of input string 'in' as base64 text (RFC 4648
   alphabet) into output string 'out'. Any previous contents of 'out' are
   discarded. The output is padded with '=' to a multiple of 4 characters
   and contains no line terminators.
*/
void Encoding::base64_encode( const std::string & in, std::string & out )
  {
  static const char alphabet[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
  const unsigned long size = in.size();

  out.clear();
  for( unsigned long pos = 0; pos < size; pos += 3 )
    {
    const unsigned long left = size - pos;	// input bytes left (>= 1)
    // pack up to 3 input bytes into 24 bits; missing bytes count as 0
    unsigned long group = (unsigned long)(unsigned char)in[pos] << 16;
    if( left > 1 ) group |= (unsigned long)(unsigned char)in[pos+1] << 8;
    if( left > 2 ) group |= (unsigned char)in[pos+2];

    out += alphabet[(group >> 18) & 0x3F];
    out += alphabet[(group >> 12) & 0x3F];
    if( left > 1 ) out += alphabet[(group >> 6) & 0x3F];
    else out += '=';
    if( left > 2 ) out += alphabet[group & 0x3F];
    else out += '=';
    }
  }


/* Decode base64 encoded input string 'in' to output string 'out'.
   Return true if the whole input was valid base64 data made of complete
   4-character groups, with '=' padding allowed only in the last group.
   Decoding stops at the first invalid group; 'out' then keeps everything
   decoded so far, including the bytes of the invalid group decoded before
   the bad character was reached.
   If 'idxp' is not null, *idxp always receives the index where decoding
   stopped: in.size() on success, else the index of the first group that
   is invalid or incomplete.
   Line terminators are not valid base64 characters, so they must be
   removed from the input string before calling this function.
*/
bool Encoding::base64_decode( const std::string & in, std::string & out,
                              long * const idxp )
  {
  const unsigned long size = in.size();
  unsigned long pos;
  out.clear();

  for( pos = 0; pos + 3 < size; pos += 4 )
    {
    const bool last_group = ( pos + 4 == size );	// padding allowed here
    const int v0 = base64_value( in[pos] );
    const int v1 = base64_value( in[pos+1] );
    if( v0 < 0 || v1 < 0 ) break;
    out += ( v0 << 2 ) | ( v1 >> 4 );			// first byte

    if( in[pos+2] == '=' )				// "xx==", one byte
      { if( last_group && in[pos+3] == '=' ) continue; else break; }
    const int v2 = base64_value( in[pos+2] );
    if( v2 < 0 ) break;
    out += ( ( v1 << 4 ) & 0xF0 ) | ( v2 >> 2 );	// second byte

    if( in[pos+3] == '=' )				// "xxx=", two bytes
      { if( last_group ) continue; else break; }
    const int v3 = base64_value( in[pos+3] );
    if( v3 < 0 ) break;
    out += ( ( v2 << 6 ) & 0xC0 ) | v3;			// third byte
    }
  if( idxp ) *idxp = pos;
  return pos == size;
  }


/* Decode quoted-printable encoded input string 'in' to output string 'out'.
   Return true if decoding was successful, i.e., if the input was valid
   quoted-printable data. As soon as an invalid escape sequence is found,
   decoding stops and false is returned.
   If 'idxp' is not null, *idxp always receives the index where decoding
   stopped: in.size() on success, else the index of the '=' that starts the
   invalid sequence.
   "=\n" and "=\r\n" are soft line breaks and produce no output. Hex digits
   must be upper case. A lone '=' as last character of the input is dropped.
*/
bool Encoding::quoted_printable_decode( const std::string & in,
                                        std::string & out, long * const idxp )
  {
  const unsigned long size = in.size();
  unsigned long pos = 0;
  out.clear();

  while( pos < size )
    {
    const unsigned char ch = in[pos];
    if( ch != '=' ) { out += ch; ++pos; continue; }	// literal char
    if( pos + 1 >= size ) { ++pos; continue; }		// lone '=' at end

    const unsigned char ch1 = in[pos+1];
    if( ch1 == '\n' ) { pos += 2; continue; }		// soft line break
    if( pos + 2 >= size ) break;			// truncated escape
    const unsigned char ch2 = in[pos+2];
    if( ch1 == '\r' )
      {
      if( ch2 != '\n' ) break;
      pos += 3; continue;				// soft line break (CRLF)
      }

    const int hi = ISO_8859::xvalue( ch1 );
    const int lo = ISO_8859::xvalue( ch2 );
    const bool lower = std::islower( ch1 ) || std::islower( ch2 );
    if( hi < 0 || lo < 0 || lower ) break;		// not "=XX"
    out += ( hi << 4 ) + lo;
    pos += 3;
    }
  if( idxp ) *idxp = pos;
  return pos == size;
  }


/* Apply the ROT13 substitution to 'ch': a letter 'A'-'Z' or 'a'-'z' is
   rotated 13 places within its own case; any other byte is returned
   unchanged. Applying the function twice gives back the original byte.
   Relies on the letters of each case being contiguous in the execution
   character set, as they are in ASCII.
*/
unsigned char Encoding::rot13( const unsigned char ch )
  {
  if( ch >= 'A' && ch <= 'Z' ) return 'A' + ( ch - 'A' + 13 ) % 26;
  if( ch >= 'a' && ch <= 'z' ) return 'a' + ( ch - 'a' + 13 ) % 26;
  return ch;
  }


/* Apply the ROT47 substitution to 'ch': the 94 codes from 33 ('!' in
   ASCII) to 126 ('~') are rotated 47 places within that range; any other
   byte is returned unchanged. Applying the function twice gives back the
   original byte.
*/
unsigned char Encoding::rot47( const unsigned char ch )
  {
  const int first = 33, last = 126, half = 47;

  if( ch < first || ch > last ) return ch;
  return ( ch < first + half ) ? ch + half : ch - half;
  }


/* Encode ISO-8859-15 encoded input string 'in' to output string 'out',
   replacing every byte >= 128 with whatever ISO_8859::map_to_ascii appends
   for it. Bytes < 128 are copied unmodified.
   Return false if no encoding is needed, i.e., if every byte of 'in' is
   already < 128; in this case 'out' is left empty.
*/
bool Encoding::ascii_encode( const std::string & in, std::string & out )
  {
  const unsigned long size = in.size();
  unsigned long pos = 0;
  out.clear();

  // look for the first byte that needs encoding
  while( pos < size && (unsigned char)in[pos] < 128 ) ++pos;
  if( pos == size ) return false;		// pure ASCII, nothing to do

  out.append( in, 0, pos );			// leading ASCII text
  while( pos < size )
    {
    ISO_8859::map_to_ascii( in[pos++], out );	// this byte is >= 128
    const unsigned long start = pos;		// copy the ASCII run after it
    while( pos < size && (unsigned char)in[pos] < 128 ) ++pos;
    out.append( in, start, pos - start );
    }
  return true;
  }


/* Append to 'out' the UTF-8 encoding of the UCS code 'code'.
   Codes of up to 31 bits are accepted, producing sequences of 1 to 6
   bytes. Nothing is appended if 'code' is negative.
*/
void Encoding::ucs_to_utf8( const int code, std::string & out )
  {
  // lead byte prefix for sequences with 1 to 5 continuation bytes
  static const unsigned char lead[5] = { 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };

  if( code < 0 || code > 0x7FFFFFFF ) return;		// invalid code
  if( code < 0x80 ) { out += code; return; }		// plain ascii

  int tail;			// number of continuation bytes needed
  if( code < 0x800 ) tail = 1;
  else if( code < 0x10000 ) tail = 2;
  else if( code < 0x200000 ) tail = 3;
  else if( code < 0x4000000 ) tail = 4;
  else tail = 5;

  // the lead byte carries the top bits, each continuation byte 6 more
  out += lead[tail-1] | ( code >> ( 6 * tail ) );
  for( int shift = 6 * ( tail - 1 ); shift >= 0; shift -= 6 )
    out += 0x80 | ( ( code >> shift ) & 0x3F );		// 10XX XXXX
  }


/* Encode ISO-8859-15 encoded input string 'in' to UTF-8 in output string
   'out'.
   Return false, leaving 'out' empty, if no encoding is needed, i.e., if
   'in' is already valid UTF-8 data (plain ASCII included).
   If 'force' is true, the validity check is skipped and encoding is
   performed as soon as any byte >= 128 is found.
*/
bool Encoding::utf8_encode( const std::string & in, std::string & out,
                            const bool force )
  {
  const unsigned long size = in.size();
  unsigned long pos = 0;
  bool needed = false;
  out.clear();

  // first pass: find out whether 'in' must be encoded at all
  while( pos < size && !needed )
    {
    if( (unsigned char)in[pos] < 128 ) { ++pos; continue; }
    int len;
    if( force || utf8_to_ucs( in, pos, &len ) < 0 ) needed = true;
    else pos += len;				// skip valid UTF-8 sequence
    }
  if( !needed ) return false;

  // second pass: every input byte is one ISO-8859-15 character
  for( pos = 0; pos < size; ++pos )
    ucs_to_utf8( map_to_ucs( in[pos] ), out );
  return true;
  }


/* 'seq' contains an UTF-8 (possibly) multibyte character sequence
   starting at index 'i'. Sequences of 1 to 6 bytes (codes of up to 31
   bits) are accepted.
   Return the corresponding code and, if 'lenp' is not null, the number of
   bytes read in *lenp.
   Return -1, leaving *lenp untouched, if 'i' is out of range or if the
   sequence is invalid: bad lead byte, truncated, bad continuation byte,
   or longer than the minimum length needed for its code.
*/
int Encoding::utf8_to_ucs( const std::string & seq, const unsigned long i,
                           int * const lenp )
  {
  // smallest code that needs a sequence of the given length (index)
  static const int min_code[7] =
    { 0, 0, 0x80, 0x800, 0x10000, 0x200000, 0x4000000 };

  if( i >= seq.size() ) return -1;
  const unsigned char lead = seq[i];
  if( lead < 0x80 )					// plain ascii
    { if( lenp ) *lenp = 1; return lead; }
  if( lead < 0xC0 || lead > 0xFD ) return -1;		// invalid byte

  // each 1 bit after the two leading ones adds a byte to the sequence
  // and removes a bit from the payload of the lead byte
  int len = 2;
  int payload = 0x1F;
  for( int bit = 0x20; lead & bit; bit >>= 1 ) { ++len; payload >>= 1; }
  if( i + len > seq.size() ) return -1;			// truncated sequence

  int code = lead & payload;
  for( unsigned long k = i + 1; k < i + len; ++k )
    {
    const unsigned char next = seq[k];
    if( ( next & 0xC0 ) != 0x80 ) return -1;		// invalid byte
    code = ( code << 6 ) | ( next & 0x3F );
    }

  if( code < min_code[len] ) return -1;			// no minimum length
  if( lenp ) *lenp = len;
  return code;
  }


/* Decode UTF-8 encoded input string 'in' to output string 'out'.
   Each character is replaced by the byte given by map_to_byte (a result
   of 256 means that the character is dropped) or, failing that, by the
   string given by map_to_string. A Byte Order Mark (0xFEFF) is removed.
   Return 0 if decoding was successful, i.e., if the input was valid UTF-8
   data and every character could be converted.
   As soon as a character that can't be decoded is found, decoding stops,
   the index of the character is stored in *idxp (if 'idxp' is not null),
   and:
     if it is invalid UTF-8 data then -1 is returned,
     else the UCS code of the out of range character is returned.
   If 'force' is true, out of range UTF-8 characters are copied unmodified
   instead of returning an error.
*/
int Encoding::utf8_decode( const std::string & in, std::string & out,
                           long * const idxp, const bool force )
  {
  unsigned long pos = 0;
  out.clear();

  while( pos < in.size() )
    {
    int len;
    const int code = utf8_to_ucs( in, pos, &len );
    if( code < 0 ) { if( idxp ) *idxp = pos; return -1; }	// invalid UTF-8

    const int byte = map_to_byte( code );
    const char * const str = ( byte < 0 ) ? map_to_string( code ) : 0;
    if( byte >= 0 )
      { if( byte < 256 ) out += byte; }		// 256 means "no output"
    else if( str )
      {
      // separate fraction 1/4, 1/2, or 3/4 from preceding digit
      const bool fraction = ( code >= 0xBC && code <= 0xBE );
      if( fraction && !out.empty() &&
          std::isdigit( (unsigned char)out[out.size()-1] ) ) out += ' ';
      out += str;
      }
    else if( code == 0xFEFF ) {}	// remove UTF-8 Byte Order Mark (BOM)
    else if( force ) out.append( in, pos, len );	// keep as UTF-8
    else { if( idxp ) *idxp = pos; return code; }	// out of range
    pos += len;
    }
  return 0;
  }


/* Copy the UTF-8 encoded input string 'in' to output string 'out',
   removing any out of range UTF-8 characters, i.e., those for which
   neither map_to_byte nor map_to_string has a conversion. A Byte Order
   Mark (0xFEFF) is kept.
   Return 1 if the copy was successful and at least one character was
   removed.
   Return 0 if no copy is needed (no out of range UTF-8 characters in 'in').
   Return -1 if an invalid UTF-8 character is found. The index of the
   invalid character is stored in *idxp (if 'idxp' is not null).
*/
int Encoding::remove_utf8_out_of_range( const std::string & in, std::string & out,
                                        long * const idxp )
  {
  unsigned long pos = 0;
  int removed = 0;
  out.clear();

  while( pos < in.size() )
    {
    int len;
    const int code = utf8_to_ucs( in, pos, &len );
    if( code < 0 ) { if( idxp ) *idxp = pos; return -1; }	// invalid UTF-8

    const bool in_range = map_to_byte( code ) >= 0 ||
                          map_to_string( code ) || code == 0xFEFF;
    if( in_range ) out.append( in, pos, len );	// keep the UTF-8 bytes as is
    else removed = 1;
    pos += len;
    }
  return removed;
  }
