#                                                         -*- Perl -*-
# Copyright (c) 2007-2009  Kazuhiro Ito
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#

use strict;
use warnings;

use English;

# use vars qw(%fpwwikipedia_conf);
# require "wikipedia-fpw.conf";

use vars qw (%entity_table %utf2euc_table $utf2euc_regexp);

# %entity_table maps an HTML-style character entity name (written without
# the surrounding '&' and ';', e.g. 'amp' for "&amp;") to the string stored
# as its replacement.
#
# Conventions visible in the entries:
#   - Accented Latin letters are the base letter followed by an ASCII mark:
#     ' acute, ` grave, ^ circumflex, ~ tilde, " diaeresis, , cedilla,
#     / slash, * ring.
#   - Keys are case-sensitive ('Alpha' and 'alpha' are different entries).
#   - Lines commented out with a '?' value are names that have no entry in
#     the table (presumably no replacement has been chosen for them yet --
#     verify against the code that consumes this table).
#
# Each name must be listed only once: a repeated key in a Perl hash list
# silently overrides the earlier value.
%entity_table =
    (
     '1cond' => '?',
     'AElig' => 'AE',
     'AMP' => '&',
     'Aacute' => 'A\'',
     'Acirc' => 'A^',
     'Agrave' => 'A`',
     'Alpha' => 'A',
     'Amp' => '&',
     'Aring' => 'A*',
     'Atilde' => 'A~',
     'Auml' => 'A"',
#     'B' => '?',
     'Beta' => 'B',
#     'CS' => '?',
     'Ccedil' => 'C,',
#     'Chi' => '?',
     'Dagger' => '‡',
     'Delta'   => 'Δ',
     'Eacute' => 'E\'',
     'Ecirc' => 'E^',
     'Egrave' => 'E`',
     'Epsilon' => 'E',
     'Eta' => 'H',
     'Euml' => 'E"',
     'Gamma'   => 'Γ',
     'Iacute' => 'I\'',
     'Icirc' => 'I^',
     'Igrave' => 'I`',
     'Iota' => 'I',
     'Iuml' => 'I"',
     'Kappa' => 'K',
     'Lambda'  => 'Λ',
     'Mu' => 'M',
     'Ntilde' => 'N~',
     'Nu' => 'N',
     'Oacute' => 'O\'',
     'Ocirc' => 'O^',
     'Ograve' => 'O`',
     'Omega'   => 'Ω',
#     'Omicron' => '?',
     'Oslash' => 'O/',
     'Otilde' => 'O~',
     'Ouml' => 'O"',
     'Phi'     => 'Φ',
     'Pi'      => 'Π',
     'Prime'   => '″',
     'Psi'     => 'Ψ',
     'Rho' => 'P',
#     'Scaron' => '?',
     'Sigma'   => 'Σ',
#     'THORN' => '?',
     'Tau' => 'T',
     'Theta'   => 'Θ',
     'Uacute' => 'U\'',
     'Ucirc' => 'U^',
     'Ugrave' => 'U`',
     'Upsilon' => 'Y',
     'Uuml' => 'U"',
     'Xi'      => 'Ξ',
     'Yuml' => 'Y"',
     'Zeta' => 'Z',
#     'a' => '?',
     'aacute' => 'a\'',
     'acirc' => 'a^',
     'acute' => '´',
     'aelig' => 'ae',
     'agrave' => 'a`',
#     'alefsym' => '?',
     'alpha'   => 'α',
     'amp' => '&',
#     'amp1497' => '?',
#     'ampgt' => '?',
#     'amplt' => '?',
#     'amul' => '?',
     'and'     => '∧',
#     'ang' => '?',
     'apos' => '\'',
#     'aring' => '?',
#     'asymp'   => '?',
     'atilde' => 'a~',
     'auml' => 'a"',
#     'bdquo' => '?',
     'beta'    => 'β',
#     'brvbar' => '?',
     'bull' => '・',
     'cap'     => '∩',
     'ccedil' => 'c,',
     'cedil' => ',',
     'cent' => '￠',
     'chi' => 'χ',
     'circ' => '^',
#     'circle' => '?',
#     'clubs' => '?',
     'copy' => '(c)',
     'cup'     => '∪',
#     'curren' => '?',
     'dagger' => '†',
     'darr'    => '↓',
#     'dash' => '?',
     'deg'     => '°',
     'delta'   => 'δ',
#     'diams' => '?',
     'divide'  => '÷',
     'eacute' => 'e\'',
     'ecirc' => 'e^',
     'egrave' => 'e`',
#     'emdash' => '?',
#     'empty' => '?',
#     'emsp' => '?',
#     'ensp' => '?',
     'epsilon' => 'ε',
     'equiv'   => '≡',
     'eta'     => 'η',
#     'eth' => '?',
     'euml' => 'e"',
     'euro' => 'EUR',
     'exist'   => '∃',
     'forall'  => '∀',
     'frac12' => '1/2',
     'frac14' => '1/4',
#     'frasl' => '?',
     'gamma'   => 'γ',
     'ge' => '≧',
     'grave' => '`',
     'gt' => '>',
     'hArr'    => '⇔',
#     'harr' => '?',
#     'heart' => '?',
#     'hearts' => '?',
#     'hellip' => '?',
     'iacute' => 'i\'',
#     'iaquo' => '?',
     'icirc' => 'i^',
#     'iexcl' => '?',
     'igrave' => 'i`',
     'infin'   => '∞',
     'int'     => '∫',
     'iota'    => 'ι',
#     'iquest' => '?',
     'isin'    => '∈',
     'iuml' => 'i"',
     'kappa'   => 'κ',
#     'lArr' => '?',
     'lambda'  => 'λ',
     'laquo' => '≪',
     'larr'    => '←',
     'ldquo' => '“',
     'le' => '≦',
#     'lowast' => '?',
#     'loz' => '?',
#     'lrm' => '?',
     'lsaquo' => '〈',
     'lsquo' => '‘',
     'lt' => '<',
#     'macr' => '?',
#     'mdas' => '?',
     'mdash' => '―',
     'micro' => '?',
     'middot'  => '・',
     'minus'   => '－',
     'mu'      => 'μ',
#     'mul' => '?',
     'nabla'   => '∇',
     'nbsp' => ' ',
     'ndash' => '--',
     'ndsh' => '--',
     'ne'      => '≠',
#     'ni' => '?',
     'not'     => '￢',
#     'notin'   => '?',
     'ntilde' => 'n~',
     'nu'      => 'ν',
     'oacute' => 'o\'',
     'ocirc' => 'o^',
     'oelig' => 'oe',
     'ograve' => 'o`',
     'omega' => 'ω',
     'omicron' => 'ο',
#     'oplus' => '?',
     'or'      => '∨',
#     'ordf'    => '?',
#     'ordm'    => '?',
     'oslash'  => 'o/',
     'otilde' => 'o~',
#     'otimes' => '?',
     'ouml' => 'o"',
     'para' => '¶',
     'part'    => '∂',
#     'pd' => '?',
     'permil'  => '‰',
#     'perp' => '?',
     'phi' => 'φ',
     'pi'      => 'π',
     'plusmn'  => '±',
#     'pm' => '?',
     'pound' => '￡',
     'prime'   => '′',
#     'prod'    => '?',
     'prop'    => '∝',
     'psi' => 'ψ',
     'quot' => '"',
     'rArr'    => '⇒',
     'radic'   => '√',
     'raquo' => '≫',
     'rarr'    => '→',
     'rdquo' => '”',
     'reg' => '(R)',
     'rho'     => 'ρ',
     'rsaquo' => '〉',
     'rsquo' => '’',
#     'sbquo' => '?',
#     'scaron' => '?',
     'scedil' => 's,',
#     'sdot' => '?',
     'sect' => '§',
     'sigma'   => 'σ',
#     'sigmaf'  => '?',
#     'sim' => '?',
#     'spades' => '?',
     'sub'     => '⊂',
     'sube'    => '⊆',
#     'sum'     => '?',
     'sup'     => '⊃',
     'sup1' => '^1',
     'sup2' => '^2',
     'sup3' => '^3',
     'supe'    => '⊇',
#     'supm2' => '?',
#     'szlig' => '?',
     'tau'     => 'τ',
     'there4'  => '∴',
     'theta'   => 'θ',
#     'thinsp' => '?',
#     'thorn' => '?',
     'tilde' => '~',
     'times'   => '×',
     'trade' => '(TM)',
     'uacute' => 'u\'',
     'uarr'    => '↑',
     'ucirc' => 'u^',
     'ugrave' => 'u`',
     'uml' => '¨',
     'upsilon' => 'u',
     # Diaeresis is written with ASCII '"' like every other "*uml" entry.
     'uuml' => 'u"',
     'xi'      => 'ξ',
     'yacute' => 'y\'',
     'yen' => '￥',
     'yuml' => 'y"',
     'zeta'    => 'ζ',
    );

# %utf2euc_table maps one UTF-8 encoded character to the string that
# replaces it.
#
# No "use utf8" is in effect in this file's preamble, so every key --
# whether written with \x escapes or as a literal character -- is the raw
# UTF-8 byte sequence of that character, not a decoded character string.
#
# Going by the table name, the replacements are presumably stand-ins for
# characters that cannot be converted to EUC-JP as they are -- verify
# against the code that consumes this table.
#
# Accented Latin letters follow the base-letter-plus-ASCII-mark convention:
# ` grave, ' acute, ^ circumflex, ~ tilde, " diaeresis, , cedilla.
# Values are single-quoted, where a backslash only escapes "\" and "'";
# writing "\`" would therefore store a literal backslash in the value.
%utf2euc_table =
    (
     # (FULLWIDTH TILDE, U+FF5E) -> (WAVE DASH, U+301C)
     "\xEF\xBD\x9E" => "\xE3\x80\x9C",
     # (FULLWIDTH HYPHEN-MINUS, U+FF0D) -> (MINUS SIGN, U+2212)
     "\xEF\xBC\x8D" => "\xE2\x88\x92",
     # (SUPERSCRIPT TWO, U+00B2) -> "^2"
     "\xc2\xb2" => '^2',
     # (EN DASH, U+2013) -> (HYPHEN-MINUS, U+002D)
     "\xE2\x80\x93" => '-',
     '«' => '≪',
     '»' => '≫',
     'À' => 'A`',
     'Á' => 'A\'',
     'Â' => 'A^',
     'Ã' => 'A~',
     'Ä' => 'A"',
     'Å' => 'A°',
     'Æ' => 'AE',
     'È' => 'E`',
     'É' => 'E\'',
     'Ê' => 'E^',
     'Ë' => 'E"',
     'Ì' => 'I`',
     'Í' => 'I\'',
     'Î' => 'I^',
     'Ï' => 'I"',
     'Ò' => 'O`',
     'Ó' => 'O\'',
     'Ô' => 'O^',
     'Õ' => 'O~',
     'Ö' => 'O"',
     # No backslash before the backtick: it would be kept literally.
     'Ù' => 'U`',
     'Ú' => 'U\'',
     'Û' => 'U^',
     'Ü' => 'U"',
     'à' => 'a`',
     'á' => 'a\'',
     'â' => 'a^',
     'ã' => 'a~',
     'ä' => 'a"',
     'å' => 'a°',
     'æ' => 'ae',
     'ç' => 'c,',
     'è' => 'e`',
     'é' => 'e\'',
     'ê' => 'e^',
     'ë' => 'e"',
     'ì' => 'i`',
     'í' => 'i\'',
     'î' => 'i^',
     'ï' => 'i"',
     'ò' => 'o`',
     'ó' => 'o\'',
     'ô' => 'o^',
     'õ' => 'o~',
     'ö' => 'o"',
     'ù' => 'u`',
     'ú' => 'u\'',
     'û' => 'u^',
     'ü' => 'u"',
     'Ľ' => 'L',
     'Š' => 'S',
     'Ť' => 'T\'',
     'Ž' => 'Z',
     'ľ' => 'l',
     'š' => 's',
     'ť' => 't\'',
     'ž' => 'z',
     'Č' => 'C',
     'Ě' => 'E',
     'Ď' => 'D\'',
     'Ň' => 'N',
     'Ř' => 'R',
     'č' => 'c',
     'ě' => 'e',
     'ď' => 'd\'',
     'ň' => 'n',
     'ř' => 'r',
     'Ĥ' => 'H^',
     'Ĵ' => 'J^',
     'ĥ' => 'h^',
     'ĵ' => 'j^',
     'Ĉ' => 'C^',
     'Ĝ' => 'G^',
     'Ŝ' => 'S^',
     'ĉ' => 'c^',
     'ĝ' => 'g^',
     'ŝ' => 's^',
     'Ō' => 'O',
     'ā' => 'a',
     'ī' => 'i',
     'ō' => 'o',
     'ū' => 'u',
     'ˈ' => '\'',
     'Ǎ' => 'A',
     'Ǐ' => 'I',
     'Ǒ' => 'O',
     'Ǔ' => 'U',
     'Ǚ' => 'U',
     'ǎ' => 'a',
     'ǐ' => 'i',
     'ǒ' => 'o',
     'ǔ' => 'u',
     'ǚ' => 'u',
     'ⅰ' => 'i',
     'ⅱ' => 'ii',
     'ⅲ' => 'iii',
     'ⅳ' => 'iv',
     'ⅴ' => 'v',
     'ⅵ' => 'vi',
     'ⅶ' => 'vii',
     'ⅷ' => 'viii',
     'ⅸ' => 'ix',
     'ⅹ' => 'x',
     'ⅺ' => 'xi',
     'ⅻ' => 'xii',
     'Ⅰ' => 'I',
     'Ⅱ' => 'II',
     'Ⅲ' => 'III',
     'Ⅳ' => 'IV',
     'Ⅴ' => 'V',
     'Ⅵ' => 'VI',
     'Ⅶ' => 'VII',
     'Ⅷ' => 'VIII',
     'Ⅸ' => 'IX',
     'Ⅹ' => 'X',
     'Ⅺ' => 'XI',
     'Ⅻ' => 'XII',
     'Ȟ' => 'H',
     'ȟ' => 'h',
     '€' => '[EUR]',
     'Ⅼ' => 'L',
     'Ⅽ' => 'C',
     'Ⅾ' => 'D',
     'Ⅿ' => 'M',
     'ⅼ' => 'l',
     'ⅽ' => 'c',
     'ⅾ' => 'd',
     'ⅿ' => 'm',
    );

# Build $utf2euc_regexp: the source text of one capturing group that
# alternates over every key of %utf2euc_table, i.e. "(key1|key2|...)".
#
#   - Each key goes through quotemeta, so a key that contains a regexp
#     metacharacter is matched literally instead of altering the pattern.
#   - Keys are ordered longest first (ties broken by string order), so the
#     result is identical on every run and a longer key is tried before any
#     key that is a prefix of it.
#   - If the table is empty the group holds "(?!)", which never matches, so
#     the pattern still compiles.
{
  my @alternatives =
      map  { quotemeta }
      sort { length($b) <=> length($a) or $a cmp $b }
      keys %utf2euc_table;
  @alternatives = ('(?!)') unless @alternatives;
  $utf2euc_regexp = '(' . join('|', @alternatives) . ')';
}

# A file loaded with "require" must return a true value, so the following
# line must stay at the end of this file.
1;
