#                                                         -*- Perl -*-
# Copyright (c) 2007  Kazuhiro Ito
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#

use strict;
use warnings;

use English;

# use vars qw(%fpwwikipedia_conf);
# require "wikipedia-fpw.conf";

use vars qw (%entity_table %utf2euc_table $utf2euc_regexp);

# %entity_table: entity name => replacement text.
#
# Keys look like HTML character-entity names without the surrounding
# '&' and ';' (e.g. 'amp', 'nbsp', 'alpha') -- presumably that is how the
# caller looks them up; confirm against the code that reads this table.
# Lookups are case sensitive ('Alpha' and 'alpha' are distinct keys).
#
# Conventions visible in the table:
#   * '?' appears to be the placeholder for entities that have no usable
#     replacement.
#   * Upper-case Greek letters that look like a Latin capital are mapped
#     to that Latin letter (Alpha => 'A', Rho => 'P', Chi => 'X', ...);
#     the others, and all lower-case Greek letters, are mapped to the
#     Greek character itself.
#   * Accented Latin letters are written as base letter + ASCII mark
#     (acute => "'", grave => '`', circumflex => '^', tilde => '~').
#   * Several keys are not standard entity names ('ndsh', 'mdas',
#     'iaquo', 'amp1497', ...); they look like misspellings met in the
#     input data -- TODO confirm before removing any of them.
#
# This file does not enable "use utf8", so the non-ASCII literals below
# are byte strings in whatever encoding the file is saved in.
#
# NOTE(review): the umlaut and ring entries are inconsistent
# ('Auml' => 'A"' but 'Uuml' => '?'; 'uuml' => 'u¨' but 'auml' => '?';
# 'Aring' => 'A*' but 'aring' => '?').  Left untouched because two
# different conventions are in use; pick one and fill in the gaps.
%entity_table =
    (
     '1cond' => '?',
     'AElig' => 'AE',
     'AMP' => '&',
     'Aacute' => 'A\'',
     'Acirc' => 'A^',
     'Agrave' => 'A`',
     'Alpha' => 'A',
     'Amp' => '&',
     'Aring' => 'A*',
     'Atilde' => 'A~',
     'Auml' => 'A"',
     'B' => '?',
     'Beta' => 'B',
     'CS' => '?',
     'Ccedil' => 'C,',
     'Chi' => 'X',
     'Dagger' => '‡',
     'Delta'   => 'Δ',
     'Eacute' => 'E\'',
     'Ecirc' => 'E^',
     'Egrave' => 'E`',
     'Epsilon' => 'E',
     'Eta' => 'H',
     'Euml' => 'E"',
     'Gamma'   => 'Γ',
     'Iacute' => 'I\'',
     'Icirc' => 'I^',
     'Igrave' => 'I`',
     'Iota' => 'I',
     'Iuml' => 'I"',
     'Kappa' => 'K',
     'Lambda'  => 'Λ',
     'Mu' => 'M',
     'Ntilde' => 'N~',
     'Nu' => 'N',
     'Oacute' => 'O\'',
     'Ocirc' => 'O^',
     'Ograve' => 'O`',
     'Omega'   => 'Ω',
     'Omicron' => 'O',
     'Oslash' => '?',
     'Otilde' => 'O~',
     'Ouml' => 'O"',
     'Phi'     => 'Φ',
     'Pi'      => 'Π',
     'Prime'   => '″',
     'Psi'     => 'Ψ',
     'Rho' => 'P',
     'Scaron' => '?',
     'Sigma'   => 'Σ',
     'THORN' => '?',
     'Tau' => 'T',
     'Theta'   => 'Θ',
     'Uacute' => 'U\'',
     'Ucirc' => 'U^',
     'Ugrave' => 'U`',
     'Upsilon' => 'Y',
     'Uuml' => '?',
     'Xi'      => 'Ξ',
     'Yuml' => '?',
     'Zeta' => 'Z',
     'a' => '?',
     'aacute' => 'a\'',
     'acirc' => 'a^',
     'acute' => '´',
     'aelig' => 'ae',
     'agrave' => 'a`',
     'alefsym' => '?',
     'alpha'   => 'α',
     'amp' => '&',
     'amp1497' => '?',
     'ampgt' => '?',
     'amplt' => '?',
     'amul' => '?',
     'and'     => '∧',
     'ang' => '?',
     'apos' => '\'',
     'aring' => '?',
     'asymp'   => '?',
     'atilde' => 'a~',
     'auml' => '?',
     'bdquo' => '?',
     'beta'    => 'β',
     'brvbar' => '?',
     'bull' => '・',
     'cap'     => '∩',
     'ccedil' => 'c,',
     'cedil' => ',',
     'cent' => '￠',
     'chi' => 'χ',
     'circ' => '^',
     'circle' => '?',
     'clubs' => '?',
     'copy' => '(c)',
     'cup'     => '∪',
     'curren' => '?',
     'dagger' => '†',
     'darr'    => '↓',
     'dash' => '?',
     'deg'     => '°',
     'delta'   => 'δ',
     'diams' => '?',
     'divide'  => '÷',
     'eacute' => 'e\'',
     'ecirc' => 'e^',
     'egrave' => 'e`',
     'emdash' => '?',
     'empty' => '?',
     'emsp' => '?',
     'ensp' => '?',
     'epsilon' => 'ε',
     'equiv'   => '≡',
     'eta'     => 'η',
     'eth' => '?',
     'euml' => '?',
     'euro' => 'EUR',
     'exist'   => '∃',
     'forall'  => '∀',
     'frac12' => '1/2',
     'frac14' => '1/4',
     'frasl' => '?',
     'gamma'   => 'γ',
     'ge' => '≧',
     'grave' => '`',
     'gt' => '>',
     'hArr'    => '⇔',
     'harr' => '?',
     'heart' => '?',
     'hearts' => '?',
     'hellip' => '?',
     'iacute' => 'i\'',
     'iaquo' => '?',
     'icirc' => 'i^',
     'iexcl' => '?',
     'igrave' => 'i`',
     'infin'   => '∞',
     'int'     => '∫',
     'iota'    => 'ι',
     'iquest' => '?',
     'isin'    => '∈',
     'iuml' => '?',
     'kappa'   => 'κ',
     'lArr' => '?',
     'lambda'  => 'λ',
     'laquo' => '≪',
     'larr'    => '←',
     'ldquo' => '“',
     'le' => '≦',
     'lowast' => '?',
     'loz' => '?',
     'lrm' => '?',
     'lsaquo' => '〈',
     'lsquo' => '‘',
     'lt' => '<',
     'macr' => '?',
     'mdas' => '?',
     'mdash' => '―',
     'micro' => '?',
     'middot'  => '・',
     'minus'   => '－',
     'mu'      => 'μ',
     'mul' => '?',
     'nabla'   => '∇',
     'nbsp' => ' ',
     'ndash' => '--',
     'ndsh' => '--',
     'ne'      => '≠',
     'ni' => '?',
     'not'     => '￢',
     'notin'   => '?',
     'ntilde' => 'n~',
     'nu'      => 'ν',
     'oacute' => 'o\'',
     'ocirc' => 'o^',
     'oelig' => '?',
     'ograve' => 'o`',
     'omega' => 'ω',
     'omicron' => 'ο',
     'oplus' => '?',
     'or'      => '∨',
     'ordf'    => '?',
     'ordm'    => '?',
     'oslash'  => '?',
     'otilde' => 'o~',
     'otimes' => '?',
     'ouml' => '?',
     'para' => '¶',
     'part'    => '∂',
     'pd' => '?',
     'permil'  => '‰',
     'perp' => '?',
     'phi' => 'φ',
     'pi'      => 'π',
     'plusmn'  => '±',
     'pm' => '?',
     'pound' => '￡',
     'prime'   => '′',
     'prod'    => '?',
     'prop'    => '∝',
     'psi' => 'ψ',
     'quot' => '"',
     'rArr'    => '⇒',
     'radic'   => '√',
     'raquo' => '≫',
     'rarr'    => '→',
     'rdquo' => '”',
     'reg' => '(R)',
     'rho'     => 'ρ',
     'rsaquo' => '〉',
     'rsquo' => '’',
     'sbquo' => '?',
     'scaron' => '?',
     'scedil' => 's,',
     'sdot' => '?',
     'sect' => '§',
     'sigma'   => 'σ',
     'sigmaf'  => '?',
     'sim' => '?',
     'spades' => '?',
     'sub'     => '⊂',
     'sube'    => '⊆',
     'sum'     => '?',
     'sup'     => '⊃',
     'sup1' => '^1',
     'sup2' => '^2',
     'sup3' => '^3',
     'supe'    => '⊇',
     'supm2' => '?',
     'szlig' => '?',
     'tau'     => 'τ',
     'there4'  => '∴',
     'theta'   => 'θ',
     'thinsp' => '?',
     'thorn' => '?',
     'tilde' => '~',
     'times'   => '×',
     'trade' => '(TM)',
     'uacute' => 'u\'',
     'uarr'    => '↑',
     'ucirc' => 'u^',
     'ugrave' => 'u`',
     'uml' => '¨',
     'upsilon' => 'υ',
     'uuml' => 'u¨',
     'xi'      => 'ξ',
     'yacute' => 'y\'',
     'yen' => '￥',
     'yuml' => 'y¨',
     'zeta'    => 'ζ',
    );

# %utf2euc_table: byte sequence to look for => text to put in its place.
# The first two keys are spelled as UTF-8 byte escapes; the remaining keys
# are quoted literals, which are also byte strings because this file does
# not enable "use utf8".  Presumably applied to UTF-8 text ahead of an
# EUC conversion -- confirm with the code that uses the table.
%utf2euc_table = (
    # U+FF5E FULLWIDTH TILDE        -> U+301C WAVE DASH
    "\xEF\xBD\x9E" => "\xE3\x80\x9C",

    # U+FF0D FULLWIDTH HYPHEN-MINUS -> U+2212 MINUS SIGN
    "\xEF\xBC\x8D" => "\xE2\x88\x92",

    # Letters with a circumflex -> plain letter followed by '^'.
    'Ĉ' => 'C^',
    'ĉ' => 'c^',
    'Ĝ' => 'G^',
    'ĝ' => 'g^',
    'Ĥ' => 'H^',
    'ĥ' => 'h^',
    'Ĵ' => 'J^',
    'ĵ' => 'j^',
    'Ŝ' => 'S^',
    'ŝ' => 's^',

    # Disabled entries for U with breve:
    # 'Ŭ' => 'U',
    # 'ŭ' => 'u',
);

# Build $utf2euc_regexp: the source text of one capturing group that
# alternates over every key of %utf2euc_table, i.e. "(key1|key2|...)".
#
#   * Each key goes through quotemeta, so a key that contains a regexp
#     metacharacter is still matched literally.
#   * Keys are ordered longest first (ties broken by string order).  This
#     makes the string identical on every run regardless of hash ordering
#     and keeps a key that is a prefix of another from shadowing it.
#   * If the table is empty the group holds "(?!)", which never matches,
#     so the result is always a valid pattern.
{
    my @alternatives =
        map  { quotemeta($_) }
        sort { length($b) <=> length($a) or $a cmp $b }
        keys %utf2euc_table;
    @alternatives = ('(?!)') unless @alternatives;
    $utf2euc_regexp = '(' . join('|', @alternatives) . ')';
}

# The following line must stay at the end of this file.
1;
