#                                                         -*- Perl -*-
# Copyright (c) 2009  Kazuhiro Ito
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#

# decode_entity ($text)
#
# Replace the character references found in $text and return the result.
#
#   &name;            - replaced by $entity_table{name}; a name that has
#                       no defined entry in %entity_table becomes '?'.
#   &#NNN; / &#xHH;   - the number (with its leading "x", if any) is
#                       handed to decode_utf8 and replaced by what that
#                       returns.
#
# Everything else is copied through unchanged.
sub decode_entity {
  my ($source) = @_;
  my $decoded = '';

  # The capturing group makes split return the references themselves in
  # between the stretches of ordinary text.
  for my $chunk (split (/(&[0-9a-zA-Z]+;|&\#[0-9]+;|&\#x[0-9a-fA-F]+;)/, $source)) {
    if ($chunk =~ /&([0-9a-zA-Z]+);/) {
      my $name = $1;
      $decoded .= defined($entity_table{$name}) ? $entity_table{$name} : '?';
    } elsif ($chunk =~ /&\#([0-9]+|x[0-9a-fA-F]+);/) {
      $decoded .= decode_utf8($1);
    } else {
      $decoded .= $chunk;
    }
  }

  return $decoded;
}

# decode_utf8 ($entity)
#
# Encode the number of a numeric character reference as UTF-8 bytes.
#
#   $entity - the reference with "&#" and ";" stripped: decimal digits
#             (e.g. "12354"), or "x" followed by hexadecimal digits
#             (e.g. "x3042").
#
# Returns a byte string.  Numbers up to 0x1fffff use the usual one- to
# four-byte forms; larger numbers up to 0x7fffffff still get the
# historical five- and six-byte forms.  A number above 0x7fffffff does
# not fit any of these forms and yields '?'.
sub decode_utf8 {
  my $entity = $_[0];

  if ($entity =~ /^x(.*)/) {
    my $digits = $1;
    # Anything with more than eight significant hexadecimal digits is
    # out of range; reject it here so that hex() never sees a value it
    # cannot represent.
    $digits =~ s/^0+(?=.)//;
    return '?' if length($digits) > 8;
    $entity = hex ($digits);
  }

  # Only 31 bits can be encoded.  Without this check a larger number
  # whose bits 7..30 happen to be clear would reach chr() below and come
  # back as a single wide character instead of bytes.
  return '?' if $entity > 0x7fffffff;

  # Single byte, no marker bits needed.
  return chr($entity) if $entity < 0x80;

  # Pick the lead-byte marker and the number of continuation bytes from
  # the size of the number.
  my ($lead, $trail) =
      $entity < 0x800     ? (0xc0, 1)
    : $entity < 0x10000   ? (0xe0, 2)
    : $entity < 0x200000  ? (0xf0, 3)
    : $entity < 0x4000000 ? (0xf8, 4)
    :                       (0xfc, 5);

  # The lead byte carries the topmost bits; each continuation byte
  # carries the next six bits, most significant group first.
  my $bytes = chr($lead | ($entity >> (6 * $trail)));
  for (my $shift = 6 * ($trail - 1); $shift >= 0; $shift -= 6) {
    $bytes .= chr(0x80 | (($entity >> $shift) & 0x3f));
  }

  return $bytes;
}

# format_content_table ($text)
#
# Return $text with every "{| ... |}" table removed.  Tables may be
# nested; only text lying outside all tables is kept.
#
# A "|}" that appears while no table is open is reported through
# format_content_warning and left in the output.  If a table is still
# open when the text ends, a warning is issued as well; the text that
# followed the unmatched "{|" is not part of the result.
sub format_content_table {
  my ($text) = @_;
  my $depth = 0;
  my @kept;

  # split keeps the markers (capturing group), so the pieces alternate
  # between ordinary text and "{|" / "|}".
  for my $piece (split(/(\{\||\|\})/, $text)) {
    if ($piece eq '{|') {
      $depth++;
      next;
    }

    if ($piece eq '|}' && $depth > 0) {
      $depth--;
      next;
    }

    # Reaching this point with "|}" means there was nothing to close.
    if ($piece eq '|}') {
      format_content_warning ("opening table tag recognition is failed");
    }

    push @kept, $piece if $depth == 0;
  }

  if ($depth > 0) {
    format_content_warning("closing table tag recognition is failed");
  }

  return join('', @kept);
}

# format_content_table_html ($text)
#
# Return $text with every HTML table removed.  Tables may be nested;
# only text lying outside all tables is kept.
#
# A piece that begins with "<table" opens a table.  An end tag closes
# one; it may be written "</table>" or with whitespace before the ">"
# ("</table >"), since the split pattern below captures both spellings.
#
# An end tag that appears while no table is open is reported through
# format_content_warning and left in the output.  If a table is still
# open when the text ends, a warning is issued as well; the text that
# followed the unmatched start tag is not part of the result.
sub format_content_table_html {
  my $text = $_[0];
  my @texts = split(/(<\/?table[^<]*?>)/, $text);
  my $level = 0;

  $text = '';
  foreach my $piece (@texts) {
    if ($piece =~ /^<table/) {
      $level++;
    } elsif ($piece =~ /^<\/table\s*>\z/) {
      # Comparing against the literal '</table>' only would miss an end
      # tag with whitespace in it; the level would then never drop and
      # the rest of the text would be discarded.
      if ($level == 0) {
        format_content_warning ("opening table html tag recognition is failed");
        $text .= $piece;
      } else {
        $level--;
      }
    } elsif ($level == 0) {
      $text .= $piece;
    }
  }

  if ($level) {
    format_content_warning("closing table html tag recognition is failed");
  }

  return $text;
}

# format_content_warning ($message)
#
# Print one warning line, "<program name>: warning: <message>.", with
# print's default output handle.  The program name is taken from the
# global $PROGRAM_NAME.
sub format_content_warning {
  my $message = $_[0];

  print $PROGRAM_NAME . ': warning: ' . $message . ".\n";
}

# A file loaded with require must end by returning a true value.
1;
