/*                                                            -*- C -*-
 * Copyright (c) 2010  Kazuhiro Ito
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the project nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "../pdicr/pdicr_internal.h"

#include "getopt.h"
extern int opterr;
extern int optind;
extern int optopt;
extern int optreset;
extern char *optarg;

#include <errno.h>
#include <string.h>
#ifdef HAVE_ICONV_H
#include <iconv.h>
#endif

/* internal functions */
/* Print the usage text to stdout. */
void output_help ();
/* Select the output routine ("csv" or "pdic1"); warn on any other name. */
void set_output_format (char *format);
/* Store CODING in *PTR if iconv_open() accepts it as a source encoding. */
void set_coding (char *coding, char **ptr);
/* Replace *PTR with a strdup() copy of CODING, without validating it. */
void set_coding_internal (char *coding, char **ptr);
/* Remember the requested pronunciation table coding name. */
void set_pron_coding (char *coding);
/*
 * Fetch the requested fields of the current entry.  Any output pointer
 * may be NULL, in which case that field is skipped.
 */
PDICR_Error_Code content_get
(char **keyword, char **heading, char **text, char **citation, char **pron);
/* Print the current entry to stdout in PDIC 1-line format. */
PDICR_Error_Code output_content_as_pdic1 ();
/* Print the current entry to stdout in PDIC CSV format. */
PDICR_Error_Code output_content_as_csv ();
/* Rewrite *STRING in place, turning each LF into space-backslash-space. */
PDICR_Error_Code output_content_as_pdic1_internal (char **string);
/* Rewrite *STRING in place as a CSV field (double '"', quote if needed). */
PDICR_Error_Code output_content_as_csv_internal (char **string);
/*
 * Convert STRING from FROM_CODE to TO_CODE with iconv and store the
 * result, allocated with pdicr_malloc(), in *TO_STRING.
 */
PDICR_Error_Code codeconv
(const char *to_code, const char *from_code, const char *string,
 char **to_string);
/* Print the program name, version and copyright line to stdout. */
void output_version (const char *program_name);
/* Print the message for ERROR to stderr, prefixed with PROGRAM_NAME. */
void show_error (const char *program_name, const PDICR_Error_Code error);

/* constants */
/* Name used as the prefix of every diagnostic and in the usage text. */
const char *program_name = "pdicdump";

/*
 * Option tables handed to getopt_long() in main().  Every long option
 * maps to the short option letter in its last column; -h and -v take no
 * argument, the others require one.
 */
const char *short_options = "hvH:T:C:o:p:";
struct option long_options[] = {
  {"help",              no_argument,       NULL, 'h'},
  {"version",           no_argument,       NULL, 'v'},
  {"heading-encoding",  required_argument, NULL, 'H'},
  {"text-encoding",     required_argument, NULL, 'T'},
  {"citation-encoding", required_argument, NULL, 'C'},
  {"output-format"    , required_argument, NULL, 'o'},
  {"pron-encoding"    , required_argument, NULL, 'p'},
  {NULL, 0, NULL, 0}
};

/* global variables */
/* Dictionary bound in main() and read by content_get(). */
PDICR_Book book;
/* Entry most recently produced by pdicr_dump_dump() in main(). */
PDICR_Content content;
/*
 * NOTE(review): these three are not referenced by the functions visible
 * in this file; locals and parameters of the same names shadow them.
 */
char *heading = NULL, *text = NULL, *citation = NULL;
/*
 * Encoding names chosen with -H/-T/-C/-p, stored as strdup() copies by
 * set_coding_internal().  NULL means the option was not given (or, for
 * the first three, was discarded in main() for a BOCU-1 dictionary).
 */
char *heading_coding = NULL, *text_coding = NULL;
char *citation_coding = NULL, *pron_coding = NULL;
/* Routine main() calls for every entry; changed by set_output_format(). */
PDICR_Error_Code (* output_function) () = output_content_as_csv;

/*
 * Entry point: parse the command line, bind the dictionary file named by
 * the single non-option argument and print every entry to stdout with the
 * selected output routine.
 *
 * Returns 0 when the dump completes.  If pdicr_book_bind() or
 * pdicr_dump_dump() fails, the PDICR error code is returned as the exit
 * status.  -h and -v leave through exit(0); an unknown option or a wrong
 * number of arguments prints the usage text and leaves through exit(-1).
 */
int main (int argc, char **argv) {
  PDICR_Error_Code ret;
  PDICR_Search search;
  int opt;

  while ((opt = getopt_long(argc, argv, short_options, long_options,
                            NULL)) != -1) {
    switch (opt) {
    case 'h':
      output_help();
      exit(0);
    case 'v':
      output_version(program_name);
      exit(0);
    case 'H':
      set_coding(optarg, &heading_coding);
      break;
    case 'T':
      set_coding(optarg, &text_coding);
      break;
    case 'C':
      set_coding(optarg, &citation_coding);
      break;
    case 'o':
      set_output_format(optarg);
      break;
    case 'p':
      set_pron_coding(optarg);
      break;
    default:
      output_help();
      exit(-1);
    }
  }

  /* Exactly one dictionary file must follow the options. */
  if (argc - optind != 1) {
    if (argc == optind) {
      fprintf(stderr, "%s: too few arguments\n", program_name);
    } else {
      fprintf(stderr, "%s: too many arguments\n", program_name);
    }
    output_help();
    exit(-1);
  }

  pdicr_book_initialize(&book);
  pdicr_dump_initialize(&search);
  pdicr_content_initialize(&content);

  ret = pdicr_book_bind(&book, argv[optind]);
  if (ret != PDICR_SUCCESS) {
    fprintf(stderr, "%s: error occurs in pdicr_book_bind.\n", program_name);
    show_error(program_name, ret);
    return ret;
  }

  /* On BOCU-1 dictionaries, ignore encoding options. */
  if (book.coding == PDICR_Coding_BOCU1) {
    /* free(NULL) is a no-op, so unset options need no guard. */
    free(heading_coding);
    free(text_coding);
    free(citation_coding);
    heading_coding = text_coding = citation_coding = NULL;
  }

  /* Set pronunciation table.  Failures here are only warnings. */
  if (pron_coding) {
    if (strcmp(pron_coding, "asis") == 0) {
      if (pdicr_book_set_pron_table_coding
          (&book, PDICR_Pron_Table_Code_ASIS) != PDICR_SUCCESS) {
        fprintf(stderr,
                "%s:warning, failed to set pron_table coding to '%s'.\n",
                program_name, pron_coding);
      }
    } else if (strcmp(pron_coding, "silipa93") == 0) {
      if (pdicr_book_set_pron_table_coding
          (&book, PDICR_Pron_Table_Code_SILIPA93) != PDICR_SUCCESS) {
        fprintf(stderr,
                "%s:warning, failed to set pron_table coding to '%s'.\n",
                program_name, pron_coding);
      }
    } else {
      fprintf(stderr, "%s:warning, unknown pron_table coding '%s'.\n",
              program_name, pron_coding);
    }
  }

  /* Start dump. */
  for (;;) {
    ret = pdicr_dump_dump(&book, &search, &content);
    if (ret != PDICR_SUCCESS) {
      fprintf(stderr,
              "%s: error occurs in pdicr_dump_dump.\n", program_name);
      show_error(program_name, ret);
      return ret;
    }

    /* A successful call that loads no content ends the dump. */
    if (!pdicr_content_is_loaded(&content)) {
      break;
    }

    /* An entry that cannot be printed is reported and skipped. */
    ret = output_function();
    if (ret != PDICR_SUCCESS) {
      fprintf(stderr, "%s: entry was skipped, %s\n", program_name,
              pdicr_error_message(ret));
    }
  }

  pdicr_content_finalize(&content);
  pdicr_dump_finalize(&search);
  pdicr_book_finalize(&book);

  return 0;
}

/*
 * Write the usage text to stdout: a synopsis line carrying the program
 * name, followed by the fixed list of option descriptions.
 */
void output_help () {
  static const char *const option_lines[] = {
    "Options:",
    "  -v, --version       Show version information.",
    "  -h, --help          Show this message.",
    "",
    "  -H, --heading-encoding=name   Set character encoding of heading.",
    "  -T, --text-encoding=name      Set character encoding of text.",
    "  -C, --citation-encoding=name  Set character encoding of citation.",
    "                 Above encoding options are ignored on BOCU-1 dictionary.",
    "",
    "  -p, --pron-encoding=name      Set character encoding of pronounciation.",
    "                                Belows are available.",
    "                                  asis",
    "                                  silipa93",
    "                                Default depends on dictionary.",
    "",
    "  -o, --output-format=name      Set output format. Belows are available.",
    "                                  pdic1 PDIC 1-line format.",
    "                                  csv   PDIC CSV format.",
    "                                Default is csv."
  };
  size_t line;

  printf("Usage: %s [option...] file\n", program_name);

  /* puts() supplies the newline after each table entry. */
  for (line = 0; line < sizeof option_lines / sizeof option_lines[0]; line++)
    puts(option_lines[line]);
}

/*
 * Point output_function at the routine for FORMAT ("csv" or "pdic1").
 * Any other name leaves the current choice unchanged and prints a
 * warning to stderr.
 */
void set_output_format (char *format) {
  if (strcmp(format, "csv") == 0) {
    output_function = output_content_as_csv;
    return;
  }

  if (strcmp(format, "pdic1") == 0) {
    output_function = output_content_as_pdic1;
    return;
  }

  fprintf(stderr, "%s:warning, unknown format '%s'.\n",
          program_name, format);
}

/*
 * Record CODING as the encoding name held in *PTR, but only if
 * iconv_open() can create a converter from CODING to
 * PDICR_ICONV_NAME_UTF8.  An encoding iconv rejects leaves *PTR
 * untouched and prints a warning to stderr.
 */
void set_coding (char *coding, char **ptr) {
  iconv_t cd;

  /* The descriptor is opened only as a validity probe. */
  cd = iconv_open(PDICR_ICONV_NAME_UTF8, coding);
  if (cd == (iconv_t) -1) {
    fprintf(stderr, "%s:warning, invalid encoding '%s'.\n",
            program_name, coding);
    return;
  }
  iconv_close(cd);

  set_coding_internal(coding, ptr);
}

/*
 * Replace the string held in *PTR with a strdup() copy of CODING and
 * free the previous value.  If the copy cannot be allocated, *PTR keeps
 * its old value and a warning is printed to stderr.
 */
void set_coding_internal (char *coding, char **ptr) {
  char *copy = strdup(coding);

  if (copy == NULL) {
    fprintf(stderr, "%s:warning, memory exhausted.\n", program_name);
    return;
  }

  /* free() accepts NULL, which covers the first assignment. */
  free(*ptr);
  *ptr = copy;
}

/*
 * Remember CODING as the requested pronunciation table coding.  The name
 * is not validated here; main() compares it against the supported names
 * after the dictionary has been bound.
 */
void set_pron_coding (char *coding) {
  char **slot = &pron_coding;

  set_coding_internal(coding, slot);
}


/*
 * Fetch the requested fields of the current entry (global `content' of
 * global `book') as strings that the callers in this file release with
 * pdicr_free().
 *
 * Each output pointer may be NULL, in which case that field is skipped.
 * Every non-NULL output is reset to NULL before anything is fetched.
 * For heading, text and citation, a set *_coding global means the raw
 * field in `content' is converted with codeconv() to
 * PDICR_ICONV_NAME_UTF8; otherwise the matching pdicr_content_*()
 * accessor is used.  Text and citation are fetched only when the entry
 * has them.  A missing keyword (PDICR_ERR_NO_KEYWORD_CONTENT) or missing
 * pronunciation (PDICR_ERR_NO_PRON_CONTENT) is not treated as an error.
 *
 * Returns PDICR_SUCCESS, or the error code of the first fetch that
 * failed after printing a message to stderr.  On failure, outputs that
 * were already filled are left for the caller to free.
 */
PDICR_Error_Code content_get
(char **keyword, char **heading, char **text, char **citation, char **pron) {
  PDICR_Error_Code status;

  if (keyword != NULL)  *keyword  = NULL;
  if (heading != NULL)  *heading  = NULL;
  if (text != NULL)     *text     = NULL;
  if (citation != NULL) *citation = NULL;
  if (pron != NULL)     *pron     = NULL;

  if (keyword != NULL) {
    status = pdicr_content_keyword(&book, &content, keyword);
    if (status != PDICR_SUCCESS &&
        status != PDICR_ERR_NO_KEYWORD_CONTENT) {
      fprintf(stderr, "%s: failed to get keyword\n", program_name);
      return status;
    }
  }

  if (heading != NULL) {
    /*
     * For a dictionary that is not BOCU-1, the heading doesn't have the
     * keyword, so the raw heading can be converted directly.
     */
    status = heading_coding
      ? codeconv(PDICR_ICONV_NAME_UTF8, heading_coding,
                 content.heading, heading)
      : pdicr_content_heading(&book, &content, heading);
    if (status != PDICR_SUCCESS) {
      fprintf(stderr, "%s: failed to get heading\n", program_name);
      return status;
    }
  }

  if (text != NULL && pdicr_content_have_text(&content)) {
    status = text_coding
      ? codeconv(PDICR_ICONV_NAME_UTF8, text_coding,
                 content.text, text)
      : pdicr_content_text(&book, &content, text);
    if (status != PDICR_SUCCESS) {
      fprintf(stderr, "%s: failed to get text\n", program_name);
      return status;
    }
  }

  if (citation != NULL && pdicr_content_have_citation(&content)) {
    status = citation_coding
      ? codeconv(PDICR_ICONV_NAME_UTF8, citation_coding,
                 content.citation, citation)
      : pdicr_content_citation(&book, &content, citation);
    if (status != PDICR_SUCCESS) {
      fprintf(stderr, "%s: failed to get citation\n", program_name);
      return status;
    }
  }

  if (pron != NULL) {
    status = pdicr_content_pron(&book, &content, pron);
    if (status != PDICR_SUCCESS &&
        status != PDICR_ERR_NO_PRON_CONTENT) {
      fprintf(stderr, "%s: failed to get pronunciation\n", program_name);
      return status;
    }
  }

  return PDICR_SUCCESS;
}

/*
 * Print the current entry to stdout in PDIC 1-line format:
 *   "heading /// text"             when the entry has no citation, or
 *   "heading /// text / citation"  when it has one.
 * LFs inside text and citation are rewritten by
 * output_content_as_pdic1_internal() first.  A missing text prints as an
 * empty string.
 *
 * Returns PDICR_SUCCESS, or the error from content_get() or from the
 * rewriting step; in the latter case a message goes to stderr and
 * nothing is printed to stdout.
 */
PDICR_Error_Code output_content_as_pdic1 () {
  PDICR_Error_Code status;
  char *head, *body, *cite;

  status = content_get(NULL, &head, &body, &cite, NULL);
  if (status != PDICR_SUCCESS) goto cleanup;

  if (body != NULL)
    status = output_content_as_pdic1_internal(&body);

  if (status == PDICR_SUCCESS) {
    if (!pdicr_content_have_citation(&content)) {
      printf("%s /// %s\n", head, body ? body : "");
    } else {
      status = output_content_as_pdic1_internal(&cite);
      if (status == PDICR_SUCCESS)
        printf("%s /// %s / %s\n", head, body ? body : "", cite);
    }
  }

  if (status != PDICR_SUCCESS)
    fprintf(stderr, "%s: falied to format string\n", program_name);

 cleanup:
  /* content_get() resets its outputs to NULL, so unset ones are skipped. */
  if (head != NULL) pdicr_free(head);
  if (body != NULL) pdicr_free(body);
  if (cite != NULL) pdicr_free(cite);
  return status;
}

/*
 * Rewrite *STRING in place so that every LF becomes the three characters
 * space, backslash, space.  The buffer is grown with pdicr_realloc(), so
 * *STRING may change.
 *
 * A NULL STRING or NULL *STRING is accepted and left alone.  A string
 * without any LF is returned untouched, without reallocating.
 *
 * Returns PDICR_SUCCESS, or the error reported by pdicr_realloc().
 */
PDICR_Error_Code output_content_as_pdic1_internal (char **string) {
  PDICR_Error_Code ret = PDICR_SUCCESS;
  size_t length, pos, extra = 0;
  char *s;

  if (!(string && *string)) return ret;

  length = strlen(*string);

  /* Each LF grows the string by two bytes (one byte becomes three). */
  for (pos = 0; pos < length; pos++)
    if ((*string)[pos] == '\n') extra += 2;

  if (extra == 0) return ret;

  ret = pdicr_realloc(length + 1 + extra, string);
  if (ret != PDICR_SUCCESS) return ret;
  s = *string;

  /*
   * Move the tail to the right, walking backwards from the terminating
   * NUL so no unread byte is overwritten.  `extra' is the distance still
   * to shift; it is non-zero only while an LF remains before `pos', so
   * `pos' never drops below zero, and once it reaches zero the remaining
   * prefix is already in its final place.
   */
  for (pos = length + 1; extra > 0; ) {
    pos--;
    if (s[pos] == '\n') {
      s[pos + extra - 2] = ' ';
      s[pos + extra - 1] = '\\';
      s[pos + extra    ] = ' ';
      extra -= 2;
    } else {
      s[pos + extra] = s[pos];
    }
  }

  return ret;
}

/*
 * Print the current entry to stdout as one PDIC CSV record:
 *   keyword,heading,text,citation,level,important,modified,pron
 * Each string field is escaped by output_content_as_csv_internal()
 * first; a field that is absent prints as an empty string.
 *
 * Returns PDICR_SUCCESS, or the error from content_get() or from the
 * escaping step; in the latter case a message goes to stderr and nothing
 * is printed to stdout.
 */
PDICR_Error_Code output_content_as_csv() {
  PDICR_Error_Code status;
  char *keyword, *pron, *heading, *text, *citation;
  char **fields[5];
  size_t k;

  status = content_get(&keyword, &heading, &text, &citation, &pron);
  if (status != PDICR_SUCCESS) goto cleanup;

  /* Escape in record order, stopping at the first failure. */
  fields[0] = &keyword;
  fields[1] = &heading;
  fields[2] = &text;
  fields[3] = &citation;
  fields[4] = &pron;
  for (k = 0; k < sizeof fields / sizeof fields[0]; k++) {
    /* The escaping routine does nothing for a NULL string. */
    status = output_content_as_csv_internal(fields[k]);
    if (status != PDICR_SUCCESS) break;
  }

  if (status == PDICR_SUCCESS) {
    printf("%s,%s,%s,%s,%d,%d,%d,%s\n", keyword ? keyword : "", heading,
           text ? text : "", citation ? citation : "", content.level,
           content.important, content.modified, pron ? pron : "");
  } else {
    fprintf(stderr, "%s: falied to format string\n", program_name);
  }

 cleanup:
  /* content_get() resets its outputs to NULL, so unset ones are skipped. */
  if (keyword != NULL)  pdicr_free(keyword);
  if (heading != NULL)  pdicr_free(heading);
  if (text != NULL)     pdicr_free(text);
  if (citation != NULL) pdicr_free(citation);
  if (pron != NULL)     pdicr_free(pron);
  return status;
}

/*
 * Rewrite *STRING in place as a CSV field: every '"' is doubled, and the
 * whole field is wrapped in '"' when it contains a '"', a ',' or an LF.
 * The buffer is grown with pdicr_realloc(), so *STRING may change.
 *
 * A NULL STRING or NULL *STRING is accepted and left alone.  A string
 * that needs no quoting is returned untouched, without reallocating.
 *
 * Returns PDICR_SUCCESS, or the error reported by pdicr_realloc().
 */
PDICR_Error_Code output_content_as_csv_internal (char **string) {
  PDICR_Error_Code ret = PDICR_SUCCESS;
  size_t length, total, rd, wr, quotes = 0;
  int enclose = 0;
  char *s;
  char c;

  if (!(string && *string)) return ret;

  length = strlen(*string);

  /* Count the '"' to double and decide whether wrapping is needed. */
  for (rd = 0; rd < length; rd++) {
    c = (*string)[rd];
    if (c == '\"') {
      quotes++;
      enclose = 1;
    } else if (c == ',' || c == '\n') {
      enclose = 1;
    }
  }

  if (!enclose) return ret;

  /* Original bytes, one extra per '"', plus the two wrapping quotes. */
  total = length + quotes + 2;
  ret = pdicr_realloc(total + 1, string);
  if (ret != PDICR_SUCCESS) return ret;
  s = *string;

  s[total] = '\0';
  s[total - 1] = '\"';

  /*
   * Copy backwards so no unread byte is overwritten: the write cursor
   * stays ahead of the read cursor by one (for the opening quote) plus
   * the number of '"' still to be doubled.
   */
  wr = total - 1;
  for (rd = length; rd > 0; ) {
    c = s[--rd];
    s[--wr] = c;
    if (c == '\"') s[--wr] = '\"';
  }

  s[0] = '\"';

  return ret;
}


/*
 * Convert the NUL-terminated STRING from encoding FROM_CODE to encoding
 * TO_CODE with iconv and store the NUL-terminated result, allocated with
 * pdicr_malloc(), in *TO_STRING.
 *
 * Returns PDICR_SUCCESS on success.  Returns
 * PDICR_ERR_ICONV_OPEN_FAILURE when iconv_open() rejects the encoding
 * pair, PDICR_ERR_ICONV_FAILURE when iconv() fails for any reason other
 * than a full output buffer, or the error reported by pdicr_malloc() /
 * pdicr_realloc().  *TO_STRING is not written on failure.
 */
PDICR_Error_Code codeconv
(const char *to_code, const char *from_code, const char *string,
 char **to_string) {
  PDICR_Error_Code ret = PDICR_SUCCESS;
  iconv_t cd;
  /* NULL so the cleanup below is safe when the first allocation fails. */
  char *buffer = NULL;
  char *src, *dst;
  size_t buffer_size, srcleft, dstleft, res_length;
  size_t length = strlen(string);
  int pass = 0;

  cd = iconv_open(to_code, from_code);
  if (cd == (iconv_t) -1) return PDICR_ERR_ICONV_OPEN_FAILURE;

  buffer_size = length + 1;
  ret = pdicr_malloc(buffer_size, &buffer);
  if (ret != PDICR_SUCCESS) goto finished;

  /* iconv() takes a char ** input even though it only reads from it. */
  src = (char *) string;
  srcleft = length;
  dst = buffer;
  /* One byte is always held back for the terminating NUL. */
  dstleft = buffer_size - 1;

  /*
   * Pass 0 converts the input.  Pass 1 calls iconv() with a NULL input
   * so that any pending shift sequence is written out.  Either pass is
   * retried after growing the buffer when iconv() reports E2BIG.
   */
  while (pass < 2) {
    if (iconv(cd, &src, &srcleft, &dst, &dstleft) == (size_t) -1) {
      if (errno != E2BIG) {
        ret = PDICR_ERR_ICONV_FAILURE;
        goto finished;
      }

      /* Double the buffer; the bytes already written stay in place. */
      dstleft += buffer_size;
      buffer_size += buffer_size;

      ret = pdicr_realloc(buffer_size, &buffer);
      if (ret != PDICR_SUCCESS) goto finished;
      /* The buffer may have moved, so rebuild the write position. */
      dst = buffer + buffer_size - 1 - dstleft;
    } else {
      src = NULL;
      pass++;
    }
  }

  *dst = 0;
  res_length = (size_t) (dst - buffer);

  /* Hand back a copy sized exactly to the result. */
  ret = pdicr_malloc(res_length + 1, to_string);
  if (ret != PDICR_SUCCESS) goto finished;
  memcpy(*to_string, buffer, res_length + 1);

 finished:
  if (buffer) pdicr_free(buffer);
  iconv_close(cd);
  return ret;
}

/*
 * Print PROGRAM_NAME with the build's VERSION string, followed by the
 * copyright line, to stdout.
 */
void output_version (const char *program_name) {
  printf("%s version %s\n", program_name, VERSION);
  /* puts() supplies the trailing newline. */
  puts("Copyright (C) 2010-2019 Kazuhiro Ito");
}

void show_error (const char *program_name, const PDICR_Error_Code error) {
  fprintf(stderr, "%s: %s\n", program_name, pdicr_error_message(error));
}
