;;; ndwnj.el --- search inferface for Japanese WordNet -*- coding: utf-8; lexical-binding: t -*-
;; Copyright (C) 2009  Kazuhiro Ito <kzhr@d1.dion.ne.jp>

;; Author: Kazuhiro Ito <kzhr@d1.dion.ne.jp>

;; ndwnj.el is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2 of the License, or
;; (at your option) any later version.

;; ndwnj.el is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with Lookup; if not, write to the Free Software Foundation,
;; Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

;;; Commentary:

;;; Code:

(require 'lookup)
(require 'lookup-vse)

(defconst ndwnj-version "0.1")

;;;
;;; Customizable variables
;;;

(defgroup ndwnj nil
  "Lookup ndwnj interface."
  :group 'lookup-agents)

(defcustom ndwnj-use-builtin-sqlite
  (and (fboundp 'sqlite-available-p)
       (sqlite-available-p))
  "When non-nil, use built-in SQLite support."
  :type 'string
  :group 'ndwnj)

(defcustom ndwnj-program-name "sqlite3"
  "*Program name of sqlite3."
  :type 'string
  :group 'ndwnj)

(defcustom ndwnj-program-arguments '("-interactive")
  "*A list of arguments for sqlite3."
  :type '(repeat (string :tag "option"))
  :group 'ndwnj)

(defcustom ndwnj-prompt-string "sqlite> "
  "*Prompt string of sqlite3."
  :type 'string
  :group 'ndwnj)

(defcustom ndwnj-process-coding-system
  (if (eq system-type 'windows-nt)
      ;; Official Windows's sqlite3.exe converts received text from
      ;; OEM (ANSI?) codepage to utf-8.
      (cdr default-process-coding-system)
    'utf-8)
  "*Coding system of a text to be sent to sqlite3 process."
  :type 'symbol
  :group 'ndwnj)


;;;
;;; Internal variables
;;;


;;;
;:: types
;;;

(defun ndwnj-agent-coding (agent)
  (lookup-agent-option agent :coding))

(put 'ndwnj :methods '(exact prefix suffix substring wild))
(put 'ndwnj :reference-pattern '("<\\([0-9]+-[a-z]+\\):\\(.+?\\)>" 2 2 1))
(put 'ndwnj :arranges
     '(lookup-arrange-default-headings
       ndwnj-arrange-remove-dups
       ndwnj-arrange-headings
       lookup-arrange-references
       lookup-arrange-fill-lines))
(put 'ndwnj :adjusts
     '(lookup-adjust-check-references
       lookup-adjust-goto-min))
(put 'ndwnj :normalizers
     '((lookup-normalizer-wrapper downcase)
       ndwnj-normalizer-space-to-underscore))
(put 'ndwnj :coding 'utf-8)


;;;
;:: Interface functions
;;;

(put 'ndwnj 'setup 'ndwnj-setup)
(defun ndwnj-setup (agent)
  (ndwnj-start-process agent)
  (list (lookup-new-dictionary agent "wnj" "wnj" "日本語WordNet")))

(put 'ndwnj 'clear 'ndwnj-clear)
(defun ndwnj-clear (agent)
  (let ((database (lookup-agent-get-property agent 'database)))
    (cond
     (database
      (when (sqlitep database)
	(sqlite-close database))
      (lookup-agent-put-property agent 'database nil))
     (t
      (let* ((process (lookup-agent-get-property agent 'process))
	     (buffer (process-buffer process)))
	(when process
	  (when (eq (process-status process) 'run)
	    (process-send-string process ".exit\n"))
	  (lookup-process-kill process))
	(when (buffer-live-p buffer)
	  (kill-buffer buffer))
	(lookup-agent-put-property agent 'process nil))))))

(put 'ndwnj 'search 'ndwnj-dictionary-search)
(defun ndwnj-dictionary-search (dictionary query)
  (let* ((agent (lookup-dictionary-agent dictionary))
	 (database (lookup-agent-get-property agent 'database))
	 (method (lookup-query-method query))
	 (string (lookup-query-string query))
	 cmd)
    (cond
     (database
      (let ((table '((exact . "= ? ")
		     (t . "GLOB ? ")))
	    entries s elt)
	(cond
	 ((eq method 'prefix)
	  (setq string (concat string "*")))
	 ((eq method 'suffix)
	  (setq string (concat "*" string)))
	 ((eq method 'substring)
	  (setq string (concat "*" string "*"))))
	(setq cmd (concat "SELECT synset, name, pos_def.def "
			  "FROM synset, pos_def ON synset.pos = pos_def.pos "
			  "WHERE synset IN "
			  "(SELECT synset FROM sense WHERE wordid IN "
			  "(SELECT wordid FROM word WHERE lemma "
			  (cdr (or (assoc method table) (assoc t table)))
			  ")) AND pos_def.lang = 'jpn' "
			  (when (and (numberp lookup-max-hits)
				     (> lookup-max-hits 0))
			    (concat "LIMIT "
				    (number-to-string lookup-max-hits)))
			  ";"))
	(setq s (sqlite-select database cmd (vector string) 'set))
	(while (setq elt (sqlite-next s))
	  (setq entries (cons (lookup-make-entry
			       dictionary (car elt)
			       (format "%s (%s)" (nth 1 elt) (nth 2 elt)))
			      entries)))
	(nreverse entries)))
     (t
      (setq string (ndwnj-escape-string string))
      (cond
       ((eq method 'exact)
	(setq cmd (concat "= '" string)))
       ((eq method 'prefix)
	(setq cmd (concat "GLOB '" string "*")))
       ((eq method 'suffix)
	(setq cmd (concat "GLOB '*" string)))
       ((eq method 'substring)
	(setq cmd (concat "GLOB '*" string "*")))
       ((eq method 'wild)
	(setq cmd (concat "GLOB '" string))))
      (ndwnj-require (lookup-dictionary-agent dictionary)
	(concat "SELECT synset, name, pos_def.def "
		"FROM synset, pos_def ON synset.pos = pos_def.pos "
		"WHERE synset IN "
		"(SELECT synset FROM sense WHERE wordid IN "
		"(SELECT wordid FROM word WHERE lemma "
		cmd
		"')) AND pos_def.lang = 'jpn' "
		(when (and (numberp lookup-max-hits)
			   (> lookup-max-hits 0))
		  (concat "LIMIT " (number-to-string lookup-max-hits)))
		";")
	(lambda (_process)
	  (let (code heading entries)
	    (while (re-search-forward "^\\(.+\\)|\\(.+\\)|\\(.+\\)$" nil t)
	      (setq code (match-string 1)
		    heading (concat (match-string 2)
				    " (" (match-string 3) ")"))
	      (setq entries
		    (cons (lookup-make-entry dictionary code heading)
			  entries)))
	    (nreverse entries))))))))

(put 'ndwnj 'content 'ndwnj-dictionary-content)
(defun ndwnj-dictionary-content (dictionary entry)
  (let ((agent (lookup-dictionary-agent dictionary))
	(code (lookup-entry-code entry))
	heading def ex word link)
    (setq heading (concat (lookup-entry-code entry)
			  " "
			  (lookup-entry-heading entry))
	  def  (ndwnj-get-definition agent code)
	  ex   (ndwnj-get-examples agent code)
	  word (ndwnj-get-words agent code)
	  link (ndwnj-get-links agent code))
    (let ((index 0))
      (while (setq index (and (string-match ": \\(.+\\)$" ex index)
			      (match-end 0)))
	(setq def (replace-regexp-in-string
		   (concat "; \"" (regexp-quote (match-string 1 ex)) "\"")
		   "" def t t))))
    (concat heading "\n" def "\n"
	    (and ex (concat ex "\n")) word "\n" link "\n")))



;;;
;;; Arrange functions
;;;

(defun ndwnj-arrange-remove-dups (_entry)
  (goto-char (point-max))
  (beginning-of-line)
  (while (null (bobp))
    (let ((start (point))
	  (end (progn (end-of-line) (point))))
      (forward-line -1)
      (when (and (null (eq start end))
		 (looking-at (regexp-quote (buffer-substring start end))))
	(delete-region start (min (1+ end) (point-max)))))))

(defun ndwnj-arrange-headings (_entry)
  (while (re-search-forward
	  "^\\( *\\)\\(\\(\\[[^][]+\\]\\)\\|\\([a-zA-Z]+:\\)\\)" nil t)
    (cond
     ((match-beginning 3)
      (lookup-make-region-heading (match-beginning 3) (match-end 3) 2))
     ((match-beginning 4)
      (lookup-make-region-heading (match-beginning 4) (match-end 4) 3)))))



;;;
;;; Normalizers
;;;

(defun ndwnj-normalizer-space-to-underscore (string)
  (list (apply 'string (mapcar (lambda (elt) (if (eq elt ?\s) ?_ elt))
			       string))))


;;;
;;; Internal functions
;;;

(defun ndwnj-escape-string (string)
  (apply 'concat (mapcar (lambda (elt) (if (eq elt ?') "''" (list elt)))
			 string)))

(defun ndwnj-get-words (agent code)
  (let ((database (lookup-agent-get-property agent 'database))
	results elt)
    (cond
     (database
      (let* ((q (eval-when-compile
		  (concat "SELECT lemma, lang FROM word WHERE wordid IN "
			  "(SELECT wordid FROM sense WHERE synset = ?) "
			  "ORDER BY lang DESC;")))
	     (s  (sqlite-select database q (vector code) 'set))
	     val)
	(while (setq elt (sqlite-next s))
	  (setq val (cons (car elt) (lookup-assoc-ref results (nth 1 elt))))
	  (setq results (lookup-assoc-set results (nth 1 elt) val)))))
     (t
      (ndwnj-require agent
	(concat "SELECT lemma, lang FROM word WHERE wordid IN "
		"(SELECT wordid FROM sense WHERE synset = '"
		(ndwnj-escape-string code)
		"') ORDER BY lang DESC;")
	(lambda (_process)
	  (let (lemma lang)
	    (while (re-search-forward "^\\(.+\\)|\\(.+\\)$" nil t)
	      (setq lemma (match-string 1)
		    lang (match-string 2))
	      (setq elt (cons lemma (lookup-assoc-ref results lang)))
	      (setq results (lookup-assoc-set results lang elt))))))))
    (setq elt nil)
    (while results
      (setq elt (append elt
			`("  " ,(caar results) ": "
			  ,(mapconcat #'identity
				      (nreverse (cdar results)) ", ")
			  "\n")))
      (setq results (cdr results)))
    (apply #'concat elt)))

(defun ndwnj-get-examples (agent code)
  (let ((database (lookup-agent-get-property agent 'database)))
    (cond
     (database
      (let* ((q (eval-when-compile
		  (concat
		   "SELECT lang, def, sid FROM synset_ex WHERE synset = "
		   "? ORDER BY sid ASC, lang ASC;")))
	     (s (sqlite-select database q (vector code) 'set))
	     elt result)
	(while (setq elt (sqlite-next s))
	  (setq result (append result
			       (list " " (car elt) ": " (nth 1 elt) "\n"))))
	(apply #'concat result)))
     (t
      (ndwnj-require agent
	(concat "SELECT lang, def, sid FROM synset_ex WHERE synset = '"
		(ndwnj-escape-string code)
		"' ORDER BY sid ASC, lang ASC;")
	(lambda (_process)
	  (while (re-search-forward "^\\(.+\\)|\\(.+\\)|.+$" nil t)
	    (replace-match
	     (concat "  " (match-string 1) ": " (match-string 2)) t t))
	  (buffer-string)))))))

(defun ndwnj-get-definition (agent code)
  (let ((database (lookup-agent-get-property agent 'database)))
    (cond
     (database
      (let* ((q (eval-when-compile
		  (concat "SELECT lang, def FROM synset_def WHERE synset = "
			  "? ORDER BY lang;")))
	     (s (sqlite-select database q (vector code) 'set))
	     elt result)
	(while (setq elt (sqlite-next s))
	  (setq result (append (list (car elt) ": " (nth 1 elt) "\n") result)))
	(apply #'concat result)))
     (t
      (ndwnj-require agent
	(concat "SELECT lang, def FROM synset_def WHERE synset = '"
		(ndwnj-escape-string code)
		"' ORDER BY lang;")
	(lambda (_process)
	  (while (re-search-forward "^\\(.+\\)|\\(.+\\)$" nil t)
	    (replace-match
	     (concat (match-string 1) ": " (match-string 2)) t t))
	  (buffer-string)))))))

(defun ndwnj-get-links (agent code)
  (let ((database (lookup-agent-get-property agent 'database))
	elt result results)
    (cond
     (database
      (let* ((database (lookup-agent-get-property agent 'database))
	     (q (eval-when-compile
		  (concat "SELECT synlink.synset2, synset.name, link_def.def "
			  "FROM synlink, synset ON synlink.synset2 = synset.synset "
			  ", link_def ON synlink.link = link_def.link "
			  "WHERE synlink.synset1 = ? "
			  "ORDER BY synlink.link DESC, synset.name;")))
	     (s (sqlite-select database q (vector code) 'set)))
	(while (setq elt (sqlite-next s))
	  (setq result (concat (lookup-assoc-ref results (nth 2 elt))
			       " <" (car elt) ":" (nth 1 elt) ">"))
	  (setq results (lookup-assoc-set results (nth 2 elt) result)))
	(setq result nil)))
     (t
      (setq results
	    (ndwnj-require agent
	      (concat "SELECT synlink.synset2, synset.name, link_def.def "
		      "FROM synlink, synset ON synlink.synset2 = synset.synset "
		      ", link_def ON synlink.link = link_def.link "
		      "WHERE synlink.synset1 = '"
		      (ndwnj-escape-string code)
		      "' ORDER BY synlink.link DESC, synset.name;")
	      (lambda (_process)
		(let (target name syn elt results)
		  (while (re-search-forward
			  "^\\(.+\\)|\\(.+\\)|\\(.+\\)$" nil t)
		    (setq target (match-string 1)
			  name (match-string 2)
			  syn (match-string 3))
		    (setq elt (concat 
			       (lookup-assoc-ref results syn)
			       " <" target ":" name ">"))
		    (setq results (lookup-assoc-set results syn elt))))
		results)))))
    (while results
      (setq elt (car results))
      (setq result (concat result " [" (car elt) "]" (cdr elt) "\n"))
      (setq results (cdr results)))
    result))

(defun ndwnj-start-process (agent)
  "Start sqlite3 process if it does not start."
  (let ((database (lookup-agent-get-property agent 'database))
	(process (lookup-agent-get-property agent 'process))
	(file (expand-file-name (lookup-agent-location agent))))
    (cond
     ((and (or ndwnj-use-builtin-sqlite database)
	   (null process))
      (unless (file-readable-p file)
	(error "File %s does't exist" file))
      (unless (sqlitep database)
	(lookup-agent-put-property agent 'database (sqlite-open file))))
     (t
      (unless (and (processp process)
		   (eq (process-status process) 'run))
	(let* ((buffer (or (lookup-open-process-buffer
			    (concat " *ndwnj+" file "*"))
			   (lookup-temp-buffer)))
	       (coding (ndwnj-agent-coding agent)))
	  (setq process (apply 'start-process "ndwnj" buffer
			       ndwnj-program-name
			       (append ndwnj-program-arguments (list file))))
	  (set-process-coding-system
	   process coding ndwnj-process-coding-system)
	  (set-process-query-on-exit-flag process nil)
	  (with-current-buffer buffer
	    (catch 'started
	      (while (accept-process-output process 10)
		(save-excursion
		  (goto-char (point-min))
		  (when (search-forward ndwnj-prompt-string nil t)
		    (throw 'started t))))
	      (error "Failed start process")))
	  (unless lookup-debug-mode
	    (set-process-buffer process nil)
	    (kill-buffer buffer)))
	(lookup-agent-put-property agent 'process process))))))

(put 'ndwnj-require 'lisp-indent-function 1)
(defun ndwnj-require (agent string &optional filter)
  "Send string to sqlite3 process and return output.
Cf. `lookup-process-require'"
  (let ((lookup-process-output-separator-lines 0))
    (lookup-process-require (lookup-agent-get-property agent 'process)
			    (concat string "\n")
			    (concat "^" ndwnj-prompt-string) filter)))
  
(provide 'ndwnj)

;;; ndwnj.el ends here
