NIIBE Yutaka
gniib****@fsij*****
2010年 8月 2日 (月) 14:30:47 JST
過去に実装が試みられて捨てられた texttrie と呼ばれていたものの削除です。 janitor/remove-texttrie ブランチとしました。 Read-only でサポートしようかとも思いましたが、単に削除することにしました。 2010-08-02 NIIBE Yutaka <gniib****@fsij*****> * anthy/texttrie.h: Removed. * anthy/Makefile.am (noinst_HEADERS): Remove texttrie.h. * * src-worddic/Makefile.am (libanthydic_la_SOURCES): Remove texttrie.c. * src-worddic/dic_util.c (anthy_private_tt_dic, find_next_key) (delete_prefix, encoding_prefix, find_same_word): Removed. (struct iterate_contex): Removed in_tt and key_buf. (anthy_dic_util_init, select_first_entry_in_textdic): Don't access to in_tt field. (anthy_priv_dic_delete): Don't call delete_prefix. (anthy_priv_dic_select_first_entry): Just call select_first_entry_in_textdic. (anthy_priv_dic_select_next_entry): Don't call find_next_key. (anthy_priv_dic_get_index): Don't access in_tt and key_buf. (anthy_priv_dic_get_freq): Just call anthy_parse_word_line. (anthy_priv_dic_get_wtype, anthy_priv_dic_get_word): Don't call anthy_trie_find. (add_word_to_textdic): Don't call anthy_trie_delete. * src-worddic/priv_dic.c (anthy_private_tt_dic, open_tt_dic) (copy_words_from_tt): Removed. (anthy_priv_dic_update): Don't call anthy_trie_update_mapping. (anthy_copy_words_from_private_dic): Don't call copy_words_from_tt. (anthy_init_private_dic): Dont' call open_tt_dic and anthy_trie_close. (anthy_release_private_dic): Don't call anthy_trie_close. * src-worddic/texttrie.c: Removed. * src-worddic/word_dic.c: Comment fix. * src-worddic/dic_main.h (anthy_priv_dic_update): Removed. * src-worddic/priv_dic.c: Likewise. * src-worddic/word_dic.c (anthy_lock_dic): Don't call anthy_priv_dic_update. diff --git a/anthy/Makefile.am b/anthy/Makefile.am index e11e8a8..ac28fc5 100644 --- a/anthy/Makefile.am +++ b/anthy/Makefile.am @@ -6,7 +6,7 @@ noinst_HEADERS = xstr.h xchar.h dic.h wtype.h\ filemap.h \ logger.h segclass.h \ depgraph.h \ - texttrie.h textdic.h matrix.h \ + textdic.h matrix.h \ prediction.h word_dic.h \ diclib.h feature_set.h \ corpus.h convdb.h diff --git a/anthy/texttrie.h b/anthy/texttrie.h deleted file mode 100644 index 84c0ef3..0000000 --- a/anthy/texttrie.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef _texttrie_h_included_ -#define _texttrie_h_included_ - -struct text_trie; - -/* database manipulation */ -struct text_trie *anthy_trie_open(const char *fn, int create); -void anthy_trie_close(struct text_trie *tt); -void anthy_trie_update_mapping(struct text_trie *tt); -/**/ -int anthy_trie_add(struct text_trie *tt, const char *key, const char *body); -/* caller should free the result */ -char *anthy_trie_find(struct text_trie *a, char *key); -void anthy_trie_delete(struct text_trie *tt, const char *key); -/**/ -char *anthy_trie_find_next_key(struct text_trie *tt, char *buf, int len); -/**/ -void anthy_trie_find_prefix(struct text_trie *tt, const char *str, - char *buf, int len, - int (*cb)(const char *key, const char *str)); -/**/ -void anthy_trie_print_array(struct text_trie *tt); - -#endif diff --git a/src-worddic/Makefile.am b/src-worddic/Makefile.am index c1476f6..8d230bf 100644 --- a/src-worddic/Makefile.am +++ b/src-worddic/Makefile.am @@ -3,7 +3,7 @@ INCLUDES = -I$(top_srcdir)/ -DCONF_DIR=\"$(sysconfdir)\" libanthydic_la_SOURCES = \ word_dic.c dic_util.c \ wtype.c\ - texttrie.c textdic.c record.c\ + textdic.c record.c\ word_lookup.c use_dic.c \ priv_dic.c mem_dic.c \ ext_ent.c matrix.c\ diff --git a/src-worddic/dic_main.h b/src-worddic/dic_main.h index aba6c39..e45aa34 100644 --- a/src-worddic/dic_main.h +++ b/src-worddic/dic_main.h @@ -79,7 +79,6 @@ void anthy_release_private_dic(void); void anthy_check_user_dir(void); void anthy_priv_dic_lock(void); void anthy_priv_dic_unlock(void); -void anthy_priv_dic_update(void); struct word_line { char wt[10]; int freq; diff --git a/src-worddic/dic_util.c b/src-worddic/dic_util.c index c437d2d..a346133 100644 --- a/src-worddic/dic_util.c +++ b/src-worddic/dic_util.c @@ -3,14 +3,12 @@ * * 互換性の都合で * utf8の辞書はtextdic - * eucjpの辞書はtexttrie * およびrecordを使ってて混乱しまくり * textdicへ移行する * * 開発予定 * * 新規登録はtextdicに対して行うようにする <- todo - * texttrieの単語は移行するようにする * record関係は消す * * @@ -41,36 +39,21 @@ #include <anthy/anthy.h> #include <anthy/conf.h> #include <anthy/dic.h> -#include <anthy/texttrie.h> #include <anthy/textdic.h> #include <anthy/dicutil.h> #include "dic_main.h" #include "dic_personality.h" -/* - * 個人辞書はtexttrie中に格納されるとき - * 「 見出し 数字」 -> 「#品詞*頻度 単語」という形式をとる - * (UTF8の場合は「 p見出し 数字」 -> 「#品詞*頻度 単語」) - * 最初の2文字の空白は単語情報のセクションであることを意味し、 - * 数字の部分は同音語を区別するために用いられる。 - * - */ - /* UTF8で32文字 x 3bytes */ #define MAX_KEY_LEN 96 static int gIsInit; static int dic_util_encoding; -extern struct text_trie *anthy_private_tt_dic; extern const char *anthy_private_text_dic; /* 現在選択されている読み */ static struct iterate_contex { - /**/ - int in_tt; - /* texttrie */ - char key_buf[MAX_KEY_LEN+32]; /* textdicの検索用 */ int dicfile_offset; char *current_index; @@ -117,9 +100,6 @@ anthy_dic_util_init(void) anthy_dic_set_personality("default"); gIsInit = 1; dic_util_encoding = ANTHY_EUC_JP_ENCODING; - /**/ - word_iterator.key_buf[0] = 0; - word_iterator.in_tt = 1; } /** 辞書ライブラリを解放する */ @@ -150,52 +130,10 @@ anthy_dic_util_set_personality(const char *id) anthy_dic_set_personality(id); } -static char * -find_next_key(const char *prefix) -{ - char *v; - v = anthy_trie_find_next_key(anthy_private_tt_dic, - word_iterator.key_buf, MAX_KEY_LEN+32); - - if (v && v[0] == prefix[0] && v[1] == prefix[1]) { - /* 次のkeyも指定されたprefixを持っている */ - return v; - } - /**/ - sprintf(word_iterator.key_buf, "%s", prefix); - return NULL; -} - -static void -delete_prefix(const char *prefix) -{ - sprintf(word_iterator.key_buf, "%s", prefix); - anthy_priv_dic_lock(); - /* word_iterator.key_bufがprefixの文字列であれば、find_next_key()は - 最初の単語を返す */ - while (find_next_key(prefix)) { - anthy_trie_delete(anthy_private_tt_dic, word_iterator.key_buf); - sprintf(word_iterator.key_buf, "%s", prefix); - } - anthy_priv_dic_unlock(); -} - -static const char * -encoding_prefix(int encoding) -{ - if (encoding == ANTHY_UTF8_ENCODING) { - return " p"; - } - /* EUC-JP */ - return " "; -} - /** (API) 個人辞書を全部消す */ void anthy_priv_dic_delete(void) { - delete_prefix(encoding_prefix(ANTHY_EUC_JP_ENCODING)); - /**/ while (!anthy_textdic_delete_line(anthy_private_text_dic, 0)) { /**/ } @@ -215,11 +153,9 @@ select_first_entry_in_textdic(void) { word_iterator.dicfile_offset = 0; set_current_line(NULL, NULL); - anthy_textdic_scan(anthy_private_text_dic, - word_iterator.dicfile_offset, NULL, - scan_one_word_cb); + anthy_textdic_scan(anthy_private_text_dic, word_iterator.dicfile_offset, + NULL, scan_one_word_cb); if (word_iterator.current_line) { - word_iterator.in_tt = 0; return 0; } /* 単語が無い */ @@ -230,18 +166,6 @@ select_first_entry_in_textdic(void) int anthy_priv_dic_select_first_entry(void) { - if (dic_util_encoding == ANTHY_UTF8_ENCODING) { - return select_first_entry_in_textdic(); - } - if (anthy_private_tt_dic) { - sprintf(word_iterator.key_buf, "%s", encoding_prefix(dic_util_encoding)); - /* prefixの次のエントリが最初のエントリ */ - if (find_next_key(encoding_prefix(dic_util_encoding))) { - word_iterator.in_tt = 1; - return 0; - } - } - /* 単語が無いのでtextdicに移動を試みる */ return select_first_entry_in_textdic(); } @@ -249,21 +173,13 @@ anthy_priv_dic_select_first_entry(void) int anthy_priv_dic_select_next_entry(void) { - if (!word_iterator.in_tt) { - set_current_line(NULL, NULL); - anthy_textdic_scan(anthy_private_text_dic, word_iterator.dicfile_offset, - NULL, - scan_one_word_cb); - if (word_iterator.current_line) { - return 0; - } - return ANTHY_DIC_UTIL_ERROR; - } - if (find_next_key(encoding_prefix(dic_util_encoding))) { + set_current_line(NULL, NULL); + anthy_textdic_scan(anthy_private_text_dic, word_iterator.dicfile_offset, + NULL, scan_one_word_cb); + if (word_iterator.current_line) { return 0; } - /* 単語が無いのでtextdicに移動を試みる */ - return select_first_entry_in_textdic(); + return ANTHY_DIC_UTIL_ERROR; } /** 未実装 */ @@ -280,12 +196,8 @@ anthy_priv_dic_get_index(char *buf, int len) { int i; char *src_buf; - if (word_iterator.in_tt) { - src_buf = &word_iterator.key_buf[2]; - } else { - src_buf = word_iterator.current_index; - } - if (!word_iterator.in_tt && dic_util_encoding == ANTHY_EUC_JP_ENCODING) { + src_buf = word_iterator.current_index; + if (dic_util_encoding == ANTHY_EUC_JP_ENCODING) { /**/ src_buf = anthy_conv_utf8_to_euc(src_buf); } else { @@ -310,13 +222,8 @@ anthy_priv_dic_get_freq(void) { struct word_line res; char *v; - if (word_iterator.in_tt) { - v = anthy_trie_find(anthy_private_tt_dic, word_iterator.key_buf); - anthy_parse_word_line(v, &res); - free(v); - } else { - anthy_parse_word_line(word_iterator.current_line, &res); - } + + anthy_parse_word_line(word_iterator.current_line, &res); return res.freq; } @@ -326,13 +233,7 @@ anthy_priv_dic_get_wtype(char *buf, int len) { struct word_line res; char *v; - if (word_iterator.in_tt) { - v = anthy_trie_find(anthy_private_tt_dic, word_iterator.key_buf); - anthy_parse_word_line(v, &res); - free(v); - } else { - anthy_parse_word_line(word_iterator.current_line, &res); - } + anthy_parse_word_line(word_iterator.current_line, &res); if (len - 1 < (int)strlen(res.wt)) { return NULL; } @@ -346,27 +247,20 @@ anthy_priv_dic_get_word(char *buf, int len) { char *v; char *s; - if (word_iterator.in_tt) { - v = anthy_trie_find(anthy_private_tt_dic, word_iterator.key_buf); - } else { - v = word_iterator.current_line; - } + v = word_iterator.current_line; if (!v) { return NULL; } /* 品詞の後ろにある単語を取り出す */ s = strchr(v, ' '); s++; - if (!word_iterator.in_tt && dic_util_encoding == ANTHY_EUC_JP_ENCODING) { + if (dic_util_encoding == ANTHY_EUC_JP_ENCODING) { s = anthy_conv_utf8_to_euc(s); snprintf(buf, len, "%s", s); free(s); } else { snprintf(buf, len, "%s", s); } - if (word_iterator.in_tt) { - free(v); - } return buf; } @@ -437,40 +331,6 @@ dup_word_check(const char *v, const char *word, const char *wt) } static int -find_same_word(char *idx_buf, const char *yomi, - const char *word, const char *wt_name, int yomi_len) -{ - int found = 0; - sprintf(idx_buf, "%s%s ", - encoding_prefix(dic_util_encoding), - yomi); - anthy_trie_find_next_key(anthy_private_tt_dic, - idx_buf, yomi_len + 12); - - /* trieのインデックスを探す */ - do { - char *v; - if (strncmp(&idx_buf[2], yomi, yomi_len) || - idx_buf[yomi_len+2] != ' ') { - /* 見出語が異なるのでループ終了 */ - break; - } - /* texttrieにアクセスして、見出語以外も一致しているかをチェック */ - v = anthy_trie_find(anthy_private_tt_dic, idx_buf); - if (v) { - found = dup_word_check(v, word, wt_name); - free(v); - if (found) { - break; - } - } - } while (anthy_trie_find_next_key(anthy_private_tt_dic, - idx_buf, yomi_len + 12)); - - return found; -} - -static int add_word_to_textdic(const char *yomi, const char *word, const char *wt_name, int freq) { @@ -486,15 +346,6 @@ add_word_to_textdic(const char *yomi, const char *word, return ANTHY_DIC_UTIL_ERROR; } - /* texttrieにあれば消す */ - if (anthy_private_tt_dic) { - char *idx_buf = malloc(yomi_len + 12); - if (find_same_word(idx_buf, yomi, word, wt_name, yomi_len)) { - anthy_trie_delete(anthy_private_tt_dic, idx_buf); - } - free(idx_buf); - } - /* 同じ物があったら消す */ sc.yomi = yomi; sc.word = word; @@ -502,8 +353,7 @@ add_word_to_textdic(const char *yomi, const char *word, /**/ sc.offset = 0; sc.found_word = 0; - anthy_textdic_scan(anthy_private_text_dic, 0, &sc, - find_cb); + anthy_textdic_scan(anthy_private_text_dic, 0, &sc, find_cb); if (sc.found_word == 1) { anthy_textdic_delete_line(anthy_private_text_dic, sc.offset); } @@ -513,8 +363,7 @@ add_word_to_textdic(const char *yomi, const char *word, /* 追加する場所を探す */ sc.offset = 0; sc.found_word = 0; - anthy_textdic_scan(anthy_private_text_dic, 0, &sc, - order_cb); + anthy_textdic_scan(anthy_private_text_dic, 0, &sc, order_cb); /* 追加する */ rv = do_add_word_to_textdic(anthy_private_text_dic, sc.offset, yomi, word, wt_name, freq); diff --git a/src-worddic/priv_dic.c b/src-worddic/priv_dic.c index c11abe4..814123c 100644 --- a/src-worddic/priv_dic.c +++ b/src-worddic/priv_dic.c @@ -38,14 +38,12 @@ #include <anthy/dicutil.h> #include <anthy/conf.h> #include <anthy/logger.h> -#include <anthy/texttrie.h> #include <anthy/textdic.h> #include <anthy/word_dic.h> #include "dic_main.h" #include "dic_ent.h" /* 個人辞書 */ -struct text_trie *anthy_private_tt_dic; const char *anthy_private_text_dic; static const char *anthy_imported_text_dic; static char *imported_dic_dir; @@ -89,17 +87,6 @@ init_lock_fn(const char *home, const char *id) sprintf(lock_fn, "%s/.anthy/lock-file_%s", home, id); } -static struct text_trie * -open_tt_dic(const char *home, const char *id) -{ - struct text_trie *tt; - char *buf = malloc(strlen(home) + strlen(id) + 40); - sprintf(buf, "%s/.anthy/private_dict_%s.tt", home, id); - tt = anthy_trie_open(buf, 0); - free(buf); - return tt; -} - static const char * textdicname (const char *home, const char *name, const char *id) { @@ -152,16 +139,6 @@ anthy_priv_dic_unlock(void) } } -void -anthy_priv_dic_update(void) -{ - if (!anthy_private_tt_dic) { - return ; - } - - anthy_trie_update_mapping(anthy_private_tt_dic); -} - /* seq_entに追加する */ static void add_to_seq_ent(const char *line, int encoding, struct seq_ent *seq) @@ -180,48 +157,6 @@ add_to_seq_ent(const char *line, int encoding, struct seq_ent *seq) anthy_free_xstr(xs); } -/* texttrieに登録されているかをチェックし、 - * 登録されていればseq_entに追加する - */ -static void -copy_words_from_tt(struct seq_ent *seq, xstr *xs, - int encoding, const char *prefix) -{ - char *key, *v; - int key_len; - char *key_buf; - int prefix_len = strlen(prefix); - /**/ - if (!anthy_private_tt_dic) { - return ; - } - key = anthy_xstr_to_cstr(xs, encoding); - key_len = strlen(key); - key_buf = malloc(key_len + 12); - /* 辞書中には各単語が「見出し XXXX」(XXXXはランダムな文字列)を - * キーとして保存されているので列挙する - */ - sprintf(key_buf, "%s%s ", prefix, key); - do { - if (strncmp(&key_buf[2], key, key_len) || - strncmp(&key_buf[0], prefix, prefix_len) || - key_buf[key_len+2] != ' ') { - /* 「見出し 」で始まっていないので対象外 */ - break; - } - /* 単語を読み出して登録 */ - v = anthy_trie_find(anthy_private_tt_dic, key_buf); - if (v) { - add_to_seq_ent(v, encoding, seq); - } - free(v); - /**/ - } while (anthy_trie_find_next_key(anthy_private_tt_dic, - key_buf, key_len + 8)); - free(key); - free(key_buf); -} - void anthy_copy_words_from_private_dic(struct seq_ent *seq, xstr *xs, int is_reverse) @@ -229,9 +164,6 @@ anthy_copy_words_from_private_dic(struct seq_ent *seq, if (is_reverse) { return ; } - /* 個人辞書から取ってくる */ - copy_words_from_tt(seq, xs, ANTHY_EUC_JP_ENCODING, " "); - copy_words_from_tt(seq, xs, ANTHY_UTF8_ENCODING, " p"); /**/ if (!anthy_select_section("UNKNOWN_WORD", 0) && !anthy_select_row(xs, 0)) { @@ -359,15 +291,10 @@ void anthy_init_private_dic(const char *id) { const char *home = anthy_conf_get_str("HOME"); - if (anthy_private_tt_dic) { - anthy_trie_close(anthy_private_tt_dic); - } - /**/ if (lock_fn) { free(lock_fn); } init_lock_fn(home, id); - anthy_private_tt_dic = open_tt_dic(home, id); /**/ anthy_private_text_dic = textdicname (home, "private_words_", id); anthy_imported_text_dic = textdicname (home, "imported_words_", id); @@ -378,10 +305,6 @@ anthy_init_private_dic(const char *id) void anthy_release_private_dic(void) { - if (anthy_private_tt_dic) { - anthy_trie_close(anthy_private_tt_dic); - anthy_private_tt_dic = NULL; - } free (anthy_private_text_dic); free (anthy_imported_text_dic); free(imported_dic_dir); diff --git a/src-worddic/texttrie.c b/src-worddic/texttrie.c deleted file mode 100644 index 9497a02..0000000 --- a/src-worddic/texttrie.c +++ /dev/null @@ -1,1516 +0,0 @@ -/* - * DEPRECATED, it is too hard to debug. - * you may use textdict instead - * - * Trie in Text - * - * *issues - * +correct API - * -iterator vs callback - * +robustness - * -error detection - * -auto correction - * -concurrent access - * +efficiency - * -lower memory consumption - * -disk space? - * - * on some file system like jffs2 on linux, writable mmap - * is not allowed, though you can write it. - * - */ -/* - * API - * anthy_trie_open() - * anthy_trie_close() - * anthy_trie_add() - * anthy_trie_find() - * anthy_trie_delete() - * anthy_trie_find_next_key() - * anthy_trie_find_prefix() - * anthy_trie_print_array() - * - * Copyright (C) 2005-2006 TABATA Yusuke - * - */ -/* - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -/* open & mmap */ -#include <unistd.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -/**/ -#include <stdlib.h> -#include <stdio.h> -#include <string.h> -#include <ctype.h> -#include <anthy/texttrie.h> -#include <anthy/filemap.h> -#include "dic_main.h" - -/* configs */ -#define OBJ_LEN 20 -#define LINE_LEN 32 -#define EXPAND_COUNT 16 - -/* cell type */ -#define TT_SUPER 0 -#define TT_UNUSED 1 -#define TT_ALLOCED 2 -#define TT_NODE 3 -#define TT_BODY 4 -#define TT_TAIL 5 - -/* cell structure */ -struct cell { - /* (common) type */ - int type; - /* union */ - union { - /* unused */ - int next_unused; - /* super */ - struct { - int first_unused; - int root_cell; - int size; - int serial; - } super; - /* node */ - struct { - int key; - int next; - int child; - int body; - int parent; - } node; - /* body */ - struct { - int owner; - char *obj; - } body; - /* tail */ - struct { - char *obj; - int prev; - } tail; - } u; - /* body & tail */ - int next_tail; -}; - -struct text_trie { - /**/ - int fatal; - /**/ - char *fn; - FILE *wfp; - struct filemapping *mapping; - char *ptr; - /**/ - struct cell super; - int valid_super; -}; - -struct path { - /**/ - const char *key_str; - /**/ - int max_len; - int *path; - int len; - int cur; -}; - -static void -print_super_cell(struct cell *c) -{ - printf("super first_unused=%d, root_cell=%d, size=%d, serial=%d\n", - c->u.super.first_unused, c->u.super.root_cell, - c->u.super.size, c->u.super.serial); -} - -static void -print_alloced_cell(void) -{ - printf("alloc-ed\n"); -} - -static void -print_node_cell(struct cell *c) -{ - printf("node key=%d", c->u.node.key); - if (c->u.node.key > 0 && isprint(c->u.node.key)) { - printf("(%c)", c->u.node.key); - } - printf(" parent=%d next=%d child=%d body=%d\n", - c->u.node.parent, c->u.node.next, c->u.node.child, c->u.node.body); -} - -static void -print_unused_cell(struct cell *c) -{ - printf("unused next_unused=%d\n", - c->u.next_unused); -} - -static void -print_body_cell(struct cell *c) -{ - printf("body object=(%s), owner=%d, next_tail=%d\n", - c->u.body.obj, c->u.body.owner, c->next_tail); -} - -static void -print_tail_cell(struct cell *c) -{ - printf("tail object=(%s), prev=%d, next_tail=%d\n", - c->u.tail.obj, c->u.tail.prev, c->next_tail); -} - -static void -print_cell(int idx, struct cell *c) -{ - if (!c) { - printf("idx =%d(null cell):\n", idx); - return ; - } - printf("idx=%d:", idx); - switch (c->type) { - case TT_SUPER: - print_super_cell(c); - break; - case TT_ALLOCED: - print_alloced_cell(); - break; - case TT_NODE: - print_node_cell(c); - break; - case TT_UNUSED: - print_unused_cell(c); - break; - case TT_BODY: - print_body_cell(c); - break; - case TT_TAIL: - print_tail_cell(c); - break; - default: - printf("unknown\n"); - } -} - -static void -path_setup(struct path *path, const char *key, int len, int *buf) -{ - unsigned char *p = (unsigned char *)key; - path->key_str = key; - path->max_len = len; - path->path = buf; - path->len = 0; - path->cur = 0; - /**/ - while (*p) { - path->path[path->len] = p[0] * 256 + p[1]; - path->len ++; - p++; - if (p[0]) { - p++; - } - } -} - -static void -path_copy_to_str(struct path *path, char *str, int buf_len) -{ - unsigned char *p = (unsigned char *)str; - int i, o; - for (i = 0, o = 0; i < path->cur && o < buf_len - 2; i++) { - p[o] = (path->path[i]>>8)&255; - p[o+1] = path->path[i]&255; - o += 2; - } - p[o] = 0; -} - -static int -sput_int(char *buf, int num) -{ - unsigned char *tmp = (unsigned char *)buf; - tmp[0] = (num>>24)&255; - tmp[1] = (num>>16)&255; - tmp[2] = (num>>8)&255; - tmp[3] = num&255; - return 4; -} - -static char * -sget_int(char *buf, int *num) -{ - unsigned int res; - unsigned char *tmp = (unsigned char *)buf; - res = 0; - res += tmp[0] << 24; - res += tmp[1] << 16; - res += tmp[2] << 8; - res += tmp[3]; - *num = res; - buf += 4; - return buf; -} - -static char * -pass_str(char *buf, const char *str) -{ - buf += strlen(str); - return buf; -} - -static void -encode_super(struct cell *c, char *buf) -{ - buf += sprintf(buf, "S "); - buf += sput_int(buf, c->u.super.size); - buf += sput_int(buf, c->u.super.root_cell); - buf += sput_int(buf, c->u.super.first_unused); - buf += sput_int(buf, c->u.super.serial); - buf += sput_int(buf, LINE_LEN); -} - -static void -encode_node(struct cell *c, char *buf) -{ - buf += sprintf(buf, "N "); - buf += sput_int(buf, c->u.node.key); - buf += sput_int(buf, c->u.node.parent); - buf += sput_int(buf, c->u.node.next); - buf += sput_int(buf, c->u.node.child); - buf += sput_int(buf, c->u.node.body); -} - -static void -encode_body(struct cell *c, char *buf) -{ - buf += sprintf(buf, "B"); - buf += sput_int(buf, c->next_tail); - buf += sput_int(buf, c->u.body.owner); - sprintf(buf, "\"%s\"", - c->u.body.obj); -} - -static void -encode_unused(struct cell *c, char *buf) -{ - buf += sprintf(buf, "-next="); - buf += sput_int(buf, c->u.next_unused); -} - -static void -encode_tail(struct cell *c, char *buf) -{ - buf += sprintf(buf, "T"); - buf += sput_int(buf, c->u.tail.prev); - buf += sput_int(buf, c->next_tail); - sprintf(buf, "\"%s\"", - c->u.tail.obj); -} - -static void -encode_unknown(char *buf) -{ - sprintf(buf, "?"); -} - -static void -encode_cell(struct cell *c, char *buf) -{ - switch (c->type) { - case TT_SUPER: - encode_super(c, buf); - break; - case TT_NODE: - encode_node(c, buf); - break; - case TT_UNUSED: - encode_unused(c, buf); - break; - case TT_BODY: - encode_body(c, buf); - break; - case TT_TAIL: - encode_tail(c, buf); - break; - default: - encode_unknown(buf); - break; - } -} - -static void -write_back_cell(struct text_trie *tt, struct cell *c, int idx) -{ - int i; - char buf[LINE_LEN+1]; - /* sanity check */ - if (((anthy_mmap_size(tt->mapping) / LINE_LEN) < (idx + 1)) || - idx < 0) { - return ; - } - for (i = 0; i < LINE_LEN; i++) { - buf[i] = ' '; - } - encode_cell(c, buf); - buf[LINE_LEN-1] = '\n'; - if (anthy_mmap_is_writable(tt->mapping)) { - memcpy(&tt->ptr[idx*LINE_LEN], buf, LINE_LEN); - } else { - fseek(tt->wfp, idx*LINE_LEN, SEEK_SET); - fwrite(buf, LINE_LEN, 1, tt->wfp); - fflush(tt->wfp); - } -} - -static char * -decode_str(char *raw_buf, int off) -{ - char *head; - char copy_buf[LINE_LEN + 1]; - char *buf; - int i; - /* from off to before last '\n' */ - for (i = 0; i < LINE_LEN - off - 1; i++) { - copy_buf[i] = raw_buf[i]; - } - copy_buf[i] = 0; - buf = copy_buf; - /* find first double quote */ - while (*buf && *buf != '\"') { - buf ++; - } - if (!*buf) { - /* cant find double quote */ - return strdup(""); - } - buf ++; - head = buf; - /* go to the end of string */ - while (*buf) { - buf ++; - } - /* find last double quote */ - while (*buf != '\"') { - buf--; - } - *buf = 0; - /**/ - return strdup(head); -} - -static void -release_cell_str(struct cell *c) -{ - if (!c) { - return ; - } - if (c->type == TT_BODY) { - free(c->u.body.obj); - } - if (c->type == TT_TAIL) { - free(c->u.tail.obj); - } -} - -static int -decode_super(struct cell *c, char *buf) -{ - c->type = TT_SUPER; - buf = pass_str(buf, "S "); - buf = sget_int(buf, &c->u.super.size); - buf = sget_int(buf, &c->u.super.root_cell); - buf = sget_int(buf, &c->u.super.first_unused); - buf = sget_int(buf, &c->u.super.serial); - return 0; -} - -static int -decode_unuse(struct cell *c, char *buf) -{ - c->type = TT_UNUSED; - buf = pass_str(buf, "-next="); - buf = sget_int(buf, &c->u.next_unused); - return 0; -} - -static int -decode_node(struct cell *c, char *buf) -{ - c->type = TT_NODE; - buf = pass_str(buf, "N "); - buf = sget_int(buf, &c->u.node.key); - buf = sget_int(buf, &c->u.node.parent); - buf = sget_int(buf, &c->u.node.next); - buf = sget_int(buf, &c->u.node.child); - buf = sget_int(buf, &c->u.node.body); - return 0; -} - -static int -decode_body(struct cell *c, char *buf) -{ - c->type = TT_BODY; - buf = pass_str(buf, "B"); - buf = sget_int(buf, &c->next_tail); - buf = sget_int(buf, &c->u.body.owner); - c->u.body.obj = decode_str(buf, 9); - return 0; -} - -static int -decode_tail(struct cell *c, char *buf) -{ - c->type = TT_TAIL; - buf = pass_str(buf, "T"); - buf = sget_int(buf, &c->u.tail.prev); - buf = sget_int(buf, &c->next_tail); - c->u.tail.obj = decode_str(buf, 9); - return 0; -} - -static int -decode_alloced(struct cell *c) -{ - c->type = TT_ALLOCED; - return 0; -} - -static struct cell * -decode_nth_cell(struct text_trie *tt, struct cell *c, int nth) -{ - int res; - char *buf; - if (nth < 0 || - (anthy_mmap_size(tt->mapping) / LINE_LEN) < - (nth + 1)) { - return NULL; - } - buf = &tt->ptr[nth*LINE_LEN]; - - res = -1; - switch (buf[0]) { - case 'S': - res = decode_super(c, buf); - break; - case '-': - res = decode_unuse(c, buf); - break; - case 'N': - res = decode_node(c, buf); - break; - case 'B': - res = decode_body(c, buf); - break; - case 'T': - res = decode_tail(c, buf); - break; - case '?': - res = decode_alloced(c); - break; - default: - /*printf("decode fail (nth=%d::%s).\n", nth, buf);*/ - ; - } - if (res) { - c->type = TT_UNUSED; - } - return c; -} - -static struct cell * -decode_nth_node(struct text_trie *tt, struct cell *c, int nth) -{ - if (!decode_nth_cell(tt, c, nth)) { - return NULL; - } - if (c->type != TT_NODE) { - return NULL; - } - return c; -} - -static int -update_mapping(struct text_trie *tt) -{ - if (tt->mapping) { - anthy_munmap(tt->mapping); - } - tt->mapping = anthy_mmap(tt->fn, 1); - if (!tt->mapping) { - /* try to fall back read-only mmap */ - tt->mapping = anthy_mmap(tt->fn, 0); - } - if (!tt->mapping) { - tt->ptr = NULL; - return 1; - } - tt->ptr = anthy_mmap_address(tt->mapping); - return 0; -} - -static int -expand_file(struct text_trie *tt, int count) -{ - char buf[LINE_LEN+1]; - int i; - for (i = 0; i < LINE_LEN; i++) { - buf[i] = ' '; - } - buf[LINE_LEN-1] = '\n'; - /**/ - for (i = 0; i < count; i++) { - int res; - res = fwrite(buf, LINE_LEN, 1, tt->wfp); - if (res != 1) { - return 1; - } - if (fflush(tt->wfp)) { - return 1; - } - } - return 0; -} - -static int -set_file_size(struct text_trie *tt, int len) -{ - int size = LINE_LEN * len; - int cur_size; - int err = 0; - - fseek(tt->wfp, 0, SEEK_END); - cur_size = ftell(tt->wfp); - if (cur_size == size) { - return 0; - } - if (cur_size > size) { - truncate(tt->fn, size); - } else { - err = expand_file(tt, (size - cur_size) / LINE_LEN); - if (!err) { - update_mapping(tt); - } else { - tt->fatal = 1; - } - } - - return err; -} - -static struct cell * -get_super_cell(struct text_trie *tt) -{ - /* cached? */ - if (tt->valid_super) { - return &tt->super; - } - /* read */ - if (decode_nth_cell(tt, &tt->super, 0)) { - tt->valid_super = 1; - return &tt->super; - } - /* create now */ - tt->super.type = TT_SUPER; - tt->super.u.super.first_unused = 0; - tt->super.u.super.root_cell = 0; - tt->super.u.super.size = 1; - tt->super.u.super.serial = 1; - if (set_file_size(tt, 1) != 0) { - return NULL; - } - write_back_cell(tt, &tt->super, 0); - tt->valid_super = 1; - return &tt->super; -} - -/* convenience function */ -static int -get_array_size(struct text_trie *a) -{ - struct cell *super = get_super_cell(a); - int size = super->u.super.size; - return size; -} - -/* convenience function */ -static int -get_root_idx(struct text_trie *tt) -{ - struct cell *super = get_super_cell(tt); - if (!super) { - return 0; - } - return super->u.super.root_cell; -} - -static int -expand_array(struct text_trie *tt, int len) -{ - int i; - struct cell *super; - int res; - int size = get_array_size(tt); - if (size >= len) { - return 0; - } - /* expand file */ - res = set_file_size(tt, len); - if (res) { - return 1; - } - /* fill unused */ - super = get_super_cell(tt); - for (i = super->u.super.size; i < len; i++) { - struct cell ex_cell; - ex_cell.type = TT_UNUSED; - ex_cell.u.next_unused = super->u.super.first_unused; - write_back_cell(tt, &ex_cell, i); - super->u.super.first_unused = i; - } - super->u.super.size = len; - write_back_cell(tt, super, 0); - return 0; -} - -void -anthy_trie_print_array(struct text_trie *tt) -{ - int i; - int size = get_array_size(tt); - print_cell(0, get_super_cell(tt)); - for (i = 1; i < size; i++) { - struct cell c; - decode_nth_cell(tt, &c, i); - print_cell(i, &c); - release_cell_str(&c); - } -} - -/* get unused cell */ -static int -get_unused_index(struct text_trie *tt) -{ - struct cell *super; - int unuse; - struct cell new_cell; - - super = get_super_cell(tt); - unuse = super->u.super.first_unused; - if (!unuse) { - /* expand array */ - expand_array(tt, super->u.super.size + EXPAND_COUNT); - unuse = super->u.super.first_unused; - if (!unuse) { - return 0; - } - } - if (!decode_nth_cell(tt, &new_cell, unuse)) { - tt->fatal = 1; - return 0; - } - super->u.super.first_unused = new_cell.u.next_unused; - new_cell.type = TT_ALLOCED; - write_back_cell(tt, &new_cell, unuse); - write_back_cell(tt, super, 0); - return unuse; -} - -static void -free_cell(struct text_trie *tt, int idx) -{ - struct cell *super = get_super_cell(tt); - struct cell c; - if (!decode_nth_cell(tt, &c, idx)) { - tt->fatal = 1; - } else { - c.type = TT_UNUSED; - c.u.next_unused = super->u.super.first_unused; - write_back_cell(tt, &c, idx); - } - super->u.super.first_unused = idx; - write_back_cell(tt, super, 0); -} - -static void -load_super(struct text_trie *tt) -{ - struct cell root, *super; - int root_idx; - super = get_super_cell(tt); - if (!super) { - tt->fatal = 1; - return ; - } - /**/ - if (super->u.super.root_cell) { - return ; - } - /**/ - root_idx = get_unused_index(tt); - if (root_idx == 0) { - tt->fatal = 1; - return ; - } - root.u.node.key = 0; - root.type = TT_NODE; - root.u.node.parent = 0; - root.u.node.next = 0; - root.u.node.child = 0; - root.u.node.body = 0; - write_back_cell(tt, &root, root_idx); - /**/ - tt->super.u.super.root_cell = root_idx; - write_back_cell(tt, super, 0); -} - -static void -purge_cache(struct text_trie *tt) -{ - if (tt) { - tt->valid_super = 0; - } -} - -static FILE * -do_fopen(const char *fn, int create) -{ - int fd; - if (!create) { - /* check file existance */ - FILE *fp; - fp = fopen(fn, "r"); - if (!fp) { - return NULL; - } - fclose(fp); - } - fd = open(fn, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR); - if (fd == -1) { - return NULL; - } - return fdopen(fd, "w"); -} - -static struct text_trie * -alloc_tt(const char *fn, FILE *wfp) -{ - struct text_trie *tt; - tt = malloc(sizeof(struct text_trie)); - tt->fatal = 0; - tt->wfp = wfp; - tt->valid_super = 0; - tt->fn = strdup(fn); - tt->mapping = NULL; - return tt; -} - -static void -clear_file(const char *fn) -{ - FILE *fp = fopen(fn, "w"); - if (fp) { - fclose(fp); - } -} - -static struct text_trie * -trie_open(const char *fn, int create, int do_retry) -{ - struct text_trie *tt; - FILE *fp; - /**/ - fp = do_fopen(fn, create); - if (!fp) { - return NULL; - } - /**/ - tt = alloc_tt(fn, fp); - if (!tt) { - fclose(fp); - return NULL; - } - /**/ - update_mapping(tt); - load_super(tt); - /**/ - if (tt->fatal) { - anthy_trie_close(tt); - if (!do_retry) { - return NULL; - } - clear_file(fn); - return trie_open(fn, create, 0); - } - /**/ - return tt; -} - - -/* API */ -struct text_trie * -anthy_trie_open(const char *fn, int create) -{ - struct text_trie *tt; - anthy_priv_dic_lock(); - tt = trie_open(fn, create, 1); - anthy_priv_dic_unlock(); - purge_cache(tt); - return tt; -} - -/* API */ -void -anthy_trie_close(struct text_trie *tt) -{ - if (!tt) { - return ; - } - fclose(tt->wfp); - anthy_munmap(tt->mapping); - free(tt->fn); - free(tt); -} - -/* API */ -void -anthy_trie_update_mapping(struct text_trie *tt) -{ - if (!tt) { - return ; - } - anthy_priv_dic_lock(); - update_mapping(tt); - anthy_priv_dic_unlock(); -} - -static void -graft_child(struct text_trie *tt, int parent_idx, int new_idx) -{ - struct cell parent_cell; - struct cell new_cell; - struct cell cur_youngest_cell; - int cur_idx; - /**/ - if (!decode_nth_node(tt, &parent_cell, parent_idx)) { - return ; - } - /**/ - if (parent_cell.u.node.child == 0) { - /* 1st child */ - parent_cell.u.node.child = new_idx; - write_back_cell(tt, &parent_cell, parent_idx); - return ; - } - - if (!decode_nth_node(tt, &cur_youngest_cell, parent_cell.u.node.child)) { - return ; - } - if (!decode_nth_node(tt, &new_cell, new_idx)) { - return ; - } - if (new_cell.u.node.key < cur_youngest_cell.u.node.key) { - /* newly added younger child */ - new_cell.u.node.next = parent_cell.u.node.child; - parent_cell.u.node.child = new_idx; - write_back_cell(tt, &new_cell, new_idx); - write_back_cell(tt, &parent_cell, parent_idx); - return; - } - - /* insert some order */ - cur_idx = parent_cell.u.node.child; - while (cur_idx) { - int next_idx; - struct cell cur_cell, tmp_cell; - struct cell *next_cell = NULL; - if (!decode_nth_node(tt, &cur_cell, cur_idx)) { - return ; - } - next_idx = cur_cell.u.node.next; - /**/ - if (next_idx) { - next_cell = decode_nth_node(tt, &tmp_cell, next_idx); - } - if (!next_cell) { - /* append */ - new_cell.u.node.next = 0; - cur_cell.u.node.next = new_idx; - write_back_cell(tt, &cur_cell, cur_idx); - break; - } else { - if (cur_cell.u.node.key < new_cell.u.node.key && - new_cell.u.node.key < next_cell->u.node.key) { - cur_cell.u.node.next = new_idx; - new_cell.u.node.next = next_idx; - write_back_cell(tt, &cur_cell, cur_idx); - break; - } - } - cur_idx = next_idx; - } - write_back_cell(tt, &new_cell, new_idx); -} - -static int -find_child(struct text_trie *tt, int parent_idx, int key, int exact) -{ - int child_idx; - int prev_key; - struct cell parent_cell; - - if (!decode_nth_node(tt, &parent_cell, parent_idx)) { - return 0; - } - - /**/ - prev_key = 0; - child_idx = parent_cell.u.node.child; - - /**/ - while (child_idx) { - struct cell child_cell; - int this_key; - /**/ - if (!decode_nth_node(tt, &child_cell, child_idx)) { - return 0; - } - this_key = child_cell.u.node.key; - if (this_key <= prev_key) { - return 0; - } - /**/ - if (exact && this_key == key) { - return child_idx; - } - if (!exact && (this_key & 0xff00) == (key & 0xff00)) { - return child_idx; - } - child_idx = child_cell.u.node.next; - prev_key = this_key; - } - return 0; -} - -static int -trie_search_rec(struct text_trie *tt, struct path *p, - int parent_idx, int create) -{ - int child_idx; - int key = p->path[p->cur]; - /* special case */ - if (p->cur == p->len) { - return parent_idx; - } - /* scan child */ - child_idx = find_child(tt, parent_idx, key, 1); - if (!child_idx) { - struct cell child_cell; - if (!create) { - return 0; - } - /* add child */ - child_idx = get_unused_index(tt); - if (!child_idx) { - return 0; - } - if (!decode_nth_cell(tt, &child_cell, child_idx)) { - return 0; - } - child_cell.type = TT_NODE; - child_cell.u.node.parent = parent_idx; - child_cell.u.node.key = key; - child_cell.u.node.next = 0; - child_cell.u.node.child = 0; - child_cell.u.node.body = 0; - write_back_cell(tt, &child_cell, child_idx); - /* graft */ - graft_child(tt, parent_idx, child_idx); - } - p->cur ++; - key ++; - if (!key) { - return child_idx; - } - return trie_search_rec(tt, p, child_idx, create); -} - -static char * -get_str_part(const char *str, int from) -{ - char buf[OBJ_LEN+1]; - int i; - for (i = 0; i < OBJ_LEN; i++) { - buf[i] = str[from+i]; - } - buf[i] = 0; - return strdup(buf); -} - -static void -release_body(struct text_trie *tt, int idx) -{ - struct cell c; - int tail_idx; - if (!decode_nth_cell(tt, &c, idx) || - c.type != TT_BODY) { - return ; - } - tail_idx = c.next_tail; - while (tail_idx) { - struct cell tail_cell; - int tmp; - if (!decode_nth_cell(tt, &tail_cell, tail_idx)) { - break; - } - tmp = tail_cell.next_tail; - free_cell(tt, tail_idx); - tail_idx = tmp; - } - free_cell(tt, idx); -} - -static void -set_body(struct text_trie *tt, int idx, const char *body_str) -{ - int body_idx = get_unused_index(tt); - int len; - int i; - struct cell node_cell; - struct cell body_cell; - struct cell prev_cell; - struct cell tail_cell; - int prev_idx; - /**/ - if (!decode_nth_cell(tt, &node_cell, idx)) { - return ; - } - if (node_cell.u.node.body) { - release_body(tt, node_cell.u.node.body); - } - len = strlen(body_str); - /**/ - node_cell.u.node.body = body_idx; - write_back_cell(tt, &node_cell, idx); - /**/ - if (!decode_nth_cell(tt, &body_cell, body_idx)) { - return ; - } - body_cell.type = TT_BODY; - body_cell.u.body.obj = get_str_part(body_str, 0); - body_cell.u.body.owner = idx; - body_cell.next_tail = 0; - write_back_cell(tt, &body_cell, body_idx); - release_cell_str(&body_cell); - /**/ - if (!decode_nth_cell(tt, &body_cell, body_idx)) { - return ; - } - /**/ - prev_idx = body_idx; - prev_cell = body_cell; - for (i = OBJ_LEN; i < len; i += OBJ_LEN) { - int tail_idx = get_unused_index(tt); - if (!decode_nth_cell(tt, &tail_cell, tail_idx)) { - return ; - } - tail_cell.type = TT_TAIL; - tail_cell.u.tail.obj = get_str_part(body_str, i); - tail_cell.u.tail.prev = prev_idx; - tail_cell.next_tail = 0; - prev_cell.next_tail = tail_idx; - write_back_cell(tt, &tail_cell, tail_idx); - write_back_cell(tt, &prev_cell, prev_idx); - release_cell_str(&prev_cell); - /**/ - prev_idx = tail_idx; - prev_cell = tail_cell; - } - if (i != OBJ_LEN) { - release_cell_str(&tail_cell); - } -} - -static int -trie_add(struct text_trie *tt, struct path *p, const char *body) -{ - int root_idx = get_root_idx(tt); - int target_idx; - /**/ - if (root_idx == 0) { - return -1; - } - target_idx = trie_search_rec(tt, p, root_idx, 1); - if (target_idx) { - set_body(tt, target_idx, body); - } - return 0; -} - -/* API */ -int -anthy_trie_add(struct text_trie *tt, const char *key, const char *body) -{ - int res; - int len; - struct path p; - if (!tt || tt->fatal) { - return -1; - } - len = strlen(key); - path_setup(&p, key, len, alloca(sizeof(int)*len)); - anthy_priv_dic_lock(); - res = trie_add(tt, &p, body); - anthy_priv_dic_unlock(); - purge_cache(tt); - return res; -} - -static int -get_object_length(struct text_trie *tt, int body_idx) -{ - int len = 0; - int idx = body_idx; - while (idx) { - struct cell c; - if (!decode_nth_cell(tt, &c, idx)) { - return 0; - } - idx = c.next_tail; - len += OBJ_LEN; - release_cell_str(&c); - } - return len; -} - -static char * -gather_str(struct text_trie *tt, int body_idx) -{ - int idx; - char *buf; - int len; - /* count length */ - len = get_object_length(tt, body_idx); - if (len == 0) { - return NULL; - } - /**/ - buf = malloc(len + 1); - idx = body_idx; - len = 0; - while (idx) { - struct cell c; - if (!decode_nth_cell(tt, &c, idx)) { - free(buf); - return NULL; - } - if (len == 0) { - sprintf(&buf[len], "%s", c.u.body.obj); - } else { - sprintf(&buf[len], "%s", c.u.tail.obj); - } - idx = c.next_tail; - len += OBJ_LEN; - release_cell_str(&c); - } - return buf; -} - -static char * -trie_find(struct text_trie *tt, struct path *p) -{ - int root_idx; - int target_idx; - root_idx = get_root_idx(tt); - if (!root_idx) { - return NULL; - } - target_idx = trie_search_rec(tt, p, root_idx, 0); - if (target_idx) { - struct cell target_cell; - int body_idx; - if (!decode_nth_node(tt, &target_cell, target_idx)) { - return NULL; - } - body_idx = target_cell.u.node.body; - if (body_idx) { - return gather_str(tt, body_idx); - } - } - return NULL; -} - -/* API */ -char * -anthy_trie_find(struct text_trie *tt, char *key) -{ - char *res; - struct path p; - int len; - if (!tt || tt->fatal) { - return NULL; - } - len = strlen(key); - path_setup(&p, key, len, alloca(sizeof(int)*len)); - anthy_priv_dic_lock(); - res = trie_find(tt, &p); - anthy_priv_dic_unlock(); - purge_cache(tt); - return res; -} - -static int -do_find_next_key(struct text_trie *tt, struct path *p, - int root_idx, int target_idx) -{ - struct cell *target_cell, tmp_cell; - int prev_is_up = 0; - target_cell = decode_nth_node(tt, &tmp_cell, target_idx); - /**/ - do { - /* one step */ - if (!target_cell) { - return -1; - } - if (!prev_is_up && target_cell->u.node.child) { - prev_is_up = 0; - target_idx = target_cell->u.node.child; - p->cur++; - } else if (target_cell->u.node.next) { - prev_is_up = 0; - target_idx = target_cell->u.node.next; - } else if (target_cell->u.node.parent) { - prev_is_up = 1; - target_idx = target_cell->u.node.parent; - p->cur--; - } else { - return -1; - } - target_cell = decode_nth_node(tt, &tmp_cell, target_idx); - if (!target_cell) { - return -1; - } - if (p->cur >= p->max_len) { - continue; - } - if (p->cur == 0) { - return -1; - } - p->path[p->cur-1] = target_cell->u.node.key; - if (!prev_is_up && target_cell->u.node.body) { - return 0; - } - } while (target_idx != root_idx); - return -1; -} - -static int -find_partial_key(struct text_trie *tt, struct path *p, int idx) -{ - struct cell c; - if (!decode_nth_node(tt, &c, idx)) { - return -1; - } - if (c.type != TT_NODE) { - return -1; - } - p->len ++; - p->path[p->cur] = c.u.node.key; - p->cur ++; - return 0; -} - -static int -trie_find_next_key(struct text_trie *tt, struct path *p) -{ - int root_idx = get_root_idx(tt); - int target_idx; - int tmp_idx; - /**/ - target_idx = trie_search_rec(tt, p, root_idx, 0); - if (target_idx > 0) { - /* easy case */ - return do_find_next_key(tt, p, root_idx, target_idx); - } - if ((p->path[p->len-1] & 0xff) != 0) { - /* simply not exist in tree */ - return -1; - } - /* special case */ - p->len --; - p->cur = 0; - target_idx = trie_search_rec(tt, p, root_idx, 0); - tmp_idx = find_child(tt, target_idx, p->path[p->len], 0); - if (tmp_idx) { - return find_partial_key(tt, p, tmp_idx); - } - return do_find_next_key(tt, p, root_idx, target_idx); -} - - -/* API */ -char * -anthy_trie_find_next_key(struct text_trie *tt, char *buf, int buf_len) -{ - int res; - struct path p; - if (!tt || tt->fatal) { - return NULL; - } - path_setup(&p, buf, buf_len, alloca(sizeof(int)*buf_len)); - anthy_priv_dic_lock(); - res = trie_find_next_key(tt, &p); - anthy_priv_dic_unlock(); - purge_cache(tt); - if (res) { - return NULL; - } - path_copy_to_str(&p, buf, buf_len); - return buf; -} - -static void -trie_find_prefix(struct text_trie *tt, const char *str, - char *buf, int buf_len, - int (*cb)(const char *key, const char *str)) -{ - int idx = get_root_idx(tt); - int i, len = strlen(str); - for (i = 0; i < len && i < buf_len; i++) { - struct cell c; - idx = find_child(tt, idx, str[i], 1); - if (!idx) { - return ; - } - if (!decode_nth_node(tt, &c, idx)) { - return ; - } - buf[i] = idx; - buf[i+1] = 0; - if (c.u.node.body) { - char *s = gather_str(tt, c.u.node.body); - if (cb) { - cb(buf, s); - } - free(s); - } - } -} - -void -anthy_trie_find_prefix(struct text_trie *tt, const char *str, - char *buf, int buf_len, - int (*cb)(const char *key, const char *str)) -{ - if (!tt || tt->fatal) { - return ; - } - anthy_priv_dic_lock(); - trie_find_prefix(tt, str, buf, buf_len, cb); - anthy_priv_dic_unlock(); - purge_cache(tt); -} - -static void -disconnect(struct text_trie *tt, int parent_idx, int target_idx) -{ - struct cell parent_cell; - struct cell target_cell; - - if (!decode_nth_node(tt, &parent_cell, parent_idx) || - !decode_nth_node(tt, &target_cell, target_idx)) { - return ; - } - - if (parent_cell.u.node.child == target_idx) { - /* 1st child */ - parent_cell.u.node.child = target_cell.u.node.next; - write_back_cell(tt, &parent_cell, parent_idx); - if (!target_cell.u.node.next && - !parent_cell.u.node.body) { - /* only child and parent does not have body, so traverse upward */ - disconnect(tt, parent_cell.u.node.parent, parent_idx); - free_cell(tt, target_idx); - return ; - } - free_cell(tt, target_idx); - } else { - /* not 1st child */ - int child_idx = parent_cell.u.node.child; - while (child_idx) { - struct cell cur; - if (!decode_nth_cell(tt, &cur, child_idx)) { - return ; - } - if (cur.u.node.next == target_idx) { - /**/ - cur.u.node.next = target_cell.u.node.next; - write_back_cell(tt, &cur, child_idx); - free_cell(tt, target_idx); - return ; - } - child_idx = cur.u.node.next; - } - } -} - -static void -trie_delete(struct text_trie *tt, struct path *p) -{ - struct cell target_cell; - int root_idx = get_root_idx(tt); - int target_idx, parent_idx; - target_idx = trie_search_rec(tt, p, root_idx, 0); - if (!target_idx) { - return ; - } - if (!decode_nth_node(tt, &target_cell, target_idx)) { - return ; - } - release_body(tt, target_cell.u.node.body); - target_cell.u.node.body = 0; - write_back_cell(tt, &target_cell, target_idx); - if (target_cell.u.node.child) { - return ; - } - parent_idx = target_cell.u.node.parent; - disconnect(tt, parent_idx, target_idx); -} - -/* API */ -void -anthy_trie_delete(struct text_trie *tt, const char *key) -{ - struct path p; - int len; - if (!tt || tt->fatal) { - return ; - } - len = strlen(key); - path_setup(&p, key, len, alloca(sizeof(int)*len)); - anthy_priv_dic_lock(); - trie_delete(tt, &p); - anthy_priv_dic_unlock(); - purge_cache(tt); -} diff --git a/src-worddic/word_dic.c b/src-worddic/word_dic.c index e760304..82d6a53 100644 --- a/src-worddic/word_dic.c +++ b/src-worddic/word_dic.c @@ -223,7 +223,7 @@ scan_misc_dic(struct gang_elm **array, int nr, int is_reverse) xstr *xs = &array[i]->xs; struct seq_ent *seq; seq = anthy_cache_get_seq_ent(xs, is_reverse); - /* 個人辞書からの取得(texttrie(旧形式)と未知語辞書) */ + /* 個人辞書からの取得(未知語辞書) */ if (seq) { anthy_copy_words_from_private_dic(seq, xs, is_reverse); anthy_validate_seq_ent(seq, xs, is_reverse); @@ -655,7 +655,6 @@ void anthy_lock_dic(void) { anthy_priv_dic_lock(); - anthy_priv_dic_update(); } /* フロントエンドから呼ばれる */