+
+// IndexDatabase --------------------------------------------------------------
+
+// Builds an empty index record: both ids start at zero and the stop-word
+// flag is cleared. Members not named in the initializer list are left to
+// their own default construction.
+TM::index_data_type::index_data_type()
+    : index_id(0), word_id(0), is_stop_word(false)
+{}
+
+// Factory: allocates a default-constructed index record with `new` and
+// returns it wrapped in the record's `pointer` type.
+TM::index_data_type::pointer TM::index_data_type::create()
+{
+    pointer instance(new index_data_type());
+    return instance;
+}
+
+// Opens the index database and prepares everything the other members need.
+//
+// The database name is the value stored under TMDATABASE_ROOT_PATH_KEY in
+// `settings`, followed by "/" and `dbname`. After opening, the constructor
+// makes sure the `indexes` table and its word_id index exist and prepares
+// the find / insert / update statements.
+//
+// settings: provides the database root path; must not be null.
+// dbname:   name appended to the root path.
+TM::IndexDatabase::IndexDatabase(Settings *settings, QString dbname)
+{
+    assert(settings);
+
+    // Upper bound on the number of sentence ids kept for one word; the
+    // word_database overload of insert_index() compares against it.
+    m_index_limit = 20 * 1024;
+
+    m_database_name =
+        settings->value(TMDATABASE_ROOT_PATH_KEY).toString()
+        + "/" + dbname;
+    open(Q_FUNC_INFO);
+
+    // Create the table.
+    exec("CREATE TABLE IF NOT EXISTS indexes("
+        "index_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,"
+        "word_id INTEGER UNIQUE NOT NULL,"
+        "sentence_ids BLOB,"
+        "is_stop_word BOOLEAN);", Q_FUNC_INFO);
+
+    // NOTE(review): word_id is declared UNIQUE; if the backend is SQLite that
+    // already implies an index, so this explicit one may be redundant. It is
+    // kept so that existing databases stay unchanged - confirm before removing.
+    exec("CREATE INDEX IF NOT EXISTS word_id_index ON indexes(word_id);", Q_FUNC_INFO);
+
+    // Prepare the queries.
+    // The columns are listed explicitly, in the order find_index() reads them
+    // by position (0..3), instead of relying on the field order of SELECT *.
+    m_find_index = prepare(
+        "SELECT index_id, word_id, sentence_ids, is_stop_word "
+        "FROM indexes WHERE word_id=?;", Q_FUNC_INFO);
+    m_insert_index = prepare(
+        "INSERT INTO indexes(word_id, sentence_ids, is_stop_word) "
+        "VALUES(?, ?, ?);", Q_FUNC_INFO);
+    m_update_index = prepare(
+        "UPDATE indexes SET "
+        "word_id=?,"
+        "sentence_ids=?,"
+        "is_stop_word=? "
+        "WHERE index_id=?;", Q_FUNC_INFO);
+}
+
+// Looks up the index record stored for `word_id`.
+//
+// word_id: id of the word to look up; asserted to be non-zero.
+// Returns a freshly created record filled from the first row of the query,
+// or a default-constructed `pointer` when the query yields no row.
+TM::index_data_type::pointer TM::IndexDatabase::find_index(quint32 word_id)
+{
+    assert(word_id);
+
+    m_find_index.bindValue(0, word_id);
+    exec(m_find_index, Q_FUNC_INFO);
+
+    index_data_type::pointer record;
+    if(!m_find_index.next())
+    {
+        // No row for this word: hand back the empty pointer.
+        return record;
+    }
+
+    record = index_data_type::create();
+    record->index_id = m_find_index.value(0).toUInt();
+    record->word_id = m_find_index.value(1).toUInt();
+
+    // The blob holds consecutive quint32 values written through QDataStream
+    // (see insert_index / update_index); read them back until the stream ends.
+    QByteArray blob = m_find_index.value(2).toByteArray();
+    QDataStream reader(blob);
+    quint32 id = 0;
+    while(!reader.atEnd())
+    {
+        reader >> id;
+        record->sentence_ids.insert(id);
+    }
+
+    record->is_stop_word = m_find_index.value(3).toBool();
+    return record;
+}
+
+// Registers `target_id` in the index record of every word of `source`.
+//
+// Each entry of source->words is treated as an inclusive [first, second]
+// character range into source->sentence. Words of two characters or fewer
+// are skipped. For each remaining word the id is obtained from
+// `word_database`, then:
+//   - if no record exists yet, one is created holding only `target_id`;
+//   - if the record is already flagged as a stop word, it is left untouched;
+//   - otherwise `target_id` is added, and if the id set then exceeds
+//     m_index_limit the set is cleared and the word becomes a stop word.
+//
+// word_database: resolves a word string to its id; must not be null.
+// source:        sentence whose words are indexed; must not be null.
+// target_id:     sentence id to record against each word.
+void TM::IndexDatabase::insert_index(WordDatabase::pointer word_database,
+    sentence_data_type::pointer source, quint32 target_id)
+{
+    assert(word_database);
+    assert(source);
+
+    QString const &sentence = source->sentence;
+    for(QPair<int,int> const &range : source->words)
+    {
+        QString word = sentence.mid(
+            range.first, range.second - range.first + 1);
+        if(word.size() <= 2) continue;
+
+        // Same unsigned type as find_index() and index_data_type::word_id.
+        quint32 const word_id = word_database->word_id(word);
+        assert(word_id);
+
+        index_data_type::pointer index_data = find_index(word_id);
+        if(!index_data)
+        {
+            // First occurrence of this word: create its record.
+            index_data = index_data_type::create();
+            index_data->word_id = word_id;
+            index_data->sentence_ids.insert(target_id);
+            index_data->is_stop_word = false;
+            insert_index(index_data);
+            continue;
+        }
+
+        // A stop word had its id set cleared on purpose; adding ids again
+        // would regrow the set and rewrite the row on every call.
+        if(index_data->is_stop_word) continue;
+
+        index_data->sentence_ids.insert(target_id);
+        // If the record has grown too large, turn the word into a stop word.
+        if(m_index_limit < index_data->sentence_ids.size())
+        {
+            index_data->sentence_ids.clear();
+            index_data->is_stop_word = true;
+        }
+        update_index(index_data);
+    }
+}
+
+// Inserts `index_data` as a new row through the prepared insert statement.
+//
+// The sentence id set is written value by value through a QDataStream into
+// a byte array, which is bound as the second parameter; word_id and the
+// stop-word flag are bound as the first and third. index_data->index_id is
+// neither read nor written here.
+//
+// index_data: record to store; asserted to be non-null.
+void TM::IndexDatabase::insert_index(index_data_type::pointer index_data)
+{
+    assert(index_data);
+
+    index_data_type const &record = *index_data;
+
+    // Serialise the id set into the blob.
+    QByteArray blob;
+    QDataStream writer(&blob, QIODevice::WriteOnly);
+    for(quint32 id : record.sentence_ids)
+    {
+        writer << id;
+    }
+
+    m_insert_index.bindValue(0, record.word_id);
+    m_insert_index.bindValue(1, blob);
+    m_insert_index.bindValue(2, record.is_stop_word);
+
+    exec(m_insert_index, Q_FUNC_INFO);
+}
+
+// Rewrites the row identified by index_data->index_id through the prepared
+// update statement.
+//
+// The sentence id set is written value by value through a QDataStream into
+// a byte array; word_id, that byte array, the stop-word flag and index_id
+// are bound to parameters 0..3 in that order.
+//
+// index_data: record to store; asserted to be non-null.
+void TM::IndexDatabase::update_index(index_data_type::pointer index_data)
+{
+    assert(index_data);
+
+    index_data_type const &record = *index_data;
+
+    // Serialise the id set into the blob.
+    QByteArray blob;
+    QDataStream writer(&blob, QIODevice::WriteOnly);
+    for(quint32 id : record.sentence_ids)
+    {
+        writer << id;
+    }
+
+    m_update_index.bindValue(0, record.word_id);
+    m_update_index.bindValue(1, blob);
+    m_update_index.bindValue(2, record.is_stop_word);
+    m_update_index.bindValue(3, record.index_id);
+
+    exec(m_update_index, Q_FUNC_INFO);
+}
+
+// Factory: allocates an IndexDatabase with `new`, forwarding `settings` and
+// `dbname` to the constructor, and returns it wrapped in `pointer`.
+TM::IndexDatabase::pointer TM::IndexDatabase::create(Settings *settings, QString dbname)
+{
+    pointer database(new IndexDatabase(settings, dbname));
+    return database;
+}
+