OSDN Git Service

ns_search: implement updating story's index
author hylom <hylom@users.sourceforge.jp>
Fri, 23 Mar 2018 12:57:55 +0000 (21:57 +0900)
committer hylom <hylom@users.sourceforge.jp>
Fri, 23 Mar 2018 12:57:55 +0000 (21:57 +0900)
src/ns_search/lucene_wrapper/wrapper.py
src/ns_search/newslash_db/__init__.py
src/ns_search/newslash_db/base.py
src/ns_search/newslash_db/stories/stories.py
src/ns_search/newslash_index/__init__.py
src/ns_search/newslash_index/index.py
src/ns_search/test/index.py [deleted file]
src/ns_search/test/test_config.py.example [new file with mode: 0644]
src/ns_search/test/test_lucene_wrapper.py [new file with mode: 0644]
src/ns_search/test/test_newslash_index.py [new file with mode: 0644]

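diff --git a/src/ns_search/lucene_wrapper/wrapper.py b/src/ns_search/lucene_wrapper/wrapper.py
index adf988d..75ba5c9 100644 (file)
--- a/src/ns_search/lucene_wrapper/wrapper.py
+++ b/src/ns_search/lucene_wrapper/wrapper.py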
@@ -64,6 +64,9 @@ class Indexer(LuceneWrapper):
     def update(self, term, doc):
         self._writer.updateDocument(term, doc.doc)
         return self
+
+    def delete(self, query):
+        self._writer.deleteDocuments(query.query)
     
     def __enter__(self):
         self._get_writer()
@@ -96,6 +99,12 @@ class Document(LuceneWrapper):
         self.doc.add(IntField(name, value, self._store_flag(store)))
         return self
 
+    def remove_field(self, name):
+        self.doc.removeField(name)
+
+    def remove_fields(self, name):
+        self.doc.removeFields(name)
+
 
 class Searcher(LuceneWrapper):
     def __init__(self, **kwargs):
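diff --git a/src/ns_search/newslash_db/__init__.py b/src/ns_search/newslash_db/__init__.py
index edb4dd9..4784212 100644 (file)
--- a/src/ns_search/newslash_db/__init__.py
+++ b/src/ns_search/newslash_db/__init__.py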
@@ -4,7 +4,7 @@ __author__ = 'Hiromichi Matsushima <hylom@users.osdn.net>'
 __version__ = "0.1.0"
 
 from stories import Stories
-from base import NewslashDB
+from base import NewslashDB, NewslashDBProgrammingError
 
 __all__ = []
 
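diff --git a/src/ns_search/newslash_db/base.py b/src/ns_search/newslash_db/base.py
index 41734ad..46ca12c 100644 (file)
--- a/src/ns_search/newslash_db/base.py
+++ b/src/ns_search/newslash_db/base.py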
@@ -30,17 +30,43 @@ class NewslashDB(object):
 
     def execute(self, query, **kwargs):
         cur = self.cursor()
-        cur.execute(query, params=kwargs)
+        try:
+            cur.execute(query, params=kwargs)
+        except mysql.connector.ProgrammingError as e:
+            raise NewslashDBProgrammingError(e)
         return cur
 
+    def start_transaction(self):
+        if not self._conn:
+            self.connect()
+        self._conn.start_transaction()
+
+    def commit(self):
+        self._conn.commit()
+
+    def rollback(self):
+        self._conn.rollback()
+
     def connect(self):
         self._conn = mysql.connector.connect(user=self.config("user"),
                                              password=self.config("password"),
                                              database=self.config("name"),
                                              host=self.config("host"))
+        self._conn.autocommit = True
         
     def close(self):
         if self._conn:
             self._conn.close()
             self._conn = None
 
+
+class NewslashDBError(Exception):
+    def __str__(self):
+        return self.message
+
+class NewslashDBProgrammingError(NewslashDBError):
+    def __init__(self, e):
+        self.message = str(e)
+        self.errno = e.errno
+
+
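diff --git a/src/ns_search/newslash_db/stories/stories.py b/src/ns_search/newslash_db/stories/stories.py
index fa12721..54f9d71 100644 (file)
--- a/src/ns_search/newslash_db/stories/stories.py
+++ b/src/ns_search/newslash_db/stories/stories.py
@@ -1,12 +1,135 @@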
 # coding: utf-8
 '''stories.py: newslash story related database module'''
 
-from ..base import NewslashDB
+from ..base import NewslashDB, NewslashDBError
+from datetime import datetime
+
+CREATE_STORIES_TABLE_SQL = '''
+CREATE TABLE IF NOT EXISTS `stories` (
+`stoid`               mediumint(8) unsigned NOT NULL AUTO_INCREMENT,
+`sid`                 varchar(16)           NOT NULL DEFAULT '',
+`uid`                 mediumint(8) unsigned NOT NULL DEFAULT '0',
+`dept`                varchar(100)          NOT NULL DEFAULT '',
+`time`                datetime              NOT NULL DEFAULT '1900-01-01 00:00:00',
+`hits`                mediumint(8) unsigned NOT NULL DEFAULT '0',
+`discussion`          mediumint(8) unsigned DEFAULT NULL,
+`primaryskid`         smallint(6)           NOT NULL DEFAULT '1',
+`tid`                 int(11)               NOT NULL DEFAULT '49',
+`submitter`           mediumint(8) unsigned NOT NULL DEFAULT '0',
+`commentcount`        smallint(5) unsigned  NOT NULL DEFAULT '0',
+`hitparade`           varchar(64)           NOT NULL DEFAULT '0,0,0,0,0,0,0',
+`is_archived`         enum('no','yes')      NOT NULL DEFAULT 'no',
+`in_trash`            enum('no','yes')      NOT NULL DEFAULT 'no',
+`day_published`       date                  NOT NULL DEFAULT '1900-01-01',
+`qid`                 mediumint(8) unsigned DEFAULT NULL,
+`last_update`         timestamp             NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
+`body_length`         mediumint(8) unsigned NOT NULL DEFAULT '0',
+`word_count`          mediumint(8) unsigned NOT NULL DEFAULT '0',
+`archive_last_update` datetime              NOT NULL DEFAULT '1970-01-01 00:00:00',
+`sponsor`             tinyint(3) unsigned   DEFAULT '0',
+`stuck`               enum('no','yes')      DEFAULT 'no',
+`stuckpos`            tinyint(3) unsigned   DEFAULT '0',
+`stuckendtime`        datetime              NOT NULL DEFAULT '1900-01-01 00:00:00',
+PRIMARY KEY             (`stoid`),
+UNIQUE KEY `sid`        (`sid`),
+KEY `uid`               (`uid`),
+KEY `is_archived`       (`is_archived`),
+KEY `time`              (`time`),
+KEY `submitter`         (`submitter`),
+KEY `day_published`     (`day_published`),
+KEY `skidtid`           (`primaryskid`,`tid`),
+KEY `discussion_stoid`  (`discussion`,`stoid`),
+KEY `time_stoid`        (`time`,`stoid`),
+KEY `trash_time`        (`in_trash`,`time`),
+KEY `ibfk_converttid_4` (`tid`),
+KEY `stuckendtime`      (`stuckendtime`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 ROW_FORMAT=DYNAMIC
+'''
+
+CREATE_STORY_TEXT_TABLE_SQL = '''
+CREATE TABLE IF NOT EXISTS `story_text` (
+`stoid`       mediumint(8) unsigned NOT NULL DEFAULT '0',
+`title`       varchar(100)          NOT NULL DEFAULT '',
+`introtext`   mediumtext,
+`bodytext`    mediumtext,
+`relatedtext` mediumtext,
+`rendered`    mediumtext,
+PRIMARY KEY (`stoid`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 ROW_FORMAT=DYNAMIC
+'''
 
 class Stories(NewslashDB):
     def __init__(self, config):
         super(Stories, self).__init__(config)
 
+    def create_tables(self):
+        '''this function implemented for test purpose...'''
+        self.start_transaction()
+
+        try:
+            self.execute(CREATE_STORIES_TABLE_SQL)
+            self.execute(CREATE_STORY_TEXT_TABLE_SQL)
+        except NewslashDBError as e:
+            self.rollback()
+            raise e
+
+        self.commit()
+
+    def drop_tables(self):
+        '''this function implemented for test purpose...'''
+        self.start_transaction()
+
+        try:
+            self.execute('DROP TABLE stories')
+            self.execute('DROP TABLE story_text')
+        except NewslashDBError as e:
+            self.rollback()
+            raise e
+
+        self.commit()
+
+    def insert(self, **kwargs):
+        '''this function implemented for test purpose...'''
+        sql_stories = ('INSERT INTO stories (sid, uid, dept, time)'
+                       '  VALUES (%(sid)s, %(uid)s, %(dept)s, %(time)s)')
+        sql_story_text = ('INSERT INTO story_text (stoid, title, introtext, bodytext, relatedtext, rendered)'
+                          '  VALUES (%(stoid)s, %(title)s, %(introtext)s, "", "", "")')
+
+        self.start_transaction()
+
+        try:
+            cur = self.execute(sql_stories, **kwargs)
+            kwargs["stoid"] = cur.lastrowid
+            self.execute(sql_story_text, **kwargs)
+        except NewslashDBError as e:
+            self.rollback()
+            raise e
+
+        self.commit()
+        return kwargs["stoid"]
+
+    def update(self, **kwargs):
+        '''this function implemented for test purpose...'''
+        kwargs["last_update"] = datetime.now()
+        sql_stories = ('UPDATE stories'
+                       '  SET uid=%(uid)s, dept=%(dept)s, last_update=%(last_update)s'
+                       '  WHERE stoid=%(stoid)s')
+        sql_story_text = ('UPDATE story_text'
+                          '  SET title=%(title)s, introtext=%(introtext)s'
+                          '  WHERE stoid=%(stoid)s')
+
+        self.start_transaction()
+
+        try:
+            self.execute(sql_stories, **kwargs)
+            self.execute(sql_story_text, **kwargs)
+        except NewslashDBError as e:
+            self.rollback()
+            raise e
+
+        self.commit()
+        return
+
     def select(self, **kwargs):
         if not "limit" in kwargs:
             kwargs["limit"] = self.config("default_limit")
@@ -19,10 +142,10 @@ class Stories(NewslashDB):
                      "  LEFT JOIN story_text USING(stoid)"
                      "  WHERE stories.stoid = %(stoid)s"
                      "  ORDER BY stories.stoid DESC LIMIT %(limit)s OFFSET %(offset)s")
-        elif "last_update_gt" in kwargs:
+        elif "last_update_ge" in kwargs:
             query = ("SELECT stories.*, story_text.* FROM stories"
                      "  LEFT JOIN story_text USING(stoid)"
-                     "  WHERE stories.stoid <= %(stoid_gt)s AND stories.last_update > %(last_update_gt)"
+                     "  WHERE stories.stoid <= %(stoid_le)s AND stories.last_update >= %(last_update_ge)s"
                      "  ORDER BY stories.stoid DESC LIMIT %(limit)s OFFSET %(offset)s")
         elif "stoid_gt" in kwargs:
             query = ("SELECT stories.*, story_text.* FROM stories"
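diff --git a/src/ns_search/newslash_index/__init__.py b/src/ns_search/newslash_index/__init__.py
index b047fe7..d4fbfe7 100644 (file)
--- a/src/ns_search/newslash_index/__init__.py
+++ b/src/ns_search/newslash_index/__init__.py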
@@ -6,6 +6,13 @@ __version__ = "0.1.0"
 
 from index import Index
 from exceptions import *
+import htmlutil
+
+import lucene_wrapper
+
+def init_vm():
+    lucene_wrapper.init_vm()
+
 
 __all__ = []
 
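diff --git a/src/ns_search/newslash_index/index.py b/src/ns_search/newslash_index/index.py
index 7cca1b0..afc709e 100644 (file)
--- a/src/ns_search/newslash_index/index.py
+++ b/src/ns_search/newslash_index/index.py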
@@ -7,6 +7,7 @@ import htmlutil
 import calendar
 from datetime import datetime
 
+
 from mysql.connector.errors import ProgrammingError
 import exceptions
 
@@ -62,14 +63,20 @@ class Index(object):
 
         db.close()
 
+    def drop_metadata_table(self):
+        db = newslash_db.NewslashDB(self._db_config())
+        db.execute('DROP TABLE ns_search_metadata')
+        db.close()
+        
+
     def get_metadata(self, target):
         sql = 'SELECT * from ns_search_metadata WHERE target_name = %(target)s'
         db = newslash_db.NewslashDB(self._db_config())
         cur = db.execute(sql, target=target)
-        if len(cur) > 0:
-            result = dict(zip(cur.column_names, cur[0]))
+        if cur.rowcount > 0:
+            result = dict(zip(cur.column_names, cur.fetchone()))
         else:
-            result = None
+            result = {}
 
         db.close()
         return result
@@ -80,8 +87,8 @@ class Index(object):
             "  (target_name, last_update, latest_id)"
             "  VALUES (%(target)s, %(last_update)s, %(latest_id)s)"
             "  ON DUPLICATE KEY UPDATE"
-            "    last_update = %(last_update),"
-            "    latest_id = %(latest_id)"
+            "    last_update = %(last_update)s,"
+            "    latest_id = %(latest_id)s"
         )
 
         db = newslash_db.NewslashDB(self._db_config())
@@ -92,7 +99,7 @@ class Index(object):
         '''destroy current index then create new one for target'''
         pass
 
-    def update_story(self, target, batch_size=1000, progress_cb=None, error_cb=None):
+    def update(self, target, batch_size=1000, progress_cb=None, error_cb=None):
         '''update story index'''
         stories = newslash_db.Stories(self._db_config())
         query_done = False
@@ -139,7 +146,7 @@ class Index(object):
         update_success = 0
         update_errors = 0
         with lucene_wrapper.Indexer(index_directory=self.config("SearchIndex", "path")) as indexer:
-            items = stories.select(stoid_gt=latest_id, last_update_gt=last_update)
+            items = stories.select(stoid_le=latest_id, last_update_ge=last_update)
             for item in items:
                 # first, create term to identify target document
                 target_id = self._get_primary_id(target, item)
@@ -150,11 +157,13 @@ class Index(object):
                 try:
                     doc = self._make_story_document(item)
                 except DocumentMakingError:
-                    errors += 1
+                    update_errors += 1
                     if error_cb is not None:
                         error_cb('update', item)
                     continue
-                indexer.update(term, doc)
+                indexer.delete(term)
+                indexer.add(doc)
+                update_success += 1
 
         if progress_cb is not None:
             progress_cb('update', update_success, update_errors)
@@ -168,9 +177,9 @@ class Index(object):
         # done
         return (success, errors)
 
-    def _get_primary_id(target, item):
-        if target == 'stories':
-            return item['stoid']
+    def _get_primary_id(self, target, item):
+        if target == 'stories' or target == 'story':
+            return item['sid']
 
         return None
     
@@ -256,3 +265,14 @@ class Index(object):
         return result[0]
         
         
+    def query(self, query_string):
+        '''query index'''
+        searcher = self._get_searcher()
+        try:
+            query = lucene_wrapper.Query("content_text", query_string)
+        except lucene_wrapper.QueryParseError as e:
+            sys.stderr.write("query parse error\n")
+            return
+
+        result = searcher.search(query)
+        return result
diff --git a/src/ns_search/test/index.py b/src/ns_search/test/index.py
deleted file mode 100644 (file)
index 9e76530..0000000
--- a/src/ns_search/test/index.py
+++ /dev/null
@@ -1,43 +0,0 @@
-# -*- coding: utf-8 -*-
-'''test for newslash_index/index.py'''
-
-import unittest
-
-from newslash_index import Index, exceptions
-import newslash_db
-
-
-config = {
-    "SearchIndex": {
-        "path": "./test_index"
-    },
-    "Database": {
-        "host": "newslash-db",
-        "name": "searchd_test",
-        "user": "newslash",
-        "password": "foobar"
-    }
-}
-    
-
-class TestIndexClass(unittest.TestCase):
-    def setUp(self):
-        self.index = Index(config=config)
-        try:
-            self.index.create_metadata_table()
-        except exceptions.DatabaseError:
-            pass
-
-    def tearDown(self):
-        db = newslash_db.NewslashDB(config.get("Database"))
-        db.execute('DROP TABLE ns_search_metadata')
-        db.close()
-
-    def test_create_metadata(self):
-        with self.assertRaises(exceptions.DatabaseError):
-            self.index.create_metadata_table()
-
-        
-if __name__ == '__main__':
-    unittest.main()
-
diff --git a/src/ns_search/test/test_config.py.example b/src/ns_search/test/test_config.py.example
new file mode 100644 (file)
index 0000000..925b867
--- /dev/null
+++ b/src/ns_search/test/test_config.py.example
@@ -0,0 +1,15 @@
+# -*- coding: utf-8 -*-
+'''test configuration'''
+
+config = {
+    "SearchIndex": {
+        "path": "./test_index"
+    },
+    "Database": {
+        "host": "newslash-db",
+        "name": "searchd_test",
+        "user": "newslash",
+        "password": "hogehoge"
+    }
+}
+
diff --git a/src/ns_search/test/test_lucene_wrapper.py b/src/ns_search/test/test_lucene_wrapper.py
new file mode 100644 (file)
index 0000000..d29be06
--- /dev/null
+++ b/src/ns_search/test/test_lucene_wrapper.py
@@ -0,0 +1,110 @@
+# -*- coding: utf-8 -*-
+'''test for lucene_wrapper'''
+
+import unittest
+import calendar
+from datetime import datetime
+import shutil
+
+from newslash_index import htmlutil
+import lucene_wrapper
+
+from test_config import config
+
+item = {
+    "time": datetime(2017, 10, 10, 10, 10, 10),
+    "last_update": datetime(2017, 10, 10, 10, 10, 10),
+    "introtext": u'''<p>イントロテキスト:<a href="http://example.com">example.com</a></p>''',
+    "bodytext": "",
+    "sid": "17/10/10/1234567",
+    "title": u"テスト記事のタイトル",
+    "dept": u"テストテストテスト",
+    "tid": 2271,
+    "uid": 27448,
+    "submitter": 27448
+}
+
+class TestIndexClass(unittest.TestCase):
+    def setUp(self):
+        # cleanup existing index
+        try:
+            shutil.rmtree(config["SearchIndex"]["path"])
+        except OSError:
+            pass
+
+    def tearDown(self):
+        pass
+
+    def test_index_manipulation(self):
+        # convert datetime to UNIX timestamp
+        timestamp = calendar.timegm(item["time"].utctimetuple())
+        last_update = calendar.timegm(item["last_update"].utctimetuple())
+
+        # prepare intro-/body-text, url
+        introtext = item["introtext"] or ""
+        bodytext = item["bodytext"] or ""
+        (content_text, urls) = htmlutil.strip_html_tag(introtext + bodytext)
+
+        # create index
+        lucene_wrapper.init_vm()
+        doc = lucene_wrapper.Document()
+
+        doc.add_string_field("type", "story")
+        doc.add_string_field("id", item["sid"])
+
+        doc.add_text_field("title", item["title"])
+        doc.add_text_field("content_text", content_text)
+        doc.add_text_field("dept", item["dept"])
+
+        doc.add_int_field("create_time", timestamp)
+        doc.add_int_field("last_update", last_update)
+        doc.add_int_field("topic", item["tid"])
+        doc.add_int_field("author", item["uid"])
+        doc.add_int_field("submitter", item["submitter"])
+
+        for url in urls:
+            doc.add_string_field("url", url)
+
+        with lucene_wrapper.Indexer(index_directory=config["SearchIndex"]["path"]) as indexer:
+            indexer.add(doc)
+
+        # search
+        item_type = "story"
+        item_id = item["sid"]
+
+        searcher = lucene_wrapper.Searcher(index_directory=config["SearchIndex"]["path"])
+        type_query = lucene_wrapper.TermQuery("type",item_type)
+        id_query = lucene_wrapper.TermQuery("id", item_id)
+
+        query = lucene_wrapper.BooleanQuery()
+        query.add_must(id_query)
+        query.add_must(type_query)
+
+        result = searcher.search(query)
+        self.assertIs(result.total_hits, 1)
+
+        # delete
+        with lucene_wrapper.Indexer(index_directory=config["SearchIndex"]["path"]) as indexer:
+            indexer.delete(query)
+
+        searcher = lucene_wrapper.Searcher(index_directory=config["SearchIndex"]["path"])
+        result = searcher.search(query)
+        self.assertIs(result.total_hits, 0)
+        
+        # add new one
+        new_text = u"更新されたテキスト"
+        doc.remove_field("content_text")
+        doc.add_text_field("content_text", new_text)
+        
+        with lucene_wrapper.Indexer(index_directory=config["SearchIndex"]["path"]) as indexer:
+            indexer.add(doc)
+
+        searcher = lucene_wrapper.Searcher(index_directory=config["SearchIndex"]["path"])
+        result = searcher.search(query)
+        self.assertIs(result.total_hits, 1)
+        self.assertEqual(result[0].content_text, new_text)
+
+        
+if __name__ == '__main__':
+    unittest.main()
+
diff --git a/src/ns_search/test/test_newslash_index.py b/src/ns_search/test/test_newslash_index.py
new file mode 100644 (file)
index 0000000..a9b0d33
--- /dev/null
+++ b/src/ns_search/test/test_newslash_index.py
@@ -0,0 +1,161 @@
+# -*- coding: utf-8 -*-
+'''test for newslash_index'''
+
+import unittest
+from datetime import datetime
+import shutil
+
+from newslash_index import init_vm, Index, exceptions
+import newslash_db
+from test_config import config
+
+class TestIndexClass(unittest.TestCase):
+    def setUp(self):
+        # create metadata
+        self.index = Index(config=config)
+        try:
+            self.index.create_metadata_table()
+        except exceptions.DatabaseError:
+            pass
+
+        # create story related tables
+        stories = newslash_db.Stories(config["Database"])
+        stories.create_tables()
+        stories.close()
+
+        # check tables exist
+        db = newslash_db.NewslashDB(config["Database"])
+        cur = db.execute('SHOW TABLES LIKE "stories"')
+        self.assertEqual(cur.rowcount, 1)
+        cur = db.execute('SHOW TABLES LIKE "story_text"')
+        self.assertEqual(cur.rowcount, 1)
+        cur = db.execute('SHOW TABLES LIKE "ns_search_metadata"')
+        self.assertEqual(cur.rowcount, 1)
+
+        # cleanup existing index
+        try:
+            shutil.rmtree(config["SearchIndex"]["path"])
+        except OSError:
+            pass
+
+        # init vm
+        init_vm()
+
+    def tearDown(self):
+        # cleanup metadata table
+        self.index.drop_metadata_table()
+
+        # cleanup story related tables
+        stories = newslash_db.Stories(config["Database"])
+        stories.drop_tables()
+        stories.close()
+
+        # check tables not exist
+        db = newslash_db.NewslashDB(config["Database"])
+        cur = db.execute('SHOW TABLES LIKE "stories"')
+        self.assertEqual(cur.rowcount, 0)
+        cur = db.execute('SHOW TABLES LIKE "story_text"')
+        self.assertEqual(cur.rowcount, 0)
+        cur = db.execute('SHOW TABLES LIKE "ns_search_metadata"')
+        self.assertEqual(cur.rowcount, 0)
+
+        # cleanup existing index
+        try:
+            shutil.rmtree(config["SearchIndex"]["path"])
+        except OSError:
+            pass
+
+    def test_create_metadata(self):
+        with self.assertRaises(newslash_db.NewslashDBProgrammingError):
+            self.index.create_metadata_table()
+
+    def test_update_index(self):
+        testdata1 = {
+            "sid": "17/10/10/1234567",
+            "uid": 27448,
+            "dept": u"テストテストテスト",
+            "time": datetime(2017, 10, 10, 10, 10, 10),
+            "title": u"テスト記事のタイトル",
+            "introtext": u"<p>イントロテキスト:<キーワード:猫></p>",
+        }
+
+        testdata2 = {
+            "sid": "17/10/11/2345678",
+            "uid": 27448,
+            "dept": u"テストテストテスト2",
+            "time": datetime(2017, 10, 11, 11, 11, 11),
+            "title": u"2つ目のテスト記事のタイトル",
+            "introtext": u"<p>イントロテキスト:<キーワード:犬></p>",
+        }
+
+        # first, create test story
+        stories = newslash_db.Stories(config["Database"])
+        stoid1 = stories.insert(**testdata1)
+        self.assertIsNot(stoid1, 0)
+
+        item = stories.select(stoid=stoid1)
+        self.assertIsNotNone(item)
+
+        # create index
+        suc, err = self.index.update("story")
+        self.assertIs(suc, 1)
+        self.assertIs(err, 0)
+
+        # check index
+        docs = self.index.query(u"猫")
+        self.assertIs(docs.total_hits, 1)
+        self.assertEqual(docs[0].id, testdata1["sid"])
+
+        # add new story
+        stoid2 = stories.insert(**testdata2)
+        self.assertIsNot(stoid2, 0)
+                          
+        # update index
+        last_update = datetime.now()
+        suc, err = self.index.update("story")
+        self.assertIs(err, 0)
+        self.assertIsNot(suc, 0)
+
+        # check updated index
+        docs = self.index.query(u"犬")
+        self.assertIs(docs.total_hits, 1)
+        self.assertEqual(docs[0].id, testdata2["sid"])
+        docs = self.index.query(u"テスト")
+        self.assertIs(docs.total_hits, 0)
+        docs = self.index.query(u"キーワード")
+        self.assertIs(docs.total_hits, 2)
+
+        # change story
+        new_text = u"<p>猿</p>"
+        testdata1["introtext"] = new_text
+        testdata1["stoid"] = stoid1
+        stories.update(**testdata1)
+        item = stories.select(stoid=stoid1)
+        self.assertIsNotNone(item)
+        self.assertEqual(item[0]["introtext"], new_text)
+
+        # check updated stories
+        items = stories.select(stoid_le=stoid2, last_update_ge=last_update)
+        self.assertIsNot(len(items), 0)
+
+        # update index
+        suc, err = self.index.update("story")
+        self.assertIs(err, 0)
+        self.assertIsNot(suc, 0)
+
+        # check updated index
+        docs = self.index.query(u"猿")
+        self.assertIs(docs.total_hits, 1)
+        self.assertEqual(docs[0].id, testdata1["sid"])
+        docs = self.index.query(u"キーワード")
+        self.assertIs(docs.total_hits, 1)
+        docs = self.index.query(u"猫")
+        self.assertIs(docs.total_hits, 0)
+        docs = self.index.query(u"犬")
+        self.assertIs(docs.total_hits, 1)
+
+        # TODO: check public flag
+        
+if __name__ == '__main__':
+    unittest.main()
+