OSDN Git Service

ns_search: implement comment indexer
author	hylom <hylom@users.sourceforge.jp>
Wed, 28 Mar 2018 12:11:45 +0000 (21:11 +0900)
committer	hylom <hylom@users.sourceforge.jp>
Wed, 28 Mar 2018 12:11:45 +0000 (21:11 +0900)
src/ns_search/lucene_wrapper/__init__.py
src/ns_search/lucene_wrapper/wrapper.py
src/ns_search/newslash_db/__init__.py
src/ns_search/newslash_db/base.py
src/ns_search/newslash_db/comments.py [new file with mode: 0644]
src/ns_search/newslash_db/stories.py
src/ns_search/newslash_index/index.py
src/ns_search/newslash_index/metadata.py
src/ns_search/test/test_newslash_index.py

index 1f06416..7229e40 100644 (file)
@@ -12,7 +12,8 @@ from wrapper import (
     Query,
     QueryParseError,
     TermQuery,
-    BooleanQuery
+    BooleanQuery,
+    IntRangeQuery,
 )
 
 
index 226dfdc..99cb312 100644 (file)
@@ -8,6 +8,7 @@ from org.apache.lucene.index import IndexWriter, IndexWriterConfig, IndexReader,
 from org.apache.lucene.search import (IndexSearcher, BooleanClause, TopScoreDocCollector,
                                       TotalHitCountCollector,
                                       TermQuery as _TermQuery,
+                                      NumericRangeQuery,
                                       BooleanQuery as _BooleanQuery)
 from org.apache.lucene.analysis.ja import JapaneseAnalyzer, JapaneseTokenizer
 from org.apache.lucene.store import SimpleFSDirectory
@@ -185,9 +186,18 @@ class TermQuery(LuceneWrapper):
    def __init__(self, field_name, query_term):
        """Exact-match query on a single term.

        query_term is coerced with str() so callers may pass non-string
        values (e.g. int ids) without a TypeError from Term.
        """
        super(TermQuery, self).__init__()

        term = Term(field_name, str(query_term))
        self.query = _TermQuery(term)
 
class IntRangeQuery(LuceneWrapper):
    """Integer range query over an int-typed index field.

    When int_max is omitted the range collapses to [int_min, int_min],
    i.e. an exact integer match (used by Index.delete for unique_id).
    Both bounds are inclusive by default.
    """
    def __init__(self, field_name, int_min, int_max=None, min_inclusive=True, max_inclusive=True):
        super(IntRangeQuery, self).__init__()

        if int_max is None:
            int_max = int_min

        self.query = NumericRangeQuery.newIntRange(field_name, int_min, int_max, min_inclusive, max_inclusive)
+
 
 class Query(LuceneWrapper):
     def __init__(self, field_name, query_text):
index 4784212..9a3825d 100644 (file)
@@ -4,6 +4,7 @@ __author__ = 'Hiromichi Matsushima <hylom@users.osdn.net>'
 __version__ = "0.1.0"
 
 from stories import Stories
+from comments import Comments
 from base import NewslashDB, NewslashDBProgrammingError
 
 __all__ = []
index 46ca12c..2e6cdc3 100644 (file)
@@ -36,16 +36,43 @@ class NewslashDB(object):
             raise NewslashDBProgrammingError(e)
         return cur
 
def execute_multi(self, sqls, **kwargs):
    """Run several SQL statements inside one transaction.

    sqls   -- iterable of SQL statement strings
    kwargs -- bind parameters shared by every statement

    Rolls back and re-raises on the first NewslashDBError; commits only
    when every statement succeeded.
    """
    self.start_transaction()

    try:
        for sql in sqls:
            self.execute(sql, **kwargs)
    except NewslashDBError:
        self.rollback()
        # bare raise preserves the original traceback ("raise e" rebinds
        # the exception and, on Python 3, would reset chaining context)
        raise

    self.commit()
+
+    def generic_insert(self, table_name, params, args):
+        cols = ", ".join(params)
+        values = ", ".join(["%({})s".format(x) for x in params])
+        sql = "INSERT INTO {} ({}) VALUES ({})".format(table_name, cols, values)
+        return self.execute(sql, **args)
+    
+    def generic_update(self, table_name, params, where_params, args):
+        sets = ", ".join(["{}=%({})s".format(x) for x in params])
+        wheres = "AND ".join(["{}=%({})s".format(x) for x in where_params])
+        sql = "UPDATE {} SET {} WHERE {}".format(table_name, sets, wheres)
+        return self.execute(sql, **args)
+
    def start_transaction(self):
        # Lazily open the connection on first use, then begin a transaction.
        if not self._conn:
            self.connect()
        self._conn.start_transaction()
        # return self for fluent chaining, e.g. db.start_transaction().execute(...)
        return self
 
    def commit(self):
        # Commit the open transaction; returns self for call chaining.
        self._conn.commit()
        return self
 
    def rollback(self):
        # Abort the open transaction; returns self for call chaining.
        self._conn.rollback()
        return self
 
     def connect(self):
         self._conn = mysql.connector.connect(user=self.config("user"),
@@ -53,12 +80,13 @@ class NewslashDB(object):
                                              database=self.config("name"),
                                              host=self.config("host"))
         self._conn.autocommit = True
+        return self
         
    def close(self):
        # Close and forget the connection; idempotent (safe to call when
        # no connection is open). Returns self for call chaining.
        if self._conn:
            self._conn.close()
            self._conn = None
        return self
 
 class NewslashDBError(Exception):
     def __str__(self):
diff --git a/src/ns_search/newslash_db/comments.py b/src/ns_search/newslash_db/comments.py
new file mode 100644 (file)
index 0000000..6bff9f5
--- /dev/null
@@ -0,0 +1,114 @@
+# coding: utf-8
+'''comments.py: newslash comment related database module'''
+
+from base import NewslashDB, NewslashDBError
+from datetime import datetime
+
+CREATE_COMMENTS_TABLE_SQL = '''
+CREATE TABLE IF NOT EXISTS `comments` (
+  `sid`          mediumint(8) unsigned NOT NULL DEFAULT '0',
+  `cid`          int(10) unsigned      NOT NULL AUTO_INCREMENT,
+  `pid`          int(10) unsigned      NOT NULL DEFAULT '0',
+  `date`         datetime              NOT NULL DEFAULT '1970-01-01 00:00:00',
+  `last_update`  timestamp             NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
+  `ipid`         varchar(32)           NOT NULL DEFAULT '',
+  `subnetid`     varchar(32)           NOT NULL DEFAULT '',
+  `subject`      varchar(50)           NOT NULL DEFAULT '',
+  `subject_orig` enum('no','yes')      NOT NULL DEFAULT 'yes',
+  `uid`          mediumint(8) unsigned NOT NULL DEFAULT '0',
+  `points`       tinyint(4)            NOT NULL DEFAULT '0',
+  `pointsorig`   tinyint(4)            NOT NULL DEFAULT '0',
+  `pointsmax`    tinyint(4)            NOT NULL DEFAULT '0',
+  `lastmod`      mediumint(8) unsigned NOT NULL DEFAULT '0',
+  `reason`       tinyint(3) unsigned   NOT NULL DEFAULT '0',
+  `signature`    varchar(32)           NOT NULL DEFAULT '',
+  `len`          smallint(5) unsigned  NOT NULL DEFAULT '0',
+  `karma_bonus`  enum('yes','no')      NOT NULL DEFAULT 'no',
+  `karma`        smallint(6)           NOT NULL DEFAULT '0',
+  `karma_abs`    smallint(5) unsigned  NOT NULL DEFAULT '0',
+  `tweak_orig`   tinyint(4)            NOT NULL DEFAULT '0',
+  `tweak`        tinyint(4)            NOT NULL DEFAULT '0',
+  `badge_id`     tinyint(3) unsigned   NOT NULL DEFAULT '0',
+  PRIMARY KEY        (`cid`),
+  KEY `display`      (`sid`,`points`,`uid`),
+  KEY `byname`       (`uid`,`points`),
+  KEY `ipid`         (`ipid`),
+  KEY `subnetid`     (`subnetid`),
+  KEY `theusual`     (`sid`,`uid`,`points`,`cid`),
+  KEY `countreplies` (`pid`,`sid`),
+  KEY `uid_date`     (`uid`,`date`),
+  KEY `date_sid`     (`date`,`sid`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 ROW_FORMAT=DYNAMIC
+'''
+
+CREATE_COMMENT_TEXT_TABLE_SQL = '''
+CREATE TABLE IF NOT EXISTS `comment_text` (
+  `cid`     int(10) unsigned NOT NULL DEFAULT '0',
+  `comment` mediumtext       NOT NULL,
+  PRIMARY KEY (`cid`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 ROW_FORMAT=DYNAMIC
+'''
+
class Comments(object):
    """Accessor for the `comments` and `comment_text` tables.

    Wraps a NewslashDB connection; comment bodies live in `comment_text`,
    joined to `comments` by cid.
    """

    def __init__(self, config):
        self.db = NewslashDB(config)
        self._config = config

    def create_tables(self):
        '''this function implemented for test purpose...'''
        self.db.execute_multi((CREATE_COMMENTS_TABLE_SQL,
                               CREATE_COMMENT_TEXT_TABLE_SQL))

    def drop_tables(self):
        '''this function implemented for test purpose...'''
        self.db.execute_multi(('DROP TABLE comments',
                               'DROP TABLE comment_text'))

    def insert(self, **kwargs):
        '''this function implemented for test purpose...

        Inserts a comments row plus its comment_text row in one
        transaction and returns the new cid.
        '''
        db = self.db.start_transaction()

        try:
            # comments first: AUTO_INCREMENT assigns the cid needed for
            # the comment_text row
            cur = db.generic_insert("comments", ("sid", "pid", "uid"), kwargs)
            kwargs["cid"] = cur.lastrowid
            db.generic_insert("comment_text", ("cid", "comment"), kwargs)
        except NewslashDBError:
            db.rollback()
            # bare raise preserves the original traceback
            raise

        db.commit()
        return kwargs["cid"]

    def select(self, **kwargs):
        '''Select comments joined with their text.

        Recognized filters (checked in this order): cid (exact),
        cid_le + last_update_ge (updated-since scan), cid_gt (newer-than
        scan); otherwise all comments. limit defaults to the DB config's
        default_limit, offset to 0.
        '''
        if "limit" not in kwargs:
            kwargs["limit"] = self.db.config("default_limit")

        kwargs.setdefault("offset", 0)

        if "cid" in kwargs:
            query = ("SELECT comments.*, comment_text.* FROM comments"
                     "  LEFT JOIN comment_text USING(cid)"
                     "  WHERE comments.cid = %(cid)s"
                     "  ORDER BY comments.cid DESC LIMIT %(limit)s OFFSET %(offset)s")
        elif "last_update_ge" in kwargs and "cid_le" in kwargs:
            # both keys required: the query binds %(cid_le)s as well, so a
            # lone last_update_ge used to fail at execute time with a
            # missing-parameter error
            query = ("SELECT comments.*, comment_text.* FROM comments"
                     "  LEFT JOIN comment_text USING(cid)"
                     "  WHERE comments.cid <= %(cid_le)s AND comments.last_update >= %(last_update_ge)s"
                     "  ORDER BY comments.cid DESC LIMIT %(limit)s OFFSET %(offset)s")
        elif "cid_gt" in kwargs:
            query = ("SELECT comments.*, comment_text.* FROM comments"
                     "  LEFT JOIN comment_text USING(cid)"
                     "  WHERE comments.cid > %(cid_gt)s"
                     "  ORDER BY comments.cid DESC LIMIT %(limit)s OFFSET %(offset)s")
        else:
            query = ("SELECT comments.*, comment_text.* FROM comments"
                     "  LEFT JOIN comment_text USING(cid)"
                     "  ORDER BY comments.cid DESC LIMIT %(limit)s OFFSET %(offset)s")

        cur = self.db.execute(query, **kwargs)
        col_names = cur.column_names
        result = [dict(zip(col_names, x)) for x in cur]

        self.db.close()
        return result
index a66185e..455f82c 100644 (file)
@@ -1,8 +1,7 @@
 # coding: utf-8
 '''stories.py: newslash story related database module'''
 
-from ..base import NewslashDB, NewslashDBError
-from datetime import datetime
+from base import NewslashDB, NewslashDBError
 
 CREATE_STORIES_TABLE_SQL = '''
 CREATE TABLE IF NOT EXISTS `stories` (
@@ -86,6 +85,7 @@ class Stories(NewslashDB):
             raise e
 
         self.commit()
+        self.close()
 
     def drop_tables(self):
         '''this function implemented for test purpose...'''
@@ -100,6 +100,7 @@ class Stories(NewslashDB):
             raise e
 
         self.commit()
+        self.close()
 
     def insert(self, **kwargs):
         '''this function implemented for test purpose...'''
@@ -119,14 +120,14 @@ class Stories(NewslashDB):
             raise e
 
         self.commit()
+        self.close()
         return kwargs["stoid"]
 
     def update(self, **kwargs):
         '''this function implemented for test purpose...'''
-        kwargs["last_update"] = datetime.now()
         
         sql_stories = ('UPDATE stories'
-                       '  SET uid=%(uid)s, dept=%(dept)s, last_update=%(last_update)s'
+                       '  SET uid=%(uid)s, dept=%(dept)s, last_update=NOW()'
                        '  WHERE stoid=%(stoid)s')
         sql_story_text = ('UPDATE story_text'
                           '  SET title=%(title)s, introtext=%(introtext)s'
@@ -149,6 +150,7 @@ class Stories(NewslashDB):
             raise e
 
         self.commit()
+        self.close()
         return
 
     def select(self, **kwargs):
index 27945fc..4c6e5a6 100644 (file)
@@ -44,41 +44,58 @@ class Index(object):
             raise ConfigFileError(ConfigFileError.SECTION_NOT_FOUND, "Database")
         return db_cfg
 
+    def delete(self, target, unique_id, indexer=None):
+        '''delete document from index'''
+        # create query to identify target document
+        q = lucene_wrapper.BooleanQuery()
+        q.add_must(lucene_wrapper.TermQuery("type", target))
+        q.add_must(lucene_wrapper.IntRangeQuery("unique_id", unique_id))
+        
+        # check if deletable
+        #searcher = self._get_searcher()
+        #result = searcher.search(q)
+
+        # delete document
+        if indexer: 
+            indexer.delete(q)
+        else:
+            with lucene_wrapper.Indexer(index_directory=self.config("SearchIndex", "path")) as indexer:
+                indexer.delete(q)
+
     def delete_all(self):
         '''delete all document from index'''
         with lucene_wrapper.Indexer(index_directory=self.config("SearchIndex", "path")) as indexer:
             indexer.delete_all()
 
-    def update(self, target, batch_size=1000, progress_cb=None, error_cb=None):
-        '''update story index'''
-        stories = newslash_db.Stories(self._db_config())
+    def _update(self, selector, target, batch_size=1000, progress_cb=None, error_cb=None):
         query_done = False
 
         # at first, get last indexed id  and timestamp
-        stories_data = self.metadata.get('stories')
-        latest_id = stories_data.get('latest_id', 0)
-        last_update = stories_data.get('last_update')
+        metadata = self.metadata.get(target)
+        latest_id = metadata.get('latest_id', 0)
+        last_update = metadata.get('last_update')
 
-        # add new stories to index
-        start_update = datetime.now()
+        # add document to index
+        # don't use datetime.now(), because Database server's timestamp may be not this server's one.
+        start_update = self.metadata.get_current_timestamp()
         add_success = 0
         add_errors = 0
         offset = 0
-        max_stoid = 0
+        max_unique_id = 0
         with lucene_wrapper.Indexer(index_directory=self.config("SearchIndex", "path")) as indexer:
             while not query_done:
                 # repeat process
-                items = stories.select(limit=batch_size, offset=offset, stoid_gt=latest_id)
+                items = selector(batch_size, offset, latest_id)
                 offset += len(items)
                 if len(items) < batch_size:
                     query_done = True
 
                 for item in items:
-                    if item["neverdisplay"] == "1":
+                    if target == "story" and item["neverdisplay"] == "1":
                         add_success += 1
                         continue
                     try:
-                        doc = self._make_story_document(item)
+                        doc = self._make_document(target, item)
                     except exceptions.DocumentMakingError:
                         add_errors += 1
                         if error_cb is not None:
@@ -90,33 +107,57 @@ class Index(object):
 
                 if progress_cb is not None:
                     progress_cb('add', add_success, add_errors)
-
+                    
                 for item in items:
-                    if item["stoid"] > max_stoid:
-                        max_stoid = item["stoid"];
+                    unique_id = self._get_unique_id(target, item)
+                    if unique_id > max_unique_id:
+                        max_unique_id = unique_id;
+
+        # update metadata
+        self.metadata.update(target=target, last_update=start_update, latest_id=max_unique_id)
+
+        # if no previous update, done
+        if last_update is None:
+            return (add_success, add_errors)
 
         # update index for updated stories
         update_success = 0
         update_errors = 0
+        offset = 0
         with lucene_wrapper.Indexer(index_directory=self.config("SearchIndex", "path")) as indexer:
-            items = stories.select(stoid_le=latest_id, last_update_ge=last_update)
-            for item in items:
-                # first, create term to identify target document
-                target_id = self._get_primary_id(target, item)
-                term = lucene_wrapper.BooleanQuery()
-                term.add_must(lucene_wrapper.TermQuery("type", target))
-                term.add_must(lucene_wrapper.TermQuery("id", target_id))
+            # repeat process
+            items = selector(batch_size, offset, latest_id, last_update)
+            offset += len(items)
+            if len(items) < batch_size:
+                query_done = True
 
+            for item in items:
+                # at first, create new document
                 try:
-                    doc = self._make_story_document(item)
+                    doc = self._make_document(target, item)
                 except exceptions.DocumentMakingError:
                     update_errors += 1
                     if error_cb is not None:
                         error_cb('update', item)
                     continue
-                indexer.delete(term)
-                if item["neverdisplay"] != "1" and item["time"] <= datetime.now():
+
+                # create query to identify target document
+                target_id = self._get_unique_id(target, item)
+                self.delete(target, target_id, indexer) 
+                #q = lucene_wrapper.BooleanQuery()
+                #q.add_must(lucene_wrapper.TermQuery("type", target))
+                #q.add_must(lucene_wrapper.IntRangeQuery("unique_id", target_id))
+
+                # delete document
+                #indexer.delete(q)
+
+                # add document
+                if target == "story":
+                    if item["neverdisplay"] != "1" and item["time"] <= datetime.now():
+                        indexer.add(doc)
+                else:
                     indexer.add(doc)
+
                 update_success += 1
 
         if progress_cb is not None:
@@ -125,53 +166,60 @@ class Index(object):
         success = add_success + update_success
         errors = add_errors + update_errors
 
-        # update metadata
-        self.metadata.update(target='stories', last_update=start_update, latest_id=max_stoid)
-
         # done
         return (success, errors)
 
-    def _get_primary_id(self, target, item):
-        if target == 'stories' or target == 'story':
-            return item['sid']
-
-        return None
-    
-    def update_all_stories(self, batch_size=1000, progress_cb=None, error_cb=None):
-        '''update index for all stories'''
    def update_story(self, batch_size=1000, progress_cb=None, error_cb=None):
        '''update story index'''
        stories = newslash_db.Stories(self._db_config())
        # selector closes over `stories`; _update() calls it once per batch
        def selector(limit, offset, latest_id, last_update=None):
            if last_update:
                # NOTE(review): `offset` is ignored on this branch, so only
                # the first `limit` updated stories are re-indexed per run
                # -- confirm this is intentional
                return stories.select(limit=limit, stoid_le=latest_id, last_update_ge=last_update)
            else:
                return stories.select(limit=limit, offset=offset, stoid_gt=latest_id)

        return self._update(selector, 'story', batch_size, progress_cb, error_cb)
+            
    def update_comment(self, batch_size=1000, progress_cb=None, error_cb=None):
        '''update comment index'''
        comments = newslash_db.Comments(self._db_config())
        # selector closes over `comments`; _update() calls it once per batch
        def selector(limit, offset, latest_id, last_update=None):
            if last_update:
                # NOTE(review): `offset` is ignored on this branch, so only
                # the first `limit` updated comments are re-indexed per run
                # -- confirm this is intentional
                return comments.select(limit=limit, cid_le=latest_id, last_update_ge=last_update)
            else:
                return comments.select(limit=limit, offset=offset, cid_gt=latest_id)

        return self._update(selector, 'comment', batch_size, progress_cb, error_cb)
+
    def update(self, batch_size=1000, progress_cb=None, error_cb=None):
        '''update index'''
        # NOTE(review): although this commit adds a comment indexer, this
        # entry point only refreshes the story index; update_comment() is
        # never invoked here -- confirm whether comments should also be
        # updated by update()
        success, errors = self.update_story(batch_size, progress_cb, error_cb)

        # done
        return (success, errors)
 
-                for item in items:
-                    try:
-                        doc = self._make_story_document(item)
-                    except exceptions.DocumentMakingError:
-                        errors += 1
-                        if error_cb is not None:
-                            error_cb('add', item)
-                        continue
-                    indexer.add(doc)
-                    success += 1
+    def _get_unique_id(self, target, item):
+        if target == 'story':
+            return item['stoid']
+        if target == 'comment':
+            return item['cid']
 
-                if progress_cb is not None:
-                    progress_cb('add', success, errors)
+        sys.stderr.write("_get_unique_id - invalid target: {}".format(target))
+        return None
 
-        return (success, errors)
+    def _make_document(self, target, item):
+        if target == 'story':
+            return self._make_story_document(item)
+        elif target == 'comment':
+            return self._make_comment_document(item)
 
+        sys.stderr.write("_make_document - invalid target: {}".format(target))
+        return None
+    
     def _make_story_document(self, item):
-        '''make Document object from query result'''
+        '''make Document object from story object'''
         doc = lucene_wrapper.Document()
+        # some item have invalid time data
         if item["time"] is None:
             raise exceptions.DocumentMakingError()
 
@@ -185,7 +233,7 @@ class Index(object):
         (content_text, urls) = htmlutil.strip_html_tag(introtext + bodytext)
 
         doc.add_string_field("type", "story")
-        doc.add_string_field("unique_id", str(item["stoid"]))
+        doc.add_int_field("unique_id", item["stoid"])
         doc.add_string_field("id", item["sid"])
 
         doc.add_text_field("title", item["title"])
@@ -203,6 +251,35 @@ class Index(object):
 
         return doc
     
    def _make_comment_document(self, item):
        '''make Document object from comment object

        item is a dict row from Comments.select(); raises KeyError if the
        expected columns are missing.
        '''
        doc = lucene_wrapper.Document()

        # convert datetime to UNIX timestamp
        # NOTE(review): calendar.timegm() treats these naive datetimes as
        # UTC; the values come from MySQL DATETIME/TIMESTAMP columns --
        # confirm the DB stores UTC, otherwise indexed times are offset by
        # the server timezone
        timestamp = calendar.timegm(item["date"].utctimetuple())
        last_update = calendar.timegm(item["last_update"].utctimetuple())

        # prepare intro-/body-text, url
        (content_text, urls) = htmlutil.strip_html_tag(item["comment"])

        doc.add_string_field("type", "comment")
        doc.add_int_field("unique_id", item["cid"])
        doc.add_string_field("id", str(item["cid"]))

        doc.add_text_field("title", item["subject"])
        doc.add_text_field("content_text", content_text)

        doc.add_int_field("create_time", timestamp)
        doc.add_int_field("last_update", last_update)
        doc.add_int_field("author", item["uid"])
        doc.add_int_field("points", item["points"])
        

        for url in urls:
            doc.add_string_field("url", url)

        return doc
+    
     def get(self, item_type, item_id):
         '''get document match iten_type and item_id from index'''
         searcher = self._get_searcher()
index 8195ce4..f10410b 100644 (file)
@@ -15,6 +15,11 @@ class Metadata(object):
             raise exceptions.ConfigFileError(exceptions.ConfigFileError.SECTION_NOT_FOUND, "Database")
         return db_cfg
 
+    def get_current_timestamp(self):
+        db = newslash_db.NewslashDB(self._db_config())
+        cur = db.execute('SELECT NOW()')
+        return cur.fetchone()[0]
+
     def create_table(self):
         sql = (
             "CREATE TABLE ns_search_metadata ("
index e867fdf..b173048 100644 (file)
@@ -9,30 +9,35 @@ from newslash_index import init_vm, Index, exceptions
 import newslash_db
 from test_config import config
 
+TABLES_TO_USE = ("stories",
+                 "story_text",
+                 "story_param",
+                 "ns_search_metadata",
+                 "comments",
+                 "comment_text")
+
 class TestIndexClass(unittest.TestCase):
     def setUp(self):
         # create metadata
         self.index = Index(config=config)
         try:
             self.index.metadata.create_table()
-        except exceptions.DatabaseError:
+        except newslash_db.NewslashDBProgrammingError as e:
             pass
 
-        # create story related tables
-        stories = newslash_db.Stories(config["Database"])
-        stories.create_tables()
-        stories.close()
+        # create tables
+        for m in (newslash_db.Stories(config["Database"]),
+                  newslash_db.Comments(config["Database"])):
+            try:
+                m.create_tables()
+            except newslash_db.NewslashDBProgrammingError as e:
+                pass
 
         # check tables exist
         db = newslash_db.NewslashDB(config["Database"])
-        cur = db.execute('SHOW TABLES LIKE "stories"')
-        self.assertEqual(cur.rowcount, 1)
-        cur = db.execute('SHOW TABLES LIKE "story_text"')
-        self.assertEqual(cur.rowcount, 1)
-        cur = db.execute('SHOW TABLES LIKE "story_param"')
-        self.assertEqual(cur.rowcount, 1)
-        cur = db.execute('SHOW TABLES LIKE "ns_search_metadata"')
-        self.assertEqual(cur.rowcount, 1)
+        for name in TABLES_TO_USE:
+            cur = db.execute('SHOW TABLES LIKE "{}"'.format(name))
+            self.assertEqual(cur.rowcount, 1)
 
         # cleanup existing index
         try:
@@ -47,21 +52,19 @@ class TestIndexClass(unittest.TestCase):
         # cleanup metadata table
         self.index.metadata.drop_table()
 
-        # cleanup story related tables
-        stories = newslash_db.Stories(config["Database"])
-        stories.drop_tables()
-        stories.close()
+        # cleanup tables
+        for m in (newslash_db.Stories(config["Database"]),
+                  newslash_db.Comments(config["Database"])):
+            try:
+                m.drop_tables()
+            except newslash_db.NewslashDBProgrammingError as e:
+                pass
 
         # check tables not exist
         db = newslash_db.NewslashDB(config["Database"])
-        cur = db.execute('SHOW TABLES LIKE "stories"')
-        self.assertEqual(cur.rowcount, 0)
-        cur = db.execute('SHOW TABLES LIKE "story_text"')
-        self.assertEqual(cur.rowcount, 0)
-        cur = db.execute('SHOW TABLES LIKE "story_param"')
-        self.assertEqual(cur.rowcount, 0)
-        cur = db.execute('SHOW TABLES LIKE "ns_search_metadata"')
-        self.assertEqual(cur.rowcount, 0)
+        for name in TABLES_TO_USE:
+            cur = db.execute('SHOW TABLES LIKE "{}"'.format(name))
+            self.assertEqual(cur.rowcount, 0)
 
         # cleanup existing index
         try:
@@ -73,7 +76,43 @@ class TestIndexClass(unittest.TestCase):
         with self.assertRaises(newslash_db.NewslashDBProgrammingError):
             self.index.metadata.create_table()
 
-    def test_update_index(self):
+    def test_update_comment_index(self):
+        testdata = {
+            "sid": 0,
+            "pid": 0,
+            "uid": 27448,
+            "comment": u"コメントテスト"
+        }
+
+        # first, create test comment
+        comments = newslash_db.Comments(config["Database"])
+        cid = comments.insert(**testdata)
+        self.assertIsNot(cid, 0)
+
+        item = comments.select(cid=cid)
+        self.assertIsNotNone(item)
+        self.assertEqual(item[0].get("comment"), testdata["comment"])
+
+        # create index
+        suc, err = self.index.update_comment()
+        self.assertIs(suc, 1)
+        self.assertIs(err, 0)
+
+        # check index
+        docs = self.index.query(u"コメント")
+        self.assertIs(docs.total_hits, 1)
+        self.assertEqual(docs[0].type, "comment")
+        self.assertEqual(docs[0].id, str(cid))
+
+        # delete from index
+        self.index.delete("comment", cid)
+        
+        # check index
+        docs = self.index.query(u"コメント")
+        self.assertIs(docs.total_hits, 0)
+            
+
+    def test_update_story_index(self):
         testdata1 = {
             "sid": "17/10/10/1234567",
             "uid": 27448,
@@ -102,7 +141,7 @@ class TestIndexClass(unittest.TestCase):
         self.assertIsNot(item[0].get("neverdisplay"), "1")
 
         # create index
-        suc, err = self.index.update("story")
+        suc, err = self.index.update_story()
         self.assertIs(suc, 1)
         self.assertIs(err, 0)
 
@@ -116,11 +155,14 @@ class TestIndexClass(unittest.TestCase):
         self.assertIsNot(stoid2, 0)
                           
         # update index
-        last_update = datetime.now()
-        suc, err = self.index.update("story")
+        suc, err = self.index.update_story()
         self.assertIs(err, 0)
         self.assertIsNot(suc, 0)
 
+        # check last_update
+        last_update = self.index.metadata.get('story').get('last_update')
+        self.assertIsNotNone(last_update)
+
         # check updated index
         docs = self.index.query(u"犬")
         self.assertIs(docs.total_hits, 1)
@@ -141,14 +183,17 @@ class TestIndexClass(unittest.TestCase):
 
         # check updated stories
         items = stories.select(stoid_le=stoid2, last_update_ge=last_update)
-        self.assertIs(len(items), 1)
+        self.assertTrue(len(items) > 0)
 
         # update index
-        last_update = datetime.now()
-        suc, err = self.index.update("story")
+        suc, err = self.index.update_story()
         self.assertIs(err, 0)
         self.assertIsNot(suc, 0)
 
+        # check last_update
+        last_update = self.index.metadata.get('story').get('last_update')
+        self.assertIsNotNone(last_update)
+        
         # check updated index
         docs = self.index.query(u"猿")
         self.assertIs(docs.total_hits, 1)
@@ -169,14 +214,17 @@ class TestIndexClass(unittest.TestCase):
 
         # check updated stories
         items = stories.select(stoid_le=stoid2, last_update_ge=last_update)
-        self.assertIs(len(items), 1)
+        self.assertTrue(len(items) > 0)
 
         # update index
-        last_update = datetime.now()
-        suc, err = self.index.update("story")
+        suc, err = self.index.update_story()
         self.assertIs(err, 0)
         self.assertIsNot(suc, 0)
 
+        # check last_update
+        last_update = self.index.metadata.get('story').get('last_update')
+        self.assertIsNotNone(last_update)
+
         # check updated index
         docs = self.index.query(u"猿")
         self.assertIs(docs.total_hits, 0)