ns_search: check story's neverdisplay flag
author    hylom <hylom@users.sourceforge.jp>
          Mon, 26 Mar 2018 10:35:38 +0000 (19:35 +0900)
committer hylom <hylom@users.sourceforge.jp>
          Mon, 26 Mar 2018 10:35:38 +0000 (19:35 +0900)
src/ns_search/newslash_db/stories/stories.py
src/ns_search/newslash_index/index.py
src/ns_search/searchd_cli.py
src/ns_search/test/test_newslash_index.py

diff --git a/src/ns_search/newslash_db/stories/stories.py b/src/ns_search/newslash_db/stories/stories.py
index 54f9d71..a66185e 100644
@@ -58,6 +58,17 @@ PRIMARY KEY (`stoid`)
 ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 ROW_FORMAT=DYNAMIC
 '''
 
+CREATE_STORY_PARAM_TABLE_SQL = '''
+CREATE TABLE IF NOT EXISTS `story_param` (
+      `param_id` mediumint(8) unsigned NOT NULL AUTO_INCREMENT,
+      `stoid`    mediumint(8) unsigned NOT NULL DEFAULT '0',
+      `name`     varchar(32)           NOT NULL DEFAULT '',
+      `value`    mediumtext            NOT NULL,
+      PRIMARY KEY (`param_id`),
+      UNIQUE KEY  `story_key` (`stoid`,`name`)
+    ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 ROW_FORMAT=DYNAMIC
+'''
+
 class Stories(NewslashDB):
     def __init__(self, config):
         super(Stories, self).__init__(config)
@@ -69,6 +80,7 @@ class Stories(NewslashDB):
         try:
             self.execute(CREATE_STORIES_TABLE_SQL)
             self.execute(CREATE_STORY_TEXT_TABLE_SQL)
+            self.execute(CREATE_STORY_PARAM_TABLE_SQL)
         except NewslashDBError as e:
             self.rollback()
             raise e
@@ -82,6 +94,7 @@ class Stories(NewslashDB):
         try:
             self.execute('DROP TABLE stories')
             self.execute('DROP TABLE story_text')
+            self.execute('DROP TABLE story_param')
         except NewslashDBError as e:
             self.rollback()
             raise e
@@ -111,18 +124,26 @@ class Stories(NewslashDB):
     def update(self, **kwargs):
         '''this function implemented for test purpose...'''
         kwargs["last_update"] = datetime.now()
+        
         sql_stories = ('UPDATE stories'
                        '  SET uid=%(uid)s, dept=%(dept)s, last_update=%(last_update)s'
                        '  WHERE stoid=%(stoid)s')
         sql_story_text = ('UPDATE story_text'
                           '  SET title=%(title)s, introtext=%(introtext)s'
                           '  WHERE stoid=%(stoid)s')
+        sql_story_param = ('INSERT INTO story_param'
+                           '  (stoid, name, value)'
+                           '  VALUE (%(stoid)s, "neverdisplay", %(neverdisplay)s)'
+                           '  ON DUPLICATE KEY UPDATE value = %(neverdisplay)s')
 
         self.start_transaction()
 
         try:
             self.execute(sql_stories, **kwargs)
             self.execute(sql_story_text, **kwargs)
+            if "neverdisplay" in kwargs:
+                self.execute(sql_story_param, **kwargs)
+
         except NewslashDBError as e:
             self.rollback()
             raise e
@@ -138,30 +159,38 @@ class Stories(NewslashDB):
             kwargs["offset"] = 0
 
         if "stoid" in kwargs:
-            query = ("SELECT stories.*, story_text.* FROM stories"
+            query = ("SELECT stories.*, story_text.*, story_param.value AS neverdisplay FROM stories"
                      "  LEFT JOIN story_text USING(stoid)"
-                     "  WHERE stories.stoid = %(stoid)s"
+                     '  LEFT JOIN story_param ON story_param.name = "neverdisplay"'
+                     '                          AND story_text.stoid = story_param.stoid'
+                     "  WHERE stories.stoid = %(stoid)s AND stories.time <= NOW()"
                      "  ORDER BY stories.stoid DESC LIMIT %(limit)s OFFSET %(offset)s")
         elif "last_update_ge" in kwargs:
-            query = ("SELECT stories.*, story_text.* FROM stories"
+            query = ("SELECT stories.*, story_text.*, story_param.value AS neverdisplay FROM stories"
                      "  LEFT JOIN story_text USING(stoid)"
+                     '  LEFT JOIN story_param ON story_param.name = "neverdisplay"'
+                     '                          AND story_text.stoid = story_param.stoid'
                      "  WHERE stories.stoid <= %(stoid_le)s AND stories.last_update >= %(last_update_ge)s"
                      "  ORDER BY stories.stoid DESC LIMIT %(limit)s OFFSET %(offset)s")
         elif "stoid_gt" in kwargs:
-            query = ("SELECT stories.*, story_text.* FROM stories"
+            query = ("SELECT stories.*, story_text.*, story_param.value AS neverdisplay FROM stories"
                      "  LEFT JOIN story_text USING(stoid)"
-                     "  WHERE stories.stoid > %(stoid_gt)s"
+                     '  LEFT JOIN story_param ON story_param.name = "neverdisplay"'
+                     '                          AND story_text.stoid = story_param.stoid'
+                     "  WHERE stories.stoid > %(stoid_gt)s AND stories.time <= NOW()"
                      "  ORDER BY stories.stoid DESC LIMIT %(limit)s OFFSET %(offset)s")
             
         else:
-            query = ("SELECT stories.*, story_text.* FROM stories"
+            query = ("SELECT stories.*, story_text.*, story_param.value AS neverdisplay FROM stories"
                      "  LEFT JOIN story_text USING(stoid)"
+                     '  LEFT JOIN story_param ON story_param.name = "neverdisplay"'
+                     '                          AND story_text.stoid = story_param.stoid'
+                     '  WHERE stories.time <= NOW()'
                      "  ORDER BY stories.stoid DESC LIMIT %(limit)s OFFSET %(offset)s")
 
         cur = self.execute(query, **kwargs)
-        result = []
-        for item in cur:
-            result.append(dict(zip(cur.column_names, item)))
+        col_names = cur.column_names
+        result = [dict(zip(col_names, x)) for x in cur]
 
         self.close()
         return result
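
For reference, a minimal sketch of writing and reading the new flag, as the test suite exercises it; the import path and the config dict keys are assumptions for illustration, not part of this commit:

# Sketch: writing and reading the new "neverdisplay" story param.
# (Import path and config keys are assumed for illustration.)
from newslash_db.stories.stories import Stories

db_config = {"host": "localhost", "user": "newslash",
             "password": "secret", "database": "newslash"}
stories = Stories(db_config)

# update() upserts story_param when "neverdisplay" is present in kwargs;
# the remaining keys update the existing stories/story_text rows.
stories.update(stoid=1, uid=1, dept="test-dept",
               title=u"some title", introtext=u"some text",
               neverdisplay="1")

# select() now LEFT JOINs story_param, so each returned row carries the flag
# (None when no "neverdisplay" param row exists for the story).
item = stories.select(stoid=1)[0]
assert item["neverdisplay"] == "1"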
diff --git a/src/ns_search/newslash_index/index.py b/src/ns_search/newslash_index/index.py
index afc709e..f30a1ef 100644
@@ -82,14 +82,23 @@ class Index(object):
         return result
 
     def update_metadata(self, target, last_update, latest_id):
-        sql = (
-            "INSERT INTO ns_search_metadata"
-            "  (target_name, last_update, latest_id)"
-            "  VALUES (%(target)s, %(last_update)s, %(latest_id)s)"
-            "  ON DUPLICATE KEY UPDATE"
-            "    last_update = %(last_update)s,"
-            "    latest_id = %(latest_id)s"
-        )
+        if latest_id > 0:
+            sql = (
+                "INSERT INTO ns_search_metadata"
+                "  (target_name, last_update, latest_id)"
+                "  VALUES (%(target)s, %(last_update)s, %(latest_id)s)"
+                "  ON DUPLICATE KEY UPDATE"
+                "    last_update = %(last_update)s,"
+                "    latest_id = %(latest_id)s"
+            )
+        else:
+            sql = (
+                "INSERT INTO ns_search_metadata"
+                "  (target_name, last_update, latest_id)"
+                "  VALUES (%(target)s, %(last_update)s, %(latest_id)s)"
+                "  ON DUPLICATE KEY UPDATE"
+                "    last_update = %(last_update)s"
+            )
 
         db = newslash_db.NewslashDB(self._db_config())
         cur = db.execute(sql, target=target, last_update=last_update, latest_id=latest_id)
@@ -111,8 +120,8 @@ class Index(object):
 
         # add new stories to index
         start_update = datetime.now()
-        success = 0
-        errors = 0
+        add_success = 0
+        add_errors = 0
         offset = 0
         max_stoid = 0
         with lucene_wrapper.Indexer(index_directory=self.config("SearchIndex", "path")) as indexer:
@@ -124,19 +133,22 @@ class Index(object):
                     query_done = True
 
                 for item in items:
+                    if item["neverdisplay"] == "1":
+                        add_success += 1
+                        continue
                     try:
                         doc = self._make_story_document(item)
-                    except DocumentMakingError:
-                        errors += 1
+                    except exceptions.DocumentMakingError:
+                        add_errors += 1
                         if error_cb is not None:
                             error_cb('add', item)
                         continue
 
                     indexer.add(doc)
-                    success += 1
+                    add_success += 1
 
                 if progress_cb is not None:
-                    progress_cb('add', success, errors)
+                    progress_cb('add', add_success, add_errors)
 
                 for item in items:
                     if item["stoid"] > max_stoid:
@@ -156,20 +168,21 @@ class Index(object):
 
                 try:
                     doc = self._make_story_document(item)
-                except DocumentMakingError:
+                except exceptions.DocumentMakingError:
                     update_errors += 1
                     if error_cb is not None:
                         error_cb('update', item)
                     continue
                 indexer.delete(term)
-                indexer.add(doc)
+                if item["neverdisplay"] != "1" and item["time"] <= datetime.now():
+                    indexer.add(doc)
                 update_success += 1
 
         if progress_cb is not None:
             progress_cb('update', update_success, update_errors)
 
-        success += update_success
-        errors += update_errors
+        success = add_success + update_success
+        errors = add_errors + update_errors
 
         # update metadata
         self.update_metadata(target='stories', last_update=start_update, latest_id=max_stoid)
@@ -202,7 +215,7 @@ class Index(object):
                 for item in items:
                     try:
                         doc = self._make_story_document(item)
-                    except DocumentMakingError:
+                    except exceptions.DocumentMakingError:
                         errors += 1
                         if error_cb is not None:
                             error_cb('add', item)
@@ -219,7 +232,7 @@ class Index(object):
         '''make Document object from query result'''
         doc = lucene_wrapper.Document()
         if item["time"] is None:
-            raise DocumentMakingError()
+            raise exceptions.DocumentMakingError()
 
         # convert datetime to UNIX timestamp
         timestamp = calendar.timegm(item["time"].utctimetuple())
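
For context, a minimal sketch of driving the updated indexer the way searchd_cli.py and the tests do; the import paths and the app_config object are assumptions for illustration:

# Sketch: rebuilding/refreshing the story index, mirroring searchd_cli.py and
# the tests. (Import paths and app_config are assumed for illustration.)
import lucene_wrapper
from newslash_index.index import Index

lucene_wrapper.init_vm()
index = Index(config=app_config)  # app_config: the loaded service configuration (assumed)

progress_cb = lambda phase, success, errors: print(
    "{} {} items... ({} errors)".format(phase, success, errors))
error_cb = lambda phase, item: print(
    "indexing {} error: id={}".format(phase, item["sid"]))

# Stories flagged neverdisplay="1" are skipped when new, or deleted without
# being re-added when updated, but still count toward the success total.
success, errors = index.update("story", progress_cb=progress_cb, error_cb=error_cb)

# Queries no longer return hidden stories.
docs = index.query(u"キーワード")
print(docs.total_hits)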
diff --git a/src/ns_search/searchd_cli.py b/src/ns_search/searchd_cli.py
index d23be3d..b06e555 100755
@@ -39,9 +39,11 @@ class SearchCLI(object):
         parser_query.add_argument('subcommand', action='store_const', const='query')
         parser_query.add_argument('query_string', help='query string')
 
-        # 'newindex' subcommand
-        parser_query = subparsers.add_parser("newindex", help="create new lucene index")
-        parser_query.add_argument('subcommand', action='store_const', const='newindex')
+        # 'index' subcommand
+        parser_query = subparsers.add_parser("index", help="manipulate lucene index")
+        parser_query.add_argument('subcommand', action='store_const', const='index')
+        parser_query.add_argument('action', help='action')
+
 
         # 'getdocument' subcommand
         parser_getdoc = subparsers.add_parser("getdocument", help="get document from lucene index")
@@ -92,15 +94,21 @@ class SearchCLI(object):
         parser = self._make_parser()
         parser.print_help()
 
-    def newindex(self):
+    def index(self):
+        action = self.args.action
+
         lucene_wrapper.init_vm()
         index = Index(config=self.config)
         progress_cb = lambda phase, success, errors: print("{} to index {} items... ({} errors)".format(phase, success, errors))
         error_cb = lambda phase, item: print("indexing {} error: id={}".format(phase, item["sid"]))
 
-        start_time = time.time()
-        index.update_all_stories(progress_cb=progress_cb, error_cb=error_cb)
-        print("indexing done. total time: {}s".format(time.time() - start_time))
+        if action == 'create':
+            start_time = time.time()
+            index.update("story", progress_cb=progress_cb, error_cb=error_cb)
+            print("indexing done. total time: {}s".format(time.time() - start_time))
+        elif action == 'update':
+            index.update("story", progress_cb=progress_cb, error_cb=error_cb)
+            print("indexing done. total time: {}s".format(time.time() - start_time))
 
     def query(self):
         if self.index_dir is None:
@@ -147,8 +155,8 @@ class SearchCLI(object):
         if self.sub_command == 'query':
             return self.query()
 
-        if self.sub_command == 'newindex':
-            return self.newindex()
+        if self.sub_command == 'index':
+            return self.index()
 
         if self.sub_command == 'getdocument':
             return self.getdocument()
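
After this change the lucene index is built or refreshed through the single index subcommand with an action argument (in this revision both actions call index.update("story", ...)). Typical invocations would look like:

$ ./searchd_cli.py index create    # build the lucene index
$ ./searchd_cli.py index update    # refresh it incrementally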
diff --git a/src/ns_search/test/test_newslash_index.py b/src/ns_search/test/test_newslash_index.py
index a9b0d33..74a800f 100644
@@ -29,6 +29,8 @@ class TestIndexClass(unittest.TestCase):
         self.assertEqual(cur.rowcount, 1)
         cur = db.execute('SHOW TABLES LIKE "story_text"')
         self.assertEqual(cur.rowcount, 1)
+        cur = db.execute('SHOW TABLES LIKE "story_param"')
+        self.assertEqual(cur.rowcount, 1)
         cur = db.execute('SHOW TABLES LIKE "ns_search_metadata"')
         self.assertEqual(cur.rowcount, 1)
 
@@ -56,6 +58,8 @@ class TestIndexClass(unittest.TestCase):
         self.assertEqual(cur.rowcount, 0)
         cur = db.execute('SHOW TABLES LIKE "story_text"')
         self.assertEqual(cur.rowcount, 0)
+        cur = db.execute('SHOW TABLES LIKE "story_param"')
+        self.assertEqual(cur.rowcount, 0)
         cur = db.execute('SHOW TABLES LIKE "ns_search_metadata"')
         self.assertEqual(cur.rowcount, 0)
 
@@ -95,6 +99,7 @@ class TestIndexClass(unittest.TestCase):
 
         item = stories.select(stoid=stoid1)
         self.assertIsNotNone(item)
+        self.assertIsNot(item[0].get("neverdisplay"), "1")
 
         # create index
         suc, err = self.index.update("story")
@@ -136,9 +141,10 @@ class TestIndexClass(unittest.TestCase):
 
         # check updated stories
         items = stories.select(stoid_le=stoid2, last_update_ge=last_update)
-        self.assertIsNot(len(items), 0)
+        self.assertIs(len(items), 1)
 
         # update index
+        last_update = datetime.now()
         suc, err = self.index.update("story")
         self.assertIs(err, 0)
         self.assertIsNot(suc, 0)
@@ -154,6 +160,33 @@ class TestIndexClass(unittest.TestCase):
         docs = self.index.query(u"犬")
         self.assertIs(docs.total_hits, 1)
 
+        # change story to neverdisplay
+        testdata1["neverdisplay"] = "1"
+        stories.update(**testdata1)
+        item = stories.select(stoid=stoid1)
+        self.assertIsNotNone(item)
+        self.assertEqual(item[0]["neverdisplay"], "1")
+
+        # check updated stories
+        items = stories.select(stoid_le=stoid2, last_update_ge=last_update)
+        self.assertIs(len(items), 1)
+
+        # update index
+        last_update = datetime.now()
+        suc, err = self.index.update("story")
+        self.assertIs(err, 0)
+        self.assertIsNot(suc, 0)
+
+        # check updated index
+        docs = self.index.query(u"猿")
+        self.assertIs(docs.total_hits, 0)
+        docs = self.index.query(u"キーワード")
+        self.assertIs(docs.total_hits, 1)
+        docs = self.index.query(u"猫")
+        self.assertIs(docs.total_hits, 0)
+        docs = self.index.query(u"犬")
+        self.assertIs(docs.total_hits, 1)
+
         # TODO: check public flag
         
 if __name__ == '__main__':