) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 ROW_FORMAT=DYNAMIC
'''
# DDL for the `story_param` table: named key/value parameters attached to a
# story. `param_id` is the auto-increment primary key, and the UNIQUE key on
# (`stoid`, `name`) allows at most one row per parameter name per story.
+CREATE_STORY_PARAM_TABLE_SQL = '''
+CREATE TABLE IF NOT EXISTS `story_param` (
+ `param_id` mediumint(8) unsigned NOT NULL AUTO_INCREMENT,
+ `stoid` mediumint(8) unsigned NOT NULL DEFAULT '0',
+ `name` varchar(32) NOT NULL DEFAULT '',
+ `value` mediumtext NOT NULL,
+ PRIMARY KEY (`param_id`),
+ UNIQUE KEY `story_key` (`stoid`,`name`)
+ ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 ROW_FORMAT=DYNAMIC
+'''
+
class Stories(NewslashDB):
def __init__(self, config):
super(Stories, self).__init__(config)
try:
self.execute(CREATE_STORIES_TABLE_SQL)
self.execute(CREATE_STORY_TEXT_TABLE_SQL)
+ self.execute(CREATE_STORY_PARAM_TABLE_SQL)
except NewslashDBError as e:
self.rollback()
raise e
try:
self.execute('DROP TABLE stories')
self.execute('DROP TABLE story_text')
+ self.execute('DROP TABLE story_param')
except NewslashDBError as e:
self.rollback()
raise e
def update(self, **kwargs):
'''this function implemented for test purpose...'''
kwargs["last_update"] = datetime.now()
+
sql_stories = ('UPDATE stories'
' SET uid=%(uid)s, dept=%(dept)s, last_update=%(last_update)s'
' WHERE stoid=%(stoid)s')
sql_story_text = ('UPDATE story_text'
' SET title=%(title)s, introtext=%(introtext)s'
' WHERE stoid=%(stoid)s')
+ sql_story_param = ('INSERT INTO story_param'
+ ' (stoid, name, value)'
+ ' VALUE (%(stoid)s, "neverdisplay", %(neverdisplay)s)'
+ ' ON DUPLICATE KEY UPDATE value = %(neverdisplay)s')
self.start_transaction()
try:
self.execute(sql_stories, **kwargs)
self.execute(sql_story_text, **kwargs)
+ if "neverdisplay" in kwargs:
+ self.execute(sql_story_param, **kwargs)
+
except NewslashDBError as e:
self.rollback()
raise e
kwargs["offset"] = 0
if "stoid" in kwargs:
- query = ("SELECT stories.*, story_text.* FROM stories"
+ query = ("SELECT stories.*, story_text.*, story_param.value AS neverdisplay FROM stories"
" LEFT JOIN story_text USING(stoid)"
- " WHERE stories.stoid = %(stoid)s"
+ ' LEFT JOIN story_param ON story_param.name = "neverdisplay"'
+ ' AND story_text.stoid = story_param.stoid'
+ " WHERE stories.stoid = %(stoid)s AND stories.time <= NOW()"
" ORDER BY stories.stoid DESC LIMIT %(limit)s OFFSET %(offset)s")
elif "last_update_ge" in kwargs:
- query = ("SELECT stories.*, story_text.* FROM stories"
+ query = ("SELECT stories.*, story_text.*, story_param.value AS neverdisplay FROM stories"
" LEFT JOIN story_text USING(stoid)"
+ ' LEFT JOIN story_param ON story_param.name = "neverdisplay"'
+ ' AND story_text.stoid = story_param.stoid'
" WHERE stories.stoid <= %(stoid_le)s AND stories.last_update >= %(last_update_ge)s"
" ORDER BY stories.stoid DESC LIMIT %(limit)s OFFSET %(offset)s")
elif "stoid_gt" in kwargs:
- query = ("SELECT stories.*, story_text.* FROM stories"
+ query = ("SELECT stories.*, story_text.*, story_param.value AS neverdisplay FROM stories"
" LEFT JOIN story_text USING(stoid)"
- " WHERE stories.stoid > %(stoid_gt)s"
+ ' LEFT JOIN story_param ON story_param.name = "neverdisplay"'
+ ' AND story_text.stoid = story_param.stoid'
+ " WHERE stories.stoid > %(stoid_gt)s AND stories.time <= NOW()"
" ORDER BY stories.stoid DESC LIMIT %(limit)s OFFSET %(offset)s")
else:
- query = ("SELECT stories.*, story_text.* FROM stories"
+ query = ("SELECT stories.*, story_text.*, story_param.value AS neverdisplay FROM stories"
" LEFT JOIN story_text USING(stoid)"
+ ' LEFT JOIN story_param ON story_param.name = "neverdisplay"'
+ ' AND story_text.stoid = story_param.stoid'
+ ' WHERE stories.time <= NOW()'
" ORDER BY stories.stoid DESC LIMIT %(limit)s OFFSET %(offset)s")
cur = self.execute(query, **kwargs)
- result = []
- for item in cur:
- result.append(dict(zip(cur.column_names, item)))
+ col_names = cur.column_names
+ result = [dict(zip(col_names, x)) for x in cur]
self.close()
return result
return result
def update_metadata(self, target, last_update, latest_id):
- sql = (
- "INSERT INTO ns_search_metadata"
- " (target_name, last_update, latest_id)"
- " VALUES (%(target)s, %(last_update)s, %(latest_id)s)"
- " ON DUPLICATE KEY UPDATE"
- " last_update = %(last_update)s,"
- " latest_id = %(latest_id)s"
- )
+ if latest_id > 0:
+ sql = (
+ "INSERT INTO ns_search_metadata"
+ " (target_name, last_update, latest_id)"
+ " VALUES (%(target)s, %(last_update)s, %(latest_id)s)"
+ " ON DUPLICATE KEY UPDATE"
+ " last_update = %(last_update)s,"
+ " latest_id = %(latest_id)s"
+ )
+ else:
+ sql = (
+ "INSERT INTO ns_search_metadata"
+ " (target_name, last_update, latest_id)"
+ " VALUES (%(target)s, %(last_update)s, %(latest_id)s)"
+ " ON DUPLICATE KEY UPDATE"
+ " last_update = %(last_update)s"
+ )
db = newslash_db.NewslashDB(self._db_config())
cur = db.execute(sql, target=target, last_update=last_update, latest_id=latest_id)
# add new stories to index
start_update = datetime.now()
- success = 0
- errors = 0
+ add_success = 0
+ add_errors = 0
offset = 0
max_stoid = 0
with lucene_wrapper.Indexer(index_directory=self.config("SearchIndex", "path")) as indexer:
query_done = True
for item in items:
+ if item["neverdisplay"] == "1":
+ add_success += 1
+ continue
try:
doc = self._make_story_document(item)
- except DocumentMakingError:
- errors += 1
+ except exceptions.DocumentMakingError:
+ add_errors += 1
if error_cb is not None:
error_cb('add', item)
continue
indexer.add(doc)
- success += 1
+ add_success += 1
if progress_cb is not None:
- progress_cb('add', success, errors)
+ progress_cb('add', add_success, add_errors)
for item in items:
if item["stoid"] > max_stoid:
try:
doc = self._make_story_document(item)
- except DocumentMakingError:
+ except exceptions.DocumentMakingError:
update_errors += 1
if error_cb is not None:
error_cb('update', item)
continue
indexer.delete(term)
- indexer.add(doc)
+ if item["neverdisplay"] != "1" and item["time"] <= datetime.now():
+ indexer.add(doc)
update_success += 1
if progress_cb is not None:
progress_cb('update', update_success, update_errors)
- success += update_success
- errors += update_errors
+ success = add_success + update_success
+ errors = add_errors + update_errors
# update metadata
self.update_metadata(target='stories', last_update=start_update, latest_id=max_stoid)
for item in items:
try:
doc = self._make_story_document(item)
- except DocumentMakingError:
+ except exceptions.DocumentMakingError:
errors += 1
if error_cb is not None:
error_cb('add', item)
'''make Document object from query result'''
doc = lucene_wrapper.Document()
if item["time"] is None:
- raise DocumentMakingError()
+ raise exceptions.DocumentMakingError()
# convert datetime to UNIX timestamp
timestamp = calendar.timegm(item["time"].utctimetuple())
parser_query.add_argument('subcommand', action='store_const', const='query')
parser_query.add_argument('query_string', help='query string')
- # 'newindex' subcommand
- parser_query = subparsers.add_parser("newindex", help="create new lucene index")
- parser_query.add_argument('subcommand', action='store_const', const='newindex')
+ # 'index' subcommand
+ parser_query = subparsers.add_parser("index", help="manipulate lucene index")
+ parser_query.add_argument('subcommand', action='store_const', const='index')
+ parser_query.add_argument('action', help='action')
+
# 'getdocument' subcommand
parser_getdoc = subparsers.add_parser("getdocument", help="get document from lucene index")
parser = self._make_parser()
parser.print_help()
def index(self):
    '''Run the "index" subcommand: build or refresh the story index.

    The requested action is read from ``self.args.action``. Both
    "create" and "update" run ``Index.update("story", ...)`` with
    progress/error callbacks that print to stdout, then report the
    elapsed wall-clock time. Any other action is reported and ignored
    without touching the index.

    Returns None.
    '''
    action = self.args.action
    if action not in ('create', 'update'):
        # Report an unrecognised action explicitly instead of doing
        # nothing, and skip VM/index initialisation for it.
        print("unknown index action: {}".format(action))
        return

    def progress_cb(phase, success, errors):
        print("{} to index {} items... ({} errors)".format(phase, success, errors))

    def error_cb(phase, item):
        print("indexing {} error: id={}".format(phase, item["sid"]))

    lucene_wrapper.init_vm()
    index = Index(config=self.config)

    # The clock must be started for every action: the elapsed-time report
    # below reads start_time regardless of which action was requested.
    start_time = time.time()
    index.update("story", progress_cb=progress_cb, error_cb=error_cb)
    print("indexing done. total time: {}s".format(time.time() - start_time))
def query(self):
if self.index_dir is None:
if self.sub_command == 'query':
return self.query()
- if self.sub_command == 'newindex':
- return self.newindex()
+ if self.sub_command == 'index':
+ return self.index()
if self.sub_command == 'getdocument':
return self.getdocument()
self.assertEqual(cur.rowcount, 1)
cur = db.execute('SHOW TABLES LIKE "story_text"')
self.assertEqual(cur.rowcount, 1)
+ cur = db.execute('SHOW TABLES LIKE "story_param"')
+ self.assertEqual(cur.rowcount, 1)
cur = db.execute('SHOW TABLES LIKE "ns_search_metadata"')
self.assertEqual(cur.rowcount, 1)
self.assertEqual(cur.rowcount, 0)
cur = db.execute('SHOW TABLES LIKE "story_text"')
self.assertEqual(cur.rowcount, 0)
+ cur = db.execute('SHOW TABLES LIKE "story_param"')
+ self.assertEqual(cur.rowcount, 0)
cur = db.execute('SHOW TABLES LIKE "ns_search_metadata"')
self.assertEqual(cur.rowcount, 0)
item = stories.select(stoid=stoid1)
self.assertIsNotNone(item)
+ self.assertIsNot(item[0].get("neverdisplay"), "1")
# create index
suc, err = self.index.update("story")
# check updated stories
items = stories.select(stoid_le=stoid2, last_update_ge=last_update)
- self.assertIsNot(len(items), 0)
+ self.assertIs(len(items), 1)
# update index
+ last_update = datetime.now()
suc, err = self.index.update("story")
self.assertIs(err, 0)
self.assertIsNot(suc, 0)
docs = self.index.query(u"犬")
self.assertIs(docs.total_hits, 1)
+ # change story to neverdisplay
+ testdata1["neverdisplay"] = "1"
+ stories.update(**testdata1)
+ item = stories.select(stoid=stoid1)
+ self.assertIsNotNone(item)
+ self.assertEqual(item[0]["neverdisplay"], "1")
+
+ # check updated stories
+ items = stories.select(stoid_le=stoid2, last_update_ge=last_update)
+ self.assertIs(len(items), 1)
+
+ # update index
+ last_update = datetime.now()
+ suc, err = self.index.update("story")
+ self.assertIs(err, 0)
+ self.assertIsNot(suc, 0)
+
+ # check updated index
+ docs = self.index.query(u"猿")
+ self.assertIs(docs.total_hits, 0)
+ docs = self.index.query(u"キーワード")
+ self.assertIs(docs.total_hits, 1)
+ docs = self.index.query(u"猫")
+ self.assertIs(docs.total_hits, 0)
+ docs = self.index.query(u"犬")
+ self.assertIs(docs.total_hits, 1)
+
# TODO: check public flag
if __name__ == '__main__':