
ns_search: add metadata migration tool
author hylom <hylom@users.osdn.me>
Thu, 6 Jun 2019 17:06:57 +0000 (17:06 +0000)
committer hylom <hylom@users.osdn.me>
Thu, 6 Jun 2019 17:06:57 +0000 (17:06 +0000)
src/ns_search/newslash_index/__init__.py
src/ns_search/newslash_index/metadata.py
src/ns_search/newslash_index/metadata_mysql.py [new file with mode: 0644]
src/ns_search/tools/_ns_metadata.db [new file with mode: 0644]
src/ns_search/tools/migrate_metadata.py [new file with mode: 0755]
test-container/ns-searchd/Makefile

diff --git a/src/ns_search/newslash_index/__init__.py b/src/ns_search/newslash_index/__init__.py
index bea047c..aea0f36 100644 (file)
@@ -9,6 +9,7 @@ from metadata import Metadata
 from exceptions import *
 import htmlutil
 
+from metadata_mysql import Metadata as MySQLMetadata
 import lucene_wrapper
 
 def init_vm():
diff --git a/src/ns_search/newslash_index/metadata.py b/src/ns_search/newslash_index/metadata.py
index 7153ebf..861d0e4 100644 (file)
@@ -104,6 +104,16 @@ class Metadata(object):
         db.close()
         return result
 
+    def get_all(self):
+        sql = 'SELECT * from ns_search_metadata'
+        db = self._get_db()
+        cur = db.execute(sql)
+        column_names = [x[0] for x in cur.description]
+        result = [dict(zip(column_names, x)) for x in cur.fetchall()]
+        db.close()
+        return result
+
+
     def update(self, target, last_update, latest_id):
         # sqlite 3.24 or after, can use ON CONFLICT, but current package's version
         # not supported...
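
The comment above refers to SQLite's native UPSERT syntax (INSERT ... ON CONFLICT ... DO UPDATE), which only exists in SQLite 3.24 and later. The body of update() is outside this hunk, but for illustration, a common workaround on older SQLite versions is to try an UPDATE first and fall back to an INSERT when no row was changed. The sketch below is an assumption about that pattern, not the committed implementation; only the table and column names are taken from this file:

    import sqlite3

    def upsert_metadata(db_path, target, last_update, latest_id):
        # Emulate "INSERT ... ON CONFLICT DO UPDATE" on SQLite < 3.24:
        # update the existing row, and insert only if nothing was updated.
        db = sqlite3.connect(db_path)
        cur = db.execute(
            'UPDATE ns_search_metadata SET last_update = ?, latest_id = ?'
            ' WHERE target_name = ?',
            (last_update, latest_id, target))
        if cur.rowcount == 0:
            db.execute(
                'INSERT INTO ns_search_metadata (target_name, last_update, latest_id)'
                ' VALUES (?, ?, ?)',
                (target, last_update, latest_id))
        db.commit()
        db.close()
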
diff --git a/src/ns_search/newslash_index/metadata_mysql.py b/src/ns_search/newslash_index/metadata_mysql.py
new file mode 100644 (file)
index 0000000..a47c84e
--- /dev/null
@@ -0,0 +1,101 @@
+# -*- coding: utf-8 -*-
+"""metadata: index metadata for newslash search system"""
+
+import newslash_db
+from mysql.connector.errors import ProgrammingError
+import exceptions
+
+class Metadata(object):
+    def __init__(self, config):
+        self._config = config
+
+    def _db_config(self):
+        db_cfg = self._config.get("database")
+        if db_cfg is None:
+            raise exceptions.ConfigFileError(exceptions.ConfigFileError.SECTION_NOT_FOUND, "Database")
+        return db_cfg
+
+    def get_current_timestamp(self):
+        db = newslash_db.NewslashDB(self._db_config())
+        cur = db.execute('SELECT NOW()')
+        result = cur.fetchone()[0]
+        db.close()
+        return result
+
+    def create_table(self):
+        sql = (
+            "CREATE TABLE ns_search_metadata ("
+            "  target_id    tinyint(8) unsigned NOT NULL AUTO_INCREMENT,"
+            "  target_name  varchar(32)         NOT NULL UNIQUE,"
+            "  last_update  timestamp           NOT NULL DEFAULT CURRENT_TIMESTAMP,"
+            "  latest_id    int(8) unsigned     NOT NULL DEFAULT 0,"
+            "  PRIMARY KEY (target_id)"
+            ")"
+        )
+
+        db = newslash_db.NewslashDB(self._db_config())
+        try:
+            db.execute(sql)
+        except ProgrammingError as e:
+            db.close()
+            raise exceptions.DatabaseError('table creation error: {}'.format(str(e)))
+
+        db.close()
+
+    def drop_table(self):
+        db = newslash_db.NewslashDB(self._db_config())
+        db.execute('DROP TABLE ns_search_metadata')
+        db.close()
+
+    def delete_all(self):
+        db = newslash_db.NewslashDB(self._db_config())
+        db.execute('DELETE FROM ns_search_metadata')
+        db.close()
+
+    def get(self, target):
+        sql = 'SELECT * from ns_search_metadata WHERE target_name = %(target)s'
+        db = newslash_db.NewslashDB(self._db_config())
+        cur = db.execute(sql, target=target)
+        if cur.rowcount > 0:
+            result = dict(zip(cur.column_names, cur.fetchone()))
+        else:
+            result = {}
+
+        db.close()
+        return result
+
+    def get_all(self):
+        sql = 'SELECT * from ns_search_metadata'
+        db = newslash_db.NewslashDB(self._db_config())
+        cur = db.execute(sql)
+        if cur.rowcount > 0:
+            result = [dict(zip(cur.column_names, x)) for x in cur.fetchall()]
+        else:
+            result = []
+
+        db.close()
+        return result
+
+    def update(self, target, last_update, latest_id):
+        if latest_id > 0:
+            sql = (
+                "INSERT INTO ns_search_metadata"
+                "  (target_name, last_update, latest_id)"
+                "  VALUES (%(target)s, %(last_update)s, %(latest_id)s)"
+                "  ON DUPLICATE KEY UPDATE"
+                "    last_update = %(last_update)s,"
+                "    latest_id = %(latest_id)s"
+            )
+        else:
+            sql = (
+                "INSERT INTO ns_search_metadata"
+                "  (target_name, last_update, latest_id)"
+                "  VALUES (%(target)s, %(last_update)s, %(latest_id)s)"
+                "  ON DUPLICATE KEY UPDATE"
+                "    last_update = %(last_update)s"
+            )
+
+        db = newslash_db.NewslashDB(self._db_config())
+        db.execute(sql, target=target, last_update=last_update, latest_id=latest_id)
+        db.close()
+
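
For reference, the class above needs nothing beyond a config dict with a "database" entry, which _db_config() hands straight to newslash_db.NewslashDB. A minimal usage sketch — the connection keys shown are assumptions about a typical MySQL setup, not something this commit defines:

    from newslash_index import MySQLMetadata

    # "database" is passed through to newslash_db.NewslashDB unchanged; the keys
    # below are placeholders, not confirmed by this commit.
    config = {"database": {"host": "localhost", "user": "newslash",
                           "password": "secret", "name": "newslash"}}

    meta = MySQLMetadata(config)
    for row in meta.get_all():
        print(row)
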
diff --git a/src/ns_search/tools/_ns_metadata.db b/src/ns_search/tools/_ns_metadata.db
new file mode 100644 (file)
index 0000000..ec0f955
Binary files /dev/null and b/src/ns_search/tools/_ns_metadata.db differ
diff --git a/src/ns_search/tools/migrate_metadata.py b/src/ns_search/tools/migrate_metadata.py
new file mode 100755 (executable)
index 0000000..983fcf3
--- /dev/null
@@ -0,0 +1,60 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+from __future__ import print_function
+
+import sys
+import os.path
+
+sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
+
+from newslash_index import Index, DatabaseError, Metadata, MySQLMetadata
+
+from yaml import load
+try:
+    from yaml import CLoader as Loader, CDumper as Dumper
+except ImportError:
+    from yaml import Loader, Dumper
+
+CONFIG_FILE = '../searchd.conf'
+OUTPUT_DIR = '.'
+
+def _load_config(pathname):
+    fh = open(pathname)
+    config = load(fh, Loader=Loader)
+    fh.close()
+    return config
+
+
+def main():
+    config = _load_config(CONFIG_FILE)
+
+    # get old metadata from MySQL DB
+    metadata_old = MySQLMetadata(dict(database=config["Database"]))
+    data = metadata_old.get_all()
+
+    print("==== metadata: ====")
+    for item in data:
+        print(item)
+
+    # put metadata to file
+    metadata = Metadata(dict(index_path=OUTPUT_DIR))
+
+    print("create table...")
+    metadata.create_table()
+    print("ok")
+
+    for item in data:
+        sys.stdout.write(".")
+        sys.stdout.flush()
+        metadata.update(item["target_name"], item["last_update"], item["latest_id"])
+
+    print("convert done.")
+    new_data = metadata.get_all()
+    print("==== new metadata: ====")
+    for item in new_data:
+        print(item)
+
+
+if __name__ == '__main__':
+    main()
+
diff --git a/test-container/ns-searchd/Makefile b/test-container/ns-searchd/Makefile
index ebbec0c..348ed3b 100644 (file)
@@ -20,7 +20,7 @@ run-daemon:
 restart:
        docker restart $(CONTAINER_NAME)
 run:
-       docker run -ti --rm  $(PORT_OPTS) $(MOUNT_OPTS) $(LINK_OPTS) -v $(NEWSLASH_DIR):/var/newslash $(IMAGE_NAME) bash
+       docker run -ti --rm -p 6001:6000 $(MOUNT_OPTS) $(LINK_OPTS) -v $(NEWSLASH_DIR):/var/newslash $(IMAGE_NAME) bash
 
 test-query:
        curl http://$(HOST):6000/ -X POST -H 'Content-Type: application/json' -d '{"query":"firefox", "target":"story"}'
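
With the run target now publishing the daemon's port 6000 as port 6001 on the Docker host, the same test query can also be sent from outside the container. A small Python sketch of that request — localhost:6001 assumes the container was started with this run target on the local machine (Python 2 is used to match the scripts in this commit):

    import json
    import urllib2

    req = urllib2.Request(
        'http://localhost:6001/',
        data=json.dumps({"query": "firefox", "target": "story"}),
        headers={'Content-Type': 'application/json'})
    print(urllib2.urlopen(req).read())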